MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
SoftmaxCrossEntropyLossLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
    /// <summary>
    /// The SoftmaxCrossEntropyLossLayer computes the cross-entropy (logistic) loss and is
    /// often used for predicting targets interpreted as probabilities.
    /// </summary>
    /// <typeparam name="T">Specifies the base type: <i>float</i> or <i>double</i>.</typeparam>
    public class SoftmaxCrossEntropyLossLayer<T> : LossLayer<T>
    {
        SoftmaxLayer<T> m_softmaxLayer;
        Blob<T> m_blobSoftmaxOutput;
        Blob<T> m_blobTarget = null;
        Blob<T> m_blobLoss;
        BlobCollection<T> m_colSoftmaxBottomVec = new BlobCollection<T>();
        BlobCollection<T> m_colSoftmaxTopVec = new BlobCollection<T>();

        // How to normalize the loss.
        double m_dfNormalizer = 0;

        /// <summary>
        /// The SoftmaxCrossEntropyLossLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>
        public SoftmaxCrossEntropyLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS;
            m_blobSoftmaxOutput = new Blob<T>(cuda, log);
            m_blobSoftmaxOutput.Name = m_param.name + " softmax out";
            m_blobLoss = new Blob<T>(cuda, log);
            m_blobLoss.Name = m_param.name + " loss";

            LayerParameter param_softmax = p.Clone(false);
            param_softmax.loss_weight.Clear();
            m_softmaxLayer = new SoftmaxLayer<T>(cuda, log, param_softmax);
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            m_blobSoftmaxOutput.Dispose();
            m_softmaxLayer.Dispose();
            m_blobLoss.Dispose();

            if (m_blobTarget != null)
                m_blobTarget.Dispose();

            base.dispose();
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs as variable.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, loss values.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            m_colSoftmaxBottomVec = new BlobCollection<T>();
            m_colSoftmaxBottomVec.Add(colBottom[0]);
            m_colSoftmaxTopVec = new BlobCollection<T>();
            m_colSoftmaxTopVec.Add(m_blobSoftmaxOutput);
            m_softmaxLayer.Setup(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);
            int nAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
            m_nOuterNum = colBottom[0].count(0, nAxis); // batch size.
            m_nInnerNum = colBottom[0].count(nAxis);    // instance size: |output| == |target|.

            if (colBottom[0].count() != colBottom[1].count())
            {
                m_log.CHECK_EQ(colBottom[0].count(0, nAxis), colBottom[1].count(0, nAxis), "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");

                // Expand the sparse labels into a dense target where the
                // value at each label's target index is set to 1.0.
                if (m_blobTarget == null)
                {
                    m_blobTarget = new Blob<T>(m_cuda, m_log);
                    m_blobTarget.Name = "full_label";
                }

                m_blobTarget.ReshapeLike(colBottom[0]);
            }

            m_softmaxLayer.Reshape(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);
            m_blobLoss.ReshapeLike(colBottom[0]);
        }

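        // For example (illustrative shapes): with predictions of shape (32, 10), bottom[1]
        // may either be a dense target of the same shape (32, 10), or hold one class index
        // per batch item (e.g. shape (32, 1)); in the sparse case m_blobTarget is reshaped
        // here and filled with the one-hot expansion of the indexes during the forward pass.
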
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Set the target data.
            if (m_blobTarget != null)
            {
                int nAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
                m_log.CHECK_EQ(colBottom[0].count(0, nAxis), colBottom[1].count(0, nAxis), "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");
                m_blobTarget.SetData(0);

                float[] rgfTarget = convertF(colBottom[1].mutable_cpu_data);
                for (int i = 0; i < colBottom[1].num; i++)
                {
                    int nTargetIdx = (int)rgfTarget[i];
                    m_blobTarget.SetData(1.0, m_nInnerNum * i + nTargetIdx);
                }
            }

            // The forward pass computes the softmax outputs.
            m_colSoftmaxBottomVec[0] = colBottom[0];
            m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

            // Compute the loss (negative log likelihood).
            int nCount = colBottom[0].count();

            // Stable version of the loss computation on the input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;

            // Since this memory is not used for anything else, we use it here to avoid having
            // to allocate new GPU memory to accumulate the intermediate results.
            long hLossData = colBottom[0].mutable_gpu_diff;
            long hCountData = (m_blobTarget != null) ? m_blobTarget.mutable_gpu_diff : colBottom[1].mutable_gpu_diff;

            m_cuda.sigmoid_cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, false, -1, hCountData);

            double dfValidCount = nCount;
            double dfLoss = m_cuda.asum_double(nCount, hLossData);
            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear the scratch memory to prevent interfering with the backward pass (see #6202).
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);

            if (m_blobTarget != null)
                m_blobTarget.SetDiff(0);
        }

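        // Note: the per-element losses above are accumulated directly from the logits by
        // the stable cross-entropy kernel, whereas the gradient computed in backward() is
        // the standard softmax cross-entropy derivative, softmax(x) - target, scaled by
        // the loss weight and normalizer.
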
        /// <summary>
        /// Computes the softmax cross-entropy loss error gradient w.r.t. the predictions.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate the error back down to each bottom Blob.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (rgbPropagateDown[1])
                m_log.FAIL(m_type.ToString() + " Layer cannot backpropagate to label inputs.");

            if (rgbPropagateDown[0])
            {
                // First, compute the diff: bottom_diff = softmax(input) - target.
                int nCount = colBottom[0].count();
                long hSoftmaxOutputData = m_blobSoftmaxOutput.gpu_data;
                long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;
                long hBottomDiff = colBottom[0].mutable_gpu_diff;

                m_cuda.copy(nCount, hSoftmaxOutputData, hBottomDiff);
                m_cuda.axpy(nCount, convert(-1.0), hTarget, hBottomDiff);

                // Scale down the gradient by the loss weight and normalizer.
                double dfLossWeight = convertD(colTop[0].GetDiff(0)) / m_dfNormalizer;
                m_cuda.scal(nCount, dfLossWeight, hBottomDiff);
            }
        }
    }
}
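
A minimal usage sketch of the layer follows. It is not part of the source file above; the CudaDnn, Log, and Blob constructor arguments shown are illustrative assumptions about typical MyCaffe overloads, and the shapes are arbitrary. The sketch assumes the same using directives as the file above.

    // Illustrative sketch only -- constructor overloads shown here are assumptions.
    CudaDnn<float> cuda = new CudaDnn<float>(0);   // connect to GPU device 0 (assumed overload).
    Log log = new Log("softmax_ce_example");       // assumed Log constructor.

    // Configure the loss layer; softmax_param.axis selects the class axis.
    LayerParameter p = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
    SoftmaxCrossEntropyLossLayer<float> layer = new SoftmaxCrossEntropyLossLayer<float>(cuda, log, p);

    // bottom[0]: predictions of shape (batch, classes); bottom[1]: dense targets of the
    // same shape (one label index per batch item also works, as handled in Reshape/forward).
    Blob<float> blobPred = new Blob<float>(cuda, log, new List<int>() { 32, 10 });   // assumed shape overload.
    Blob<float> blobTarget = new Blob<float>(cuda, log, new List<int>() { 32, 10 });
    Blob<float> blobLoss = new Blob<float>(cuda, log);

    BlobCollection<float> colBottom = new BlobCollection<float>();
    colBottom.Add(blobPred);
    colBottom.Add(blobTarget);
    BlobCollection<float> colTop = new BlobCollection<float>();
    colTop.Add(blobLoss);

    layer.Setup(colBottom, colTop);     // LayerSetUp + Reshape.
    layer.Forward(colBottom, colTop);   // colTop[0] now holds the normalized loss.
    layer.Dispose();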