Deep learning software for Windows C# programmers.
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6using MyCaffe.common;
7using MyCaffe.param;
9namespace MyCaffe.layers
17 {
Layer<T> m_softmaxLayer;     // internal SOFTMAX layer: bottom[0] -> m_blobProb
Layer<T> m_logLayer;         // internal LOG layer: m_blobProb -> m_blobLogProb
Blob<T> m_blobProb;          // softmax probabilities
Blob<T> m_blobLogProb;       // log of the softmax probabilities (log-softmax)
Blob<T> m_blobLoss;          // per-element loss values
BlobCollection<T> m_colSoftmaxBottomVec = new BlobCollection<T>();
BlobCollection<T> m_colSoftmaxTopVec = new BlobCollection<T>();
BlobCollection<T> m_colLogBottomVec = new BlobCollection<T>();
BlobCollection<T> m_colLogTopVec = new BlobCollection<T>();
// Label value to be ignored when computing the loss (null = none).
int? m_nIgnoreLabel = null;
// Normalizer computed in forward() and reused in backward() to scale the gradient.
double m_dfNormalizer = 0;
// Canonical axis along which the softmax is computed.
int m_nSoftmaxAxis = 1;
/// <summary>
/// The SoftmaxCrossEntropy2LossLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter.</param>
public SoftmaxCrossEntropy2LossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.SOFTMAXCROSSENTROPY2_LOSS;

    // NOTE(review): the original had 'Name = + " prob"' etc. with a missing
    // left operand (lost in extraction); prefixing with the layer name
    // restores the intended 'layername blobname' convention.
    m_blobProb = new Blob<T>(cuda, log);
    m_blobProb.Name = m_param.name + " prob";
    m_blobLogProb = new Blob<T>(cuda, log);
    m_blobLogProb.Name = m_param.name + " logprob";
    m_blobLoss = new Blob<T>(cuda, log);
    m_blobLoss.Name = m_param.name + " loss";
}
/// <summary>
/// Releases all GPU and host resources used by the Layer.
/// </summary>
protected override void dispose()
{
    // The visible original disposed only m_blobProb and m_softmaxLayer,
    // leaking the other blobs and the log layer created in the constructor
    // and LayerSetUp; dispose everything this layer owns.
    m_blobProb.Dispose();
    m_blobLogProb.Dispose();
    m_blobLoss.Dispose();

    if (m_softmaxLayer != null)
        m_softmaxLayer.Dispose();

    if (m_logLayer != null)
        m_logLayer.Dispose();

    base.dispose();
}
/// <summary>
/// Derivative layers add all internal blobs to the 'col' provided.
/// </summary>
/// <param name="col">Specifies the collection to fill with internal blobs.</param>
protected override void setup_internal_blobs(BlobCollection<T> col)
{
    // Populate only when the collection has not been filled yet.
    if (col.Count == 0)
        col.Add(m_blobProb);
}
/// <summary>
/// Returns the exact number of required top (output) Blobs: -1 (variable),
/// the actual bounds are given by MinTopBlobs/MaxTopBlobs.
/// </summary>
public override int ExactNumTopBlobs
{
    get { return -1; }
}
/// <summary>
/// Returns the minimum number of required top (output) Blobs: loss.
/// </summary>
public override int MinTopBlobs
{
    get { return 1; }
}
/// <summary>
/// Returns the maximum number of required top (output) Blobs: loss, loss values.
/// </summary>
public override int MaxTopBlobs
{
    get { return 2; }
}
/// <summary>
/// Setup the layer: creates the internal softmax and log layers that produce
/// the log-softmax values consumed by the cross-entropy forward/backward passes.
/// </summary>
/// <param name="colBottom">Specifies the bottom (input) Blobs: predictions and targets.</param>
/// <param name="colTop">Specifies the top (output) Blobs: loss (and optional loss values).</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.LayerSetUp(colBottom, colTop);

    m_nIgnoreLabel = m_param.loss_param.ignore_label;

    // Internal softmax layer: clone this layer's parameter so settings such as
    // softmax_param.axis carry over, then retype to SOFTMAX and drop the loss
    // weights (the internal layer must not contribute a loss of its own).
    LayerParameter param_softmax = m_param.Clone(false);
    param_softmax.SetType(LayerParameter.LayerType.SOFTMAX);
    param_softmax.loss_weight.Clear();

    m_softmaxLayer = Layer<T>.Create(m_cuda, m_log, param_softmax, null);
    m_colSoftmaxBottomVec = new BlobCollection<T>() { colBottom[0] };
    m_colSoftmaxTopVec = new BlobCollection<T>() { m_blobProb };
    m_softmaxLayer.Setup(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

    // Internal log layer: prob -> log(prob).
    // NOTE(review): the original declaration of 'param_log' was lost in the
    // extraction of this file; reconstructed here as a plain LOG parameter —
    // confirm against the upstream source.
    LayerParameter param_log = new LayerParameter(LayerParameter.LayerType.LOG);
    m_logLayer = Layer<T>.Create(m_cuda, m_log, param_log, null);
    m_colLogBottomVec = new BlobCollection<T>() { m_blobProb };
    m_colLogTopVec = new BlobCollection<T>() { m_blobLogProb };
    m_logLayer.Setup(m_colLogBottomVec, m_colLogTopVec);
}
/// <summary>
/// Reshape the bottom (input) and top (output) blobs, along with the internal
/// softmax/log layers, and recompute the outer/inner counts for the softmax axis.
/// </summary>
/// <param name="colBottom">Specifies the bottom (input) Blobs: predictions and targets.</param>
/// <param name="colTop">Specifies the top (output) Blobs: loss (and optional loss values).</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.Reshape(colBottom, colTop);

    // Per-element losses mirror the prediction shape.
    m_blobLoss.ReshapeLike(colBottom[0]);
    m_softmaxLayer.Reshape(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);
    m_logLayer.Reshape(m_colLogBottomVec, m_colLogTopVec);

    // Resolve the (possibly negative) softmax axis, then split the counts
    // around it: outer = count before the axis, inner = count after it.
    m_nSoftmaxAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
    m_nOuterNum = colBottom[0].count(0, m_nSoftmaxAxis);
    m_nInnerNum = colBottom[0].count(m_nSoftmaxAxis + 1);

    if (!m_bIgnoreLabels)
    {
        m_log.CHECK_EQ(colBottom[0].count(0, m_nSoftmaxAxis), colBottom[1].count(0, m_nSoftmaxAxis), "Number of labels must match number of predictions; e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.");

        // Optional second top receives the per-element softmax output shape.
        if (colTop.Count >= 2)
            colTop[1].ReshapeLike(colBottom[0]);
    }
}
/// <summary>
/// The forward computation for softmax cross entropy loss: softmax, then log,
/// then an NLL-style reduction on the GPU, normalized per m_normalization.
/// </summary>
/// <param name="colBottom">Specifies the bottom (input) Blobs: [0] predictions, [1] targets.</param>
/// <param name="colTop">Specifies the top (output) Blobs: [0] scalar loss, [1] (optional) per-element losses.</param>
protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // The forward pass computes the softmax outputs (which are probabilities).
    m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

    // Run the log on the probabilities to get LogSoftmax.
    m_logLayer.Forward(m_colLogBottomVec, m_colLogTopVec);

    // Use the log-softmax output for input data.
    long hProbData = m_blobLogProb.gpu_data;
    long hTarget = colBottom[1].gpu_data;
    int nDim = m_blobProb.shape()[m_nSoftmaxAxis];
    int nCount = m_nOuterNum * m_nInnerNum;

    m_blobLoss.SetDiff(0.0);
    long hLossData = m_blobLoss.mutable_gpu_data;
    long hLossDiff = m_blobLoss.mutable_gpu_diff;

    // Since this memory is not used for anything, we use it here to avoid having
    // to allocate the GPU memory to accumulate intermediate results.
    colBottom[1].SetDiff(0);
    long hCountData = colBottom[1].mutable_gpu_diff;

    // Run the NLL Loss portion to get the loss.
    m_cuda.softmax_cross_entropy_fwd(colBottom[0].count(), hProbData, hTarget, hLossDiff, hLossData, m_nOuterNum, nDim, m_nInnerNum, hCountData, m_nIgnoreLabel.GetValueOrDefault(-1));
    double dfLoss = m_cuda.asum_double(colBottom[0].count(), hLossData);

    double dfValidCount = nCount;
    // Only launch another CUDA kernel if we actually need the valid count.
    if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
        dfValidCount = m_cuda.asum_double(nCount, hCountData);

    // Save the normalizer for backward(), which scales the gradient by it.
    m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);
    double dfFinalLoss = dfLoss / m_dfNormalizer;

    colTop[0].SetData(dfFinalLoss, 0);

    // Return the losses in colTop[1] if it exists.
    if (colTop.Count == 2)
        colTop[1].CopyFrom(m_blobLoss);

    // Clear scratch memory to prevent interfering with the backward pass (see #6202).
    colBottom[1].SetDiff(0);
}
/// <summary>
/// Computes the softmax cross entropy loss error gradient w.r.t the predictions,
/// pushing the scaled per-element losses back through the log and softmax layers.
/// </summary>
/// <param name="colTop">Specifies the top (output) Blobs whose diff carries the incoming gradient.</param>
/// <param name="rgbPropagateDown">Specifies whether to propagate down to each bottom Blob.</param>
/// <param name="colBottom">Specifies the bottom (input) Blobs receiving the gradients.</param>
protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
{
    // Nothing to do when the predictions do not require a gradient.
    if (!rgbPropagateDown[0])
        return;

    // NLL gradient: scale the stored per-element losses by the (negated,
    // normalized) top gradient computed during forward().
    float fScale = -1.0f * convertF(colTop[0].GetDiff(0)) / (float)m_dfNormalizer;
    m_blobLoss.scale_diff(fScale);

    // Log gradient: seed the log layer's top diff with the scaled losses.
    m_blobLogProb.CopyFrom(m_blobLoss, true);
    m_logLayer.Backward(m_colLogTopVec, rgbPropagateDown, m_colLogBottomVec);

    // Softmax gradient: propagate the rest of the way to the predictions.
    m_softmaxLayer.Backward(m_colSoftmaxTopVec, rgbPropagateDown, m_colSoftmaxBottomVec);
}
269 }
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
Definition: Blob.cs:684
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
void scale_diff(double df)
Scale the diff by a scaling factor.
Definition: Blob.cs:1763
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
Definition: Blob.cs:1981
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
Definition: Layer.cs:815
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
Definition: Layer.cs:728
float convertF(T df)
Converts a generic to a float value.
Definition: Layer.cs:1359
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
Definition: Layer.cs:439
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
Definition: Layer.cs:1468
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The LossLayer provides an interface for Layer's that take two blobs as input – usually (1) prediction...
Definition: LossLayer.cs:23
bool m_bIgnoreLabels
Set to true when labels are to be ignored.
Definition: LossLayer.cs:31
int m_nOuterNum
Specifies the outer num, such as the batch count (e.g. count(0, axis)). Each derivative class must se...
Definition: LossLayer.cs:39
int m_nInnerNum
Specifies the inner num, such as the channel + height + width (e.g. count(axis + 1))....
Definition: LossLayer.cs:43
virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
Returns the normalizer used to normalize the loss.
Definition: LossLayer.cs:92
LossParameter.NormalizationMode m_normalization
Specifies the normalization mode used to normalize the loss.
Definition: LossLayer.cs:35
The SoftmaxCrossEntropy2Layer computes the cross-entropy (logistic) loss and is often used for predi...
SoftmaxCrossEntropy2LossLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The SoftmaxCrossEntropyLayer constructor.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the softmax cross entropy loss error gradient w.r.t the predictions.
override int MaxTopBlobs
Returns the maximum number of required top (output) Blobs: loss, loss values
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: loss.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs as variable.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
The forward computation for softmax cross entropy loss.
override void dispose()
Releases all GPU and host resources used by the Layer.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
List< double > loss_weight
Specifies the loss weight.
SoftmaxParameter softmax_param
Returns the parameter set when initialized with LayerType.SOFTMAX
void SetType(LayerType type, bool bNewParam=true)
Set the layer type.
Specifies the layer type.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
virtual LayerParameter Clone(bool bCloneBlobs)
Creates a new copy of this instance of the parameter.
Stores the parameters used by loss layers.
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
int? ignore_label
If specified, the ignore instances with the given label.
Specifies the parameters for the SoftmaxLayer
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12