MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
SoftmaxLossLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
    public class SoftmaxLossLayer<T> : LossLayer<T>
    {
        Layer<T> m_softmaxLayer;
        Blob<T> m_blobProb;
        BlobCollection<T> m_colSoftmaxBottom;
        BlobCollection<T> m_colSoftmaxTop;
        int? m_nIgnoreLabel = null;
        int m_nSoftmaxAxis;

        public SoftmaxLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.SOFTMAXWITH_LOSS;
            m_blobProb = new Blob<T>(cuda, log);
            m_blobProb.Name = m_param.name + " prob";
        }

        protected override void dispose()
        {
            m_blobProb.Dispose();

            if (m_softmaxLayer != null)
                m_softmaxLayer.Dispose();

            base.dispose();
        }

        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            col.Add(m_blobProb);
        }

        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        public override int MinTopBlobs
        {
            get { return 1; }
        }

        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

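            // The loss layer wraps an internal SoftmaxLayer: it shares this
            // layer's first bottom blob as its input and writes the resulting
            // class probabilities into m_blobProb for the loss kernels below.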
            LayerParameter param_softmax = m_param.Clone(false);
            param_softmax.SetType(LayerParameter.LayerType.SOFTMAX);
            param_softmax.loss_weight.Clear();

            m_softmaxLayer = new SoftmaxLayer<T>(m_cuda, m_log, param_softmax);
            m_colSoftmaxBottom = new BlobCollection<T>();
            m_colSoftmaxTop = new BlobCollection<T>();

            m_colSoftmaxBottom.Add(colBottom[0]);
            m_colSoftmaxTop.Add(m_blobProb);
            m_softmaxLayer.Setup(m_colSoftmaxBottom, m_colSoftmaxTop);

            m_nIgnoreLabel = m_param.loss_param.ignore_label;
        }

        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);

            m_softmaxLayer.Reshape(m_colSoftmaxBottom, m_colSoftmaxTop);
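            // With canonical softmax axis k and input shape (d0, ..., dn), the
            // outer num is d0*...*d(k-1) (e.g. the batch size) and the inner
            // num is d(k+1)*...*dn (e.g. H*W), giving outer*inner predicted
            // distributions, each over the d(k) classes.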
            m_nSoftmaxAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
            m_nOuterNum = colBottom[0].count(0, m_nSoftmaxAxis);
            m_nInnerNum = colBottom[0].count(m_nSoftmaxAxis + 1);

            if (!m_bIgnoreLabels)
            {
                m_log.CHECK_EQ(m_nOuterNum * m_nInnerNum, colBottom[1].count(), "Number of labels must match number of predictions; e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.");

                if (colTop.Count >= 2)
                {
                    // softmax output
                    colTop[1].ReshapeLike(colBottom[0]);
                }
            }
        }

        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_softmaxLayer.Forward(m_colSoftmaxBottom, m_colSoftmaxTop);

            long hProbData = m_blobProb.gpu_data;
            long hLabel = colBottom[1].gpu_data;
            int nDim = m_blobProb.count() / m_nOuterNum;
            int nCount = m_nOuterNum * m_nInnerNum;

            // Since this memory is not otherwise used here, we use it to avoid
            // having to allocate new GPU memory to accumulate intermediate results.
            long hLossData = colBottom[0].mutable_gpu_diff;

            // Similarly, this memory is never used elsewhere, and thus we can use it
            // to avoid having to allocate additional GPU memory.
            long hCounts = m_blobProb.mutable_gpu_diff;

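            // For each of the outer*inner positions, the forward kernel writes
            // -log(prob[label]) into hLossData, and a 1 into hCounts (0 when
            // the label equals the ignore label) so the number of positions
            // that actually contribute to the loss can be summed below.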
            m_cuda.softmaxloss_fwd(nCount, hProbData, hLabel, hLossData, m_nOuterNum, nDim, m_nInnerNum, hCounts, m_nIgnoreLabel);
            T fLoss = m_cuda.asum(nCount, hLossData);
            double dfValidCount = -1;

            // Only launch another cuda kernel if we actually need the count of valid
            // outputs.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
                dfValidCount = convertD(m_cuda.asum(nCount, hCounts));

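            // When no ignore label is set, dfValidCount remains -1 and
            // get_normalizer falls back to the full outer*inner count for
            // VALID normalization.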
            double dfLoss = convertD(fLoss);
            double dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);
            double dfFinalLoss = dfLoss / dfNormalizer;

            colTop[0].SetData(dfFinalLoss, 0);

            if (colTop.Count == 2)
                colTop[1].ShareData(m_blobProb);

            // Clear the scratch memory to prevent it from interfering with the backward pass (see #602).
            colBottom[0].SetDiff(0);
        }

        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!rgbPropagateDown[0])
                return;

            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            long hProbData = m_blobProb.gpu_data;
            long hTopData = colTop[0].gpu_data;

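            // The gradient of softmax + multinomial logistic loss w.r.t. the
            // predictions is (prob - 1{label}). The copy below seeds the bottom
            // diff with the probabilities; the backward kernel then subtracts
            // 1 at each true class and zeroes ignored positions.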
            m_cuda.copy(m_blobProb.count(), hProbData, hBottomDiff);

            long hLabel = colBottom[1].gpu_data;
            int nDim = m_blobProb.count() / m_nOuterNum;
            int nCount = m_nOuterNum * m_nInnerNum;

            // Since this memory is not used for anything else,
            // we use it to avoid allocating new GPU memory.
            long hCounts = m_blobProb.mutable_gpu_diff;

            m_cuda.softmaxloss_bwd(nCount, hTopData, hLabel, hBottomDiff, m_nOuterNum, nDim, m_nInnerNum, hCounts, m_nIgnoreLabel);

            double dfValidCount = -1;

            // Only launch another cuda kernel if we actually need the count of valid
            // outputs.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
                dfValidCount = convertD(m_cuda.asum(nCount, hCounts));

            double dfTopDiff = convertD(colTop[0].GetDiff(0));
            double dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);
            double dfLossWeight = dfTopDiff / dfNormalizer;

            m_cuda.scal(m_blobProb.count(), convert(dfLossWeight), hBottomDiff);
        }
    }
}
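A minimal usage sketch follows (not part of the source above): it wires a SoftmaxLossLayer by hand for a 16-sample, 10-class batch. The exact CudaDnn, Log, LayerParameter, and Blob constructor arguments shown are assumptions for illustration; in practice the layer is normally created and connected by the Net from a model description.

using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.layers;

public static class SoftmaxLossExample
{
    public static void Run()
    {
        // Assumed constructor arguments, for illustration only.
        CudaDnn<float> cuda = new CudaDnn<float>(0);    // GPU device 0.
        Log log = new Log("softmaxloss_example");
        LayerParameter p = new LayerParameter(LayerParameter.LayerType.SOFTMAXWITH_LOSS);

        SoftmaxLossLayer<float> layer = new SoftmaxLossLayer<float>(cuda, log, p);

        // Bottom blobs: (N, C, H, W) predictions and one label per N*H*W position.
        Blob<float> blobPred = new Blob<float>(cuda, log, 16, 10, 1, 1);
        Blob<float> blobLabel = new Blob<float>(cuda, log, 16, 1, 1, 1);
        Blob<float> blobLoss = new Blob<float>(cuda, log);

        BlobCollection<float> colBottom = new BlobCollection<float>();
        colBottom.Add(blobPred);
        colBottom.Add(blobLabel);

        BlobCollection<float> colTop = new BlobCollection<float>();
        colTop.Add(blobLoss);

        layer.Setup(colBottom, colTop);
        layer.Forward(colBottom, colTop);   // blobLoss now holds the normalized scalar loss.

        layer.Dispose();
    }
}

Adding a second blob to colTop before Setup would, per MaxTopBlobs and the forward code above, also expose the softmax probabilities as a second output sharing m_blobProb's data.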
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
Definition: Layer.cs:535
double convertD(T df)
Converts a generic to a double value.
Definition: Layer.cs:1349
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The LossLayer provides an interface for Layers that take two blobs as input – usually (1) prediction...
Definition: LossLayer.cs:23
bool m_bIgnoreLabels
Set to true when labels are to be ignored.
Definition: LossLayer.cs:31
int m_nOuterNum
Specifies the outer num, such as the batch count (e.g. count(0, axis)). Each derivative class must se...
Definition: LossLayer.cs:39
int m_nInnerNum
Specifies the inner num, such as the channel + height + width (e.g. count(axis + 1))....
Definition: LossLayer.cs:43
virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
Returns the normalizer used to normalize the loss.
Definition: LossLayer.cs:92
LossParameter.NormalizationMode m_normalization
Specifies the normalization mode used to normalize the loss.
Definition: LossLayer.cs:35
The SoftmaxLayer computes the softmax function. This layer is initialized with the MyCaffe....
Definition: SoftmaxLayer.cs:24
Computes the multinomial logistic loss for a one-of-many classification task, passing real-valued pre...
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
SoftmaxLossLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
Constructor.
override void dispose()
Releases all GPU and host resources used by the Layer.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the softmax loss error gradient w.r.t. the predictions.
override int MaxTopBlobs
Returns the maximum number of required top (output) Blobs: loss, softmax output
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: loss.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: -1, indicating a variable number.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
The forward computation.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
List< double > loss_weight
Specifies the loss weight.
SoftmaxParameter softmax_param
Returns the parameter set when initialized with LayerType.SOFTMAX
void SetType(LayerType type, bool bNewParam=true)
Set the layer type.
LayerType
Specifies the layer type.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
virtual LayerParameter Clone(bool bCloneBlobs)
Creates a new copy of this instance of the parameter.
Stores the parameters used by loss layers.
NormalizationMode
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
int? ignore_label
If specified, the ignore instances with the given label.
Specifies the parameters for the SoftmaxLayer
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12