MyCaffe 1.12.2.41
Deep learning software for Windows C# programmers.
SigmoidCrossEntropyLossLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
    /// <summary>
    /// The SigmoidCrossEntropyLossLayer computes the cross-entropy (logistic) loss.
    /// </summary>
    public class SigmoidCrossEntropyLossLayer<T> : LossLayer<T>
    {
        SigmoidLayer<T> m_sigmoidLayer;
        Blob<T> m_blobSigmoidOutput;
        Blob<T> m_blobTarget = null;
        Blob<T> m_blobLoss;
        BlobCollection<T> m_colSigmoidBottomVec = new BlobCollection<T>();
        BlobCollection<T> m_colSigmoidTopVec = new BlobCollection<T>();

        // The label indicating that an instance should be ignored.
        int? m_nIgnoreLabel = null;
        double m_dfNormalizer = 0;

        /// <summary>
        /// The SigmoidCrossEntropyLossLayer constructor.
        /// </summary>
        public SigmoidCrossEntropyLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.SIGMOIDCROSSENTROPY_LOSS;
            m_blobSigmoidOutput = new Blob<T>(cuda, log);
            m_blobSigmoidOutput.Name = m_param.name + " sigmoid out";
            m_blobLoss = new Blob<T>(cuda, log);
            m_blobLoss.Name = m_param.name + " loss";

            // The internal SigmoidLayer reuses this layer's parameter, but with the loss
            // weights cleared so that it does not add a loss term of its own.
            LayerParameter param_sigmoid = p.Clone(false);
            param_sigmoid.loss_weight.Clear();
            m_sigmoidLayer = new SigmoidLayer<T>(cuda, log, param_sigmoid);
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            m_blobSigmoidOutput.Dispose();
            m_sigmoidLayer.Dispose();
            m_blobLoss.Dispose();

            if (m_blobTarget != null)
                m_blobTarget.Dispose();

            base.dispose();
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs as variable.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, loss values.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            // The internal SigmoidLayer operates on the predictions (bottom[0]) and writes
            // its output to the internal sigmoid output blob.
            m_colSigmoidBottomVec.Clear();
            m_colSigmoidBottomVec.Add(colBottom[0]);
            m_colSigmoidTopVec.Clear();
            m_colSigmoidTopVec.Add(m_blobSigmoidOutput);
            m_sigmoidLayer.Setup(m_colSigmoidBottomVec, m_colSigmoidTopVec);

            m_nIgnoreLabel = m_param.loss_param.ignore_label;
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);
            m_nOuterNum = colBottom[0].shape(0); // batch size
            m_nInnerNum = colBottom[0].count(1); // instance size: |output| == |target|

            if (colBottom[0].count() != colBottom[1].count())
            {
                if (colBottom[1].count() != colBottom[0].num)
                    m_log.FAIL("SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");

                // The target holds one class index per instance; allocate a dense target
                // blob whose label entries are set to 1.0 during the forward pass.
                if (m_blobTarget == null)
                {
                    m_blobTarget = new Blob<T>(m_cuda, m_log);
                    m_blobTarget.Name = "full_label";
                }

                m_blobTarget.ReshapeLike(colBottom[0]);
            }

            m_sigmoidLayer.Reshape(m_colSigmoidBottomVec, m_colSigmoidTopVec);
            m_blobLoss.ReshapeLike(colBottom[0]);
        }

        /// <summary>
        /// Forward computation.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Set the target data.
            if (m_blobTarget != null)
            {
                m_log.CHECK_EQ(colBottom[0].num, colBottom[1].count(), "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");
                m_blobTarget.SetData(0);

                // Expand each class index into a one-hot row of the dense target blob.
                float[] rgfTarget = convertF(colBottom[1].mutable_cpu_data);
                for (int i = 0; i < colBottom[1].num; i++)
                {
                    int nTargetIdx = (int)rgfTarget[i];
                    m_blobTarget.SetData(1.0, m_nInnerNum * i + nTargetIdx);
                }
            }

            // The forward pass computes the sigmoid outputs.
            m_colSigmoidBottomVec[0] = colBottom[0];
            m_sigmoidLayer.Forward(m_colSigmoidBottomVec, m_colSigmoidTopVec);

            // Compute the loss (negative log likelihood).
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;

            // Since this memory is not used for anything else, we use it here to avoid having
            // to allocate new GPU memory to accumulate intermediate results.
            long hLossData = colBottom[0].mutable_gpu_diff;
            long hCountData = (m_blobTarget != null) ? m_blobTarget.mutable_gpu_diff : colBottom[1].mutable_gpu_diff;

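            // Per element, with input x and target t, the kernel below is expected to compute
            // the numerically stable form of the sigmoid cross-entropy loss (the standard
            // reformulation; the actual kernel lives in the low-level CUDA DLL):
            //   loss = max(x, 0) - x * t + log(1 + exp(-|x|))
            // Elements whose target equals the ignore label are expected to contribute 0 to
            // both the loss and the valid count written to hCountData.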
            m_cuda.sigmoid_cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, m_nIgnoreLabel.HasValue, m_nIgnoreLabel.GetValueOrDefault(-1), hCountData);

            double dfValidCount = nCount;
            // Only launch another CUDA kernel if we actually need the valid count.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
                dfValidCount = m_cuda.asum_double(nCount, hCountData);

            double dfLoss = m_cuda.asum_double(nCount, hLossData);
            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1], if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear scratch memory to prevent interfering with the backward pass (see #6202).
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);

            if (m_blobTarget != null)
                m_blobTarget.SetDiff(0);
        }

        /// <summary>
        /// Computes the sigmoid cross-entropy loss error gradient w.r.t. the predictions.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (rgbPropagateDown[1])
                m_log.FAIL(m_type.ToString() + " Layer cannot backpropagate to label inputs.");

            if (rgbPropagateDown[0])
            {
                // First, compute the diff.
                int nCount = colBottom[0].count();
                long hSigmoidOutputData = m_blobSigmoidOutput.gpu_data;
                long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;
                long hBottomDiff = colBottom[0].mutable_gpu_diff;

                m_cuda.copy(nCount, hSigmoidOutputData, hBottomDiff);
                m_cuda.axpy(nCount, convert(-1.0), hTarget, hBottomDiff);
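                // hBottomDiff now holds sigmoid(x) - t, which is the per-element derivative
                // of the sigmoid cross-entropy loss with respect to the input x.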

                // Zero out gradient for ignored targets.
                if (m_nIgnoreLabel.HasValue)
                    m_cuda.sigmoid_cross_entropy_bwd(nCount, m_nIgnoreLabel.Value, hTarget, hBottomDiff);

                // Scale down gradient.
                double dfLossWeight = convertD(colTop[0].GetDiff(0)) / m_dfNormalizer;
                m_cuda.scal(nCount, dfLossWeight, hBottomDiff);
            }
        }
    }
}
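
For reference, the per-element loss that sigmoid_cross_entropy_fwd is expected to compute (a sketch of the standard numerically stable formulation; the kernel itself lives in the low-level CUDA C++ DLL, not in this file), together with the gradient that backward() forms via the copy and axpy calls: for a logit $x$ and a target $t \in [0, 1]$,

$$\ell(x, t) = -\bigl[\, t \log \sigma(x) + (1 - t) \log(1 - \sigma(x)) \,\bigr] = \max(x, 0) - x\,t + \log\bigl(1 + e^{-|x|}\bigr), \qquad \frac{\partial \ell}{\partial x} = \sigma(x) - t,$$

where $\sigma(x) = 1 / (1 + e^{-x})$. The scalar written to colTop[0] in forward() is $\sum_i \ell_i$ divided by the normalizer returned by get_normalizer(), and backward() scales each $\sigma(x_i) - t_i$ by the top diff divided by that same normalizer.
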
The Log class provides general output in text form.
Definition: Log.cs:13
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Definition: Blob.cs:1922
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
Definition: Blob.cs:1981
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
Definition: Layer.cs:535
float convertF(T df)
Converts a generic to a float value.
Definition: Layer.cs:1359
double convertD(T df)
Converts a generic to a double value.
Definition: Layer.cs:1349
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The LossLayer provides an interface for Layers that take two blobs as input – usually (1) prediction...
Definition: LossLayer.cs:23
int m_nOuterNum
Specifies the outer num, such as the batch count (e.g. count(0, axis)). Each derivative class must se...
Definition: LossLayer.cs:39
int m_nInnerNum
Specifies the inner num, such as the channel + height + width (e.g. count(axis + 1))....
Definition: LossLayer.cs:43
virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
Returns the normalizer used to normalize the loss.
Definition: LossLayer.cs:92
LossParameter.NormalizationMode m_normalization
Specifies the normalization mode used to normalize the loss.
Definition: LossLayer.cs:35
The SigmoidCrossEntropyLossLayer computes the cross-entropy (logistic) loss and is often used for predic...
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the sigmoid cross-entropy loss error gradient w.r.t. the predictions.
override void dispose()
Releases all GPU and host resources used by the Layer.
SigmoidCrossEntropyLossLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The SigmoidCrossEntropyLayer constructor.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs as variable.
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: loss.
override int MaxTopBlobs
Returns the maximum number of required top (output) Blobs: loss, loss values
The SigmoidLayer is a neuron layer that calculates the sigmoid function, a classic choice for neural n...
Definition: SigmoidLayer.cs:28
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
List< double > loss_weight
Specifies the loss weight.
LayerType
Specifies the layer type.
override string ToString()
Returns a string representation of the LayerParameter.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
virtual LayerParameter Clone(bool bCloneBlobs)
Creates a new copy of this instance of the parameter.
Stores the parameters used by loss layers.
NormalizationMode
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
int? ignore_label
If specified, the ignore instances with the given label.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12