MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
NLLLossLayer.cs
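For reference, the loss produced by the forward pass below is the standard negative log likelihood over a batch (a textbook formulation, not quoted from the source; it assumes bottom[0] holds per-class log probabilities x and bottom[1] holds integer class labels l):

\[
E = -\frac{1}{M} \sum_{n=1}^{N} \sum_{s=1}^{S} x_{n,\, l_{n,s},\, s}
\]

where N is the outer count (m_nOuterNum), S is the inner count (m_nInnerNum), terms whose label equals ignore_label are dropped, and M is the normalizer returned by get_normalizer (e.g., the number of non-ignored items under NormalizationMode.VALID).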
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers.gpt
{
    /// <summary>
    /// The NLLLossLayer computes the negative log likelihood (NLL) loss for a
    /// one-of-many classification task, passing real-valued predictions to the loss.
    /// </summary>
    public class NLLLossLayer<T> : LossLayer<T>
    {
        Blob<T> m_blobProb;
        int m_nAxis;
        int? m_nIgnoreLabel = null;

        /// <summary>
        /// Constructor.
        /// </summary>
        public NLLLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.NLL_LOSS;
            m_blobProb = new Blob<T>(m_cuda, m_log);
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            dispose(ref m_blobProb);
            base.dispose();
        }

        /// <summary>
        /// Derivative layers should add all internal blobs to the 'col' provided.
        /// </summary>
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs as variable.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, labels.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);
            m_nIgnoreLabel = m_param.loss_param.ignore_label;
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);

            m_blobProb.ReshapeLike(colBottom[0]);
            m_nAxis = colBottom[0].CanonicalAxisIndex(m_param.nll_loss_param.axis);
            m_nOuterNum = colBottom[0].count(0, m_nAxis);
            m_nInnerNum = colBottom[0].count(m_nAxis + 1);

            if (!m_bIgnoreLabels)
                m_log.CHECK_EQ(m_nOuterNum * m_nInnerNum, colBottom[1].count(), "Number of labels must match number of predictions; e.g., if nll axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.");
        }

        /// <summary>
        /// The forward computation.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hProbData = colBottom[0].gpu_data;
            long hLabel = colBottom[1].gpu_data;
            int nDim = colBottom[0].count() / m_nOuterNum;
            int nCount = m_nOuterNum * m_nInnerNum;

            // Since this memory is not used for anything else, we use it here to avoid
            // having to allocate new GPU memory to accumulate intermediate results.
            long hLossData = colBottom[0].mutable_gpu_diff;

            // Similarly, this memory is never used elsewhere, so we can use it
            // to avoid having to allocate additional GPU memory.
            long hCounts = m_blobProb.mutable_gpu_diff;

            m_cuda.nllloss_fwd(nCount, hProbData, hLabel, hLossData, m_nOuterNum, nDim, m_nInnerNum, hCounts, m_nIgnoreLabel);
            T fLoss = m_cuda.asum(nCount, hLossData);
            double dfValidCount = -1;

            // Only launch another CUDA kernel if we actually need the count of valid
            // outputs.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
                dfValidCount = convertD(m_cuda.asum(nCount, hCounts));

            double dfLoss = convertD(fLoss);
            double dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);
            double dfFinalLoss = dfLoss / dfNormalizer;

            colTop[0].SetData(dfFinalLoss, 0);

            if (colTop.Count == 2)
                colTop[1].ShareData(m_blobProb);

            // Clear the scratch memory to prevent it from interfering with the backward pass (see #602).
            colBottom[0].SetDiff(0);
        }

        /// <summary>
        /// Computes the NLL loss error gradient w.r.t. the predictions.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!rgbPropagateDown[0])
                return;

            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            long hTopData = colTop[0].gpu_data;

            long hLabel = colBottom[1].gpu_data;
            int nDim = m_blobProb.count() / m_nOuterNum;
            int nCount = m_nOuterNum * m_nInnerNum;

            // Since this memory is not used for anything else,
            // we use it to avoid allocating new GPU memory.
            long hCounts = m_blobProb.mutable_gpu_diff;

            m_cuda.nllloss_bwd(nCount, hTopData, hLabel, hBottomDiff, m_nOuterNum, nDim, m_nInnerNum, hCounts, m_nIgnoreLabel);

            double dfValidCount = -1;

            // Only launch another CUDA kernel if we actually need the count of valid
            // outputs.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
                dfValidCount = convertD(m_cuda.asum(nCount, hCounts));

            double dfTopDiff = convertD(colTop[0].GetDiff(0));
            double dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);
            double dfLossWeight = dfTopDiff / dfNormalizer;

            // Scale the raw gradient by the top diff divided by the normalizer.
            m_cuda.scal(m_blobProb.count(), convert(dfLossWeight), hBottomDiff);
        }
    }
}
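A minimal usage sketch follows. It is not part of the source: the CudaDnn and Log constructor arguments, the public Setup/Forward wrappers inherited from Layer<T>, and the Blob<T>.Reshape(List<int>) overload are assumptions based on how other MyCaffe layers are typically driven, and the shapes and device ID are illustrative only.

using System;
using System.Collections.Generic;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.layers.gpt;

public static class NLLLossExample
{
    public static void Run()
    {
        Log log = new Log("nll_example");
        CudaDnn<float> cuda = new CudaDnn<float>(0); // device 0 (assumed constructor)

        // Configure an NLL loss over the class axis (axis 1 for (N, C) predictions).
        LayerParameter p = new LayerParameter(LayerParameter.LayerType.NLL_LOSS);
        p.nll_loss_param.axis = 1;
        p.loss_param.ignore_label = -1; // optional: items labeled -1 are skipped

        NLLLossLayer<float> layer = new NLLLossLayer<float>(cuda, log, p);

        // bottom[0]: log probabilities shaped (N=4, C=10); bottom[1]: labels shaped (4, 1),
        // satisfying the Reshape check that the label count equals the prediction count.
        Blob<float> blobLogProb = new Blob<float>(cuda, log);
        blobLogProb.Reshape(new List<int>() { 4, 10 });
        Blob<float> blobLabel = new Blob<float>(cuda, log);
        blobLabel.Reshape(new List<int>() { 4, 1 });
        Blob<float> blobLoss = new Blob<float>(cuda, log);

        BlobCollection<float> colBottom = new BlobCollection<float>();
        colBottom.Add(blobLogProb);
        colBottom.Add(blobLabel);
        BlobCollection<float> colTop = new BlobCollection<float>();
        colTop.Add(blobLoss);

        layer.Setup(colBottom, colTop);   // assumed public wrapper around LayerSetUp + Reshape

        // ... fill blobLogProb with log probabilities and blobLabel with class indices ...

        layer.Forward(colBottom, colTop); // colTop[0] now holds the scalar loss at index 0

        layer.Dispose();
        blobLoss.Dispose();
        blobLabel.Dispose();
        blobLogProb.Dispose();
        cuda.Dispose();
    }
}

The two-bottom / one-top wiring mirrors the checks in Reshape above; supplying a second top blob would additionally expose the data shared from the internal probability blob.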