MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
QuantileLossLayer.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
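    /// <summary>
    /// The QuantileLossLayer computes the quantile loss for real-valued regression tasks.
    /// </summary>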
    public class QuantileLossLayer<T> : LossLayer<T>
    {
        List<int> m_rgShape = new List<int>(4);
        int m_nCount;
        int m_nChannels;
        Blob<T> m_blobTargetsFull;
        Blob<T> m_blobErrors;
        Blob<T> m_blobQuantile1;
        Blob<T> m_blobQuantile2;
        Blob<T> m_blobDesiredQuantiles;
        Blob<T> m_blobLoss;
        Blob<T> m_blobLossSum;
        Blob<T> m_blobLossSumMean;
        Blob<T> m_blobWork;

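        /// <summary>
        /// The QuantileLossLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>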
        public QuantileLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.QUANTILE_LOSS;

            m_blobErrors = new Blob<T>(cuda, log);
            m_blobErrors.Name = m_param.name + ".diff";
            m_blobTargetsFull = new Blob<T>(cuda, log);
            m_blobTargetsFull.Name = m_param.name + ".trgtfull";
            m_blobQuantile1 = new Blob<T>(cuda, log);
            m_blobQuantile1.Name = m_param.name + ".qtl1";
            m_blobQuantile2 = new Blob<T>(cuda, log);
            m_blobQuantile2.Name = m_param.name + ".qtl2";
            m_blobDesiredQuantiles = new Blob<T>(cuda, log);
            m_blobDesiredQuantiles.Name = m_param.name + ".desqtl";
            m_blobLoss = new Blob<T>(cuda, log);
            m_blobLoss.Name = m_param.name + ".loss";
            m_blobLossSum = new Blob<T>(cuda, log);
            m_blobLossSum.Name = m_param.name + ".losssum";
            m_blobLossSumMean = new Blob<T>(cuda, log);
            m_blobLossSumMean.Name = m_param.name + ".losssum.mean";
            m_blobWork = new Blob<T>(m_cuda, m_log);
            m_blobWork.Name = m_param.name + ".work";
        }

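        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>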
        protected override void dispose()
        {
            dispose(ref m_blobErrors);
            dispose(ref m_blobQuantile1);
            dispose(ref m_blobQuantile2);
            dispose(ref m_blobTargetsFull);
            dispose(ref m_blobDesiredQuantiles);
            dispose(ref m_blobLoss);
            dispose(ref m_blobLossSum);
            dispose(ref m_blobLossSumMean);
            dispose(ref m_blobWork);

            base.dispose();
        }

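        /// <summary>
        /// Returns the exact number of required top (output) Blobs as variable.
        /// </summary>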
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

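        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>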
        public override int MinTopBlobs
        {
            get { return 1; }
        }

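        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, q_risk.
        /// </summary>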
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

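        /// <summary>
        /// Unlike most loss layers, in the QuantileLossLayer we can backpropagate to both inputs, so override to return true.
        /// </summary>
        /// <param name="nBottomIdx">Specifies the index of the bottom Blob.</param>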
        public override bool AllowForceBackward(int nBottomIdx)
        {
            return true;
        }

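        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>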
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            List<int> rgShape = new List<int>(1);
            rgShape.Add(m_param.quantile_loss_param.desired_quantiles.Count);
            m_blobDesiredQuantiles.Reshape(rgShape);

            float[] rgDeqQtl1 = new float[m_param.quantile_loss_param.desired_quantiles.Count];
            float[] rgDeqQtl2 = new float[m_param.quantile_loss_param.desired_quantiles.Count];

            // Store the 'q' factors in the data and the '(q - 1)' factors in the diff
            // for later use when computing the loss max(q * e, (q - 1) * e).
            for (int i = 0; i < rgDeqQtl1.Length; i++)
            {
                rgDeqQtl1[i] = m_param.quantile_loss_param.desired_quantiles[i];
                rgDeqQtl2[i] = rgDeqQtl1[i] - 1;
            }

            m_blobDesiredQuantiles.mutable_cpu_data = convert(rgDeqQtl1);
            m_blobDesiredQuantiles.mutable_cpu_diff = convert(rgDeqQtl2);
        }

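        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>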
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);

            int nAxes = colBottom[0].num_axes;
            m_nCount = colBottom[0].count();
            m_nOuterNum = colBottom[0].num;
            m_nChannels = (nAxes == 2) ? 1 : colBottom[0].channels;
            m_nInnerNum = (nAxes == 2) ? colBottom[0].channels : colBottom[0].count(2);

            m_log.CHECK_EQ(colBottom[0].num, colBottom[1].num, "Input and target must have the same 'num' size.");
            m_log.CHECK_EQ(colBottom[0].channels, colBottom[1].channels, "Input and target must have the same 'channel' size.");
            m_log.CHECK_EQ(colBottom[0].height, colBottom[1].height * m_param.quantile_loss_param.desired_quantiles.Count, "The input 'height' must equal the target 'height' times 'desired_quantiles.Count'.");

            m_blobErrors.ReshapeLike(colBottom[0]);
            m_blobTargetsFull.ReshapeLike(colBottom[0]);
            m_blobQuantile1.ReshapeLike(colBottom[0]);
            m_blobQuantile2.ReshapeLike(colBottom[0]);
            m_blobLoss.ReshapeLike(colBottom[0]);
            m_blobWork.ReshapeLike(colBottom[0]);

            m_rgShape.Clear();
            m_rgShape.Add(m_nOuterNum);
            m_rgShape.Add(m_nChannels);
            m_blobLossSum.Reshape(m_rgShape);

            m_rgShape.Clear();
            m_rgShape.Add(m_nOuterNum);
            m_blobLossSumMean.Reshape(m_rgShape);

            m_rgShape.Clear();
            m_rgShape.Add(1);
            colTop[0].Reshape(m_rgShape);

            if (colTop.Count > 1)
            {
                m_rgShape[0] = m_nChannels;
                colTop[1].Reshape(m_rgShape);
            }
        }

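        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: the predicted quantiles and the observed targets.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs: the loss, and optionally the q-risk per quantile.</param>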
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Fill the targets across all output quantiles.
            m_cuda.channel_fillfrom(m_nCount, m_nOuterNum, m_nChannels, m_nInnerNum, colBottom[1].gpu_data, m_blobTargetsFull.mutable_gpu_data, DIR.FWD);

            // Compute the actual error between the observed target and each predicted quantile.
            m_cuda.sub(m_nCount, m_blobTargetsFull.gpu_data, colBottom[0].gpu_data, m_blobErrors.mutable_gpu_data);

            // Compute the loss separately for each sample, time-step and quantile:
            // loss = max(q * e, (q - 1) * e), where e = target - prediction.
            m_cuda.channel_copyall(m_nCount, m_nOuterNum * m_nChannels, 1, m_nInnerNum, m_blobDesiredQuantiles.gpu_diff, m_blobWork.mutable_gpu_data);
            m_cuda.mul(m_nCount, m_blobWork.gpu_data, m_blobErrors.gpu_data, m_blobQuantile1.mutable_gpu_data);

            m_cuda.channel_copyall(m_nCount, m_nOuterNum * m_nChannels, 1, m_nInnerNum, m_blobDesiredQuantiles.gpu_data, m_blobWork.mutable_gpu_data);
            m_cuda.mul(m_nCount, m_blobWork.gpu_data, m_blobErrors.gpu_data, m_blobQuantile2.mutable_gpu_data);

            m_cuda.max(m_nCount, m_blobQuantile2.gpu_data, m_blobQuantile1.gpu_data, m_blobLoss.mutable_gpu_data);

            // Sum the losses over the quantiles.
            m_cuda.channel_sum(m_nCount, m_nOuterNum, m_nChannels, m_nInnerNum, m_blobLoss.gpu_data, m_blobLossSum.mutable_gpu_data, false);

            // Take the mean of the summed losses over time.
            m_cuda.channel_mean(m_blobLossSum.count(), m_nOuterNum, 1, m_nChannels, m_blobLossSum.gpu_data, m_blobLossSumMean.mutable_gpu_data);

            // Average across time and observations.
            double dfQLoss = m_blobLossSumMean.mean();
            colTop[0].SetData(dfQLoss, 0);

            // Calculate the q-risk for each quantile.
            if (colTop.Count > 1)
            {
                double dfTargetSum = convertD(colBottom[1].asum_data());
                m_cuda.channel_sum(m_blobLossSum.count(), 1, m_nOuterNum, m_nChannels, m_blobLossSum.gpu_data, colTop[1].mutable_gpu_data, true);

                colTop[1].scale_data(2.0 / dfTargetSum);
            }

            callLossEvent(m_blobLossSumMean);
        }

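        /// <summary>
        /// Computes the QuantileLoss error gradient w.r.t. the inputs.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate down to each bottom Blob.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>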
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!rgbPropagateDown[0])
                return;

            // Sum and average over quantiles, time and observations.
            double dfGrad = convertD(colTop[0].GetDiff(0));
            m_blobLoss.SetDiff(dfGrad / (m_nOuterNum * m_nChannels));

            // Compute the gradient separately for each sample, time-step and quantile.
            m_cuda.max_bwd(m_nCount, m_blobQuantile2.gpu_data, m_blobQuantile1.gpu_data, m_blobLoss.gpu_diff, m_blobQuantile2.mutable_gpu_diff, m_blobQuantile1.mutable_gpu_diff);

            m_cuda.channel_copyall(m_nCount, m_nOuterNum * m_nChannels, 1, m_nInnerNum, m_blobDesiredQuantiles.gpu_data, m_blobWork.mutable_gpu_data);
            m_cuda.mul(m_nCount, m_blobWork.gpu_data, m_blobQuantile2.gpu_diff, m_blobQuantile2.mutable_gpu_diff);

            m_cuda.channel_copyall(m_nCount, m_nOuterNum * m_nChannels, 1, m_nInnerNum, m_blobDesiredQuantiles.gpu_diff, m_blobWork.mutable_gpu_data);
            m_cuda.mul(m_nCount, m_blobWork.gpu_data, m_blobQuantile1.gpu_diff, m_blobQuantile1.mutable_gpu_diff);

            m_cuda.add(m_nCount, m_blobQuantile1.gpu_diff, m_blobQuantile2.gpu_diff, m_blobErrors.mutable_gpu_diff);

            // The errors were computed as target - prediction, so negate to obtain the
            // gradient with respect to the predictions.
            m_cuda.scale(m_nCount, -1.0, m_blobErrors.gpu_diff, colBottom[0].mutable_gpu_diff);
        }
    }
}
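For reference, the loss computed in forward above is the standard quantile (pinball) loss: for error e = target - prediction and desired quantile q, loss = max(q * e, (q - 1) * e), summed over the quantiles and averaged over time steps and observations; the optional second top blob reports the q-risk, 2 * sum(loss) / sum(|target|), per quantile. The following is an illustrative CPU sketch of that reduction, not part of the layer; the names are hypothetical, and the time and batch axes are collapsed into a single sample axis.

using System;

public static class QuantileLossSketch
{
    // Pinball loss for a single prediction/target pair at quantile q:
    // e = target - prediction; loss = max(q * e, (q - 1) * e).
    public static double PinballLoss(double dfPred, double dfTarget, double dfQuantile)
    {
        double dfErr = dfTarget - dfPred;
        return Math.Max(dfQuantile * dfErr, (dfQuantile - 1.0) * dfErr);
    }

    // Sum over quantiles, then average over samples, mirroring the
    // channel_sum / channel_mean reduction used in forward above.
    public static double AverageLoss(double[][] rgPred, double[] rgTarget, double[] rgQuantiles)
    {
        double dfTotal = 0;

        for (int i = 0; i < rgTarget.Length; i++)
        {
            for (int q = 0; q < rgQuantiles.Length; q++)
                dfTotal += PinballLoss(rgPred[i][q], rgTarget[i], rgQuantiles[q]);
        }

        return dfTotal / rgTarget.Length;
    }
}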
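A minimal configuration sketch, assuming the LayerParameter constructor that takes a LayerType and that desired_quantiles is a standard List<float>; the 0.1/0.5/0.9 values are illustrative.

// Hypothetical example values; only 'name' and
// 'quantile_loss_param.desired_quantiles' are read by the layer code above.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.QUANTILE_LOSS);
p.name = "qloss";
p.quantile_loss_param.desired_quantiles.Add(0.1f); // P10
p.quantile_loss_param.desired_quantiles.Add(0.5f); // P50 (median)
p.quantile_loss_param.desired_quantiles.Add(0.9f); // P90

With three desired quantiles, the prediction bottom must have a 'height' equal to three times the target 'height', as enforced by the CHECK_EQ calls in Reshape.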