MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
ContrastiveLossLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
    /// <summary>
    /// The ContrastiveLossLayer computes the contrastive loss. This layer is initialized
    /// with the MyCaffe.param.ContrastiveLossParameter.
    /// </summary>
    /// <typeparam name="T">Specifies the base type of 'float' or 'double'.</typeparam>
    public class ContrastiveLossLayer<T> : LossLayer<T>
    {
        Blob<T> m_blobDiff;       // cached for backward pass.
        Blob<T> m_blobDistSq;     // cached for backward pass.
        Blob<T> m_blobDiffSq;     // cached for backward pass.
        Blob<T> m_blobSummerVec;  // tmp storage for gpu forward pass.
        Blob<T> m_blobSimilar;    // tmp storage for backward pass.
        Blob<T> m_blobDistScale;  // tmp storage for the distance scale applied to matching distances (optional).
        Blob<T> m_blobPrimary;    // target of the similar or dissimilar image.
        T[] m_rgMatches = null;
        int m_nIteration = 0;
        int m_nCentroidNotification = 10;

        /// <summary>
        /// The ContrastiveLossLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>
        public ContrastiveLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.CONTRASTIVE_LOSS;

            m_blobDiff = new Blob<T>(cuda, log, false);
            m_blobDiff.Name = m_param.name + " diff";
            m_blobDistSq = new Blob<T>(cuda, log, false);
            m_blobDistSq.Name = m_param.name + " distsq";
            m_blobDiffSq = new Blob<T>(cuda, log, false);
            m_blobDiffSq.Name = m_param.name + " diffsq";
            m_blobSummerVec = new Blob<T>(cuda, log, false);
            m_blobSummerVec.Name = m_param.name + " sum";
            m_blobSimilar = new Blob<T>(cuda, log, false);
            m_blobSimilar.Name = m_param.name + " similar";
            m_blobDistScale = new Blob<T>(cuda, log, false);
            m_blobDistScale.Name = m_param.name + " dist scale";
            m_blobPrimary = new Blob<T>(cuda, log, false);
            m_blobPrimary.Name = m_param.name + " primary";
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            base.dispose();

            if (m_blobDiff != null)
            {
                m_blobDiff.Dispose();
                m_blobDiff = null;
            }

            if (m_blobDistSq != null)
            {
                m_blobDistSq.Dispose();
                m_blobDistSq = null;
            }

            if (m_blobDiffSq != null)
            {
                m_blobDiffSq.Dispose();
                m_blobDiffSq = null;
            }

            if (m_blobSummerVec != null)
            {
                m_blobSummerVec.Dispose();
                m_blobSummerVec = null;
            }

            if (m_blobSimilar != null)
            {
                m_blobSimilar.Dispose();
                m_blobSimilar = null;
            }

            if (m_blobDistScale != null)
            {
                m_blobDistScale.Dispose();
                m_blobDistScale = null;
            }

            if (m_blobPrimary != null)
            {
                m_blobPrimary.Dispose();
                m_blobPrimary = null;
            }
        }

        /// <summary>
        /// Returns -1, specifying a variable number of bottom blobs.
        /// </summary>
        public override int ExactNumBottomBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of bottom blobs: featA, featB, label.
        /// </summary>
        public override int MinBottomBlobs
        {
            get { return 3; }
        }

        /// <summary>
        /// Returns the maximum number of bottom blobs: featA, featB, label, centroids.
        /// </summary>
        public override int MaxBottomBlobs
        {
            get { return 4; }
        }

        /// <summary>
        /// Returns -1, specifying a variable number of top blobs.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, matches.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Unlike most loss layers, in the ContrastiveLossLayer we can backpropagate
        /// to the first two inputs, but not to the label input.
        /// </summary>
        public override bool AllowForceBackward(int nBottomIdx)
        {
            if (nBottomIdx != 2)
                return true;

            return false;
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            m_nIteration = 0;

            m_log.CHECK_EQ(colBottom[0].channels, colBottom[1].channels, "The bottom[0] and bottom[1] should have equal channel values.");
            m_log.CHECK_EQ(1, colBottom[0].height, "The bottom[0] should have height = 1.");
            m_log.CHECK_EQ(1, colBottom[0].width, "The bottom[0] should have width = 1.");
            m_log.CHECK_EQ(1, colBottom[1].height, "The bottom[1] should have height = 1.");
            m_log.CHECK_EQ(1, colBottom[1].width, "The bottom[1] should have width = 1.");
            m_log.CHECK_GE(colBottom[2].channels, 1, "The bottom[2] should have channels >= 1.");
            m_log.CHECK_LE(colBottom[2].channels, 3, "The bottom[2] should have channels <= 3.");
            m_log.CHECK_EQ(1, colBottom[2].height, "The bottom[2] should have height = 1.");
            m_log.CHECK_EQ(1, colBottom[2].width, "The bottom[2] should have width = 1.");
            m_nCentroidNotification = 10;
        }
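
        // Example (hypothetical shapes, for illustration only): with a batch of 32 pairs
        // of 128-dim encodings, the checks above accept:
        //   bottom[0]: (32, 128, 1, 1) - featA encodings.
        //   bottom[1]: (32, 128, 1, 1) - featB encodings.
        //   bottom[2]: (32, 1, 1, 1) holding a 0/1 similarity, or (32, 2, 1, 1) holding packed labels.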

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);

            m_blobDiff.Reshape(colBottom[0].num, colBottom[0].channels, 1, 1);
            m_blobDiffSq.Reshape(colBottom[0].num, colBottom[0].channels, 1, 1);
            m_blobDistSq.Reshape(colBottom[0].num, 1, 1, 1);
            // vector of ones used to sum along channels.
            m_blobSummerVec.Reshape(colBottom[0].channels, 1, 1, 1);
            m_blobSummerVec.SetData(1.0);
            m_blobPrimary.ReshapeLike(colBottom[0]);

            if (m_param.contrastive_loss_param.output_matches)
            {
                if (m_rgMatches == null || m_rgMatches.Length != colBottom[0].num)
                    m_rgMatches = new T[colBottom[0].num];

                colTop[1].Reshape(colBottom[0].num, 1, 1, 1);
            }

            m_blobSimilar.Reshape(colBottom[0].num, 1, 1, 1);
            m_blobDistScale.Reshape(colBottom[0].num, 1, 1, 1);
        }

        /// <summary>
        /// The forward computation.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            int nCount = colBottom[0].count();

            // Label data arrives in one of two forms:
            if (colBottom[2].channels > 1)
            {
                // channels > 1: the direct label values of each image packed into the data channels.
                m_cuda.channel_compare(colBottom[2].count(), colBottom[2].num, colBottom[2].channels, 1, colBottom[2].gpu_data, m_blobSimilar.mutable_gpu_data);
            }
            else
            {
                // channels = 1: the direct similarity, where 1 = the same and 0 = different.
                m_cuda.copy(colBottom[2].count(), colBottom[2].gpu_data, m_blobSimilar.mutable_gpu_data);
            }
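
            // For example (hypothetical values): with two labels packed per item, a batch
            // bottom[2] = { (3,3), (1,2), (5,5), (0,4) } yields m_blobSimilar = { 1, 0, 1, 0 };
            // with a single channel, bottom[2] already holds the 0/1 similarity directly.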

            // When using centroid learning, the centroids from the DecodeLayer are only filled in after
            // they are fully calculated - prior to the full calculation, the centroids are set to 0.
            bool bCentroidLearningEnabled = false;
            bool bUseCentroidLearning = false;
            if (colBottom.Count == 4 && m_param.contrastive_loss_param.centroid_learning != ContrastiveLossParameter.CENTROID_LEARNING.NONE)
            {
                bCentroidLearningEnabled = true;
                T fAsum = colBottom[3].asum_data();
                double dfAsum = convertD(fAsum);
                if (dfAsum > 0)
                    bUseCentroidLearning = true;
            }

            if (bUseCentroidLearning)
            {
                m_log.CHECK_EQ(colBottom.Count, 4, "When using centroid learning, a fourth bottom is required that contains the class centroids (calculated by the DecodeLayer).");
                m_log.CHECK_EQ(colBottom[3].channels, colBottom[0].channels, "Each centroid should have the same size as each encoding.");
                m_log.CHECK_EQ(colBottom[3].height, 1, "The centroids should have a height = 1.");
                m_log.CHECK_EQ(colBottom[3].width, 1, "The centroids should have a width = 1.");
                m_log.CHECK_EQ(colBottom[2].channels, 2, "The colBottom[2] must contain labels, not a similarity value - make sure the data layer has 'output_all_labels' = True.");

                // Load the target with the centroids matching the labels received in colBottom(2) - only the first label of the two is used.
                int nEncodingDim = m_blobPrimary.count(1);
                int nLabelDim = colBottom[2].count(1);

                // Fill with the centroid of each 'first' label.
                m_cuda.channel_fill(m_blobPrimary.count(), m_blobPrimary.num, nEncodingDim, 1, colBottom[3].gpu_data, nLabelDim, colBottom[2].gpu_data, m_blobPrimary.mutable_gpu_data);

                // For similar pairs, keep the centroids from colBottom[3]; otherwise copy in the colBottom[0] dissimilar encodings.
                if (m_param.contrastive_loss_param.centroid_learning == ContrastiveLossParameter.CENTROID_LEARNING.MATCHING ||
                    m_param.contrastive_loss_param.centroid_learning == ContrastiveLossParameter.CENTROID_LEARNING.ALL)
                    m_cuda.copy(m_blobPrimary.count(), m_blobPrimary.num, m_blobPrimary.count(1), m_blobPrimary.gpu_data, colBottom[0].gpu_data, m_blobPrimary.mutable_gpu_data, m_blobSimilar.gpu_data); // centroid used for matching, bottom[0] used for all NON matching.

                // For non-similar pairs, keep the centroids from colBottom[3]; otherwise copy in the colBottom[0] similar encodings.
                if (m_param.contrastive_loss_param.centroid_learning == ContrastiveLossParameter.CENTROID_LEARNING.NONMATCHING ||
                    m_param.contrastive_loss_param.centroid_learning == ContrastiveLossParameter.CENTROID_LEARNING.ALL)
                    m_cuda.copy(m_blobPrimary.count(), m_blobPrimary.num, m_blobPrimary.count(1), m_blobPrimary.gpu_data, colBottom[0].gpu_data, m_blobPrimary.mutable_gpu_data, m_blobSimilar.gpu_data, true); // centroid used for NON matching, bottom[0] used for all matching.

                if (m_nCentroidNotification > 0)
                {
                    m_log.WriteLine("INFO: Centroid learning ON.");
                    m_nCentroidNotification--;
                }
            }
            else
            {
                // When not using centroid learning, just use the bottom[0] encodings as is.
                m_cuda.copy(m_blobPrimary.count(), colBottom[0].gpu_data, m_blobPrimary.mutable_gpu_data);
            }

            bool bLegacyVersion = m_param.contrastive_loss_param.legacy_version;
            double dfMargin = m_param.contrastive_loss_param.margin;
            float[] rgSimPairs = Utility.ConvertVecF<T>(m_blobSimilar.update_cpu_data());
            float[] rgDist = null;
            double dfLoss = 0;

            if (m_param.contrastive_loss_param.distance_calculation == ContrastiveLossParameter.DISTANCE_CALCULATION.MANHATTAN)
            {
                // The Manhattan distance uses the legacy calculation.
                bLegacyVersion = true;

                Blob<T> blobAbsDiff = m_blobDiffSq;
                Blob<T> blobDist = m_blobDistSq;

                m_cuda.sub(nCount,
                           m_blobPrimary.gpu_data,         // a
                           colBottom[1].gpu_data,          // b
                           m_blobDiff.mutable_gpu_data);   // a_i - b_i

                m_cuda.abs(nCount,
                           m_blobDiff.gpu_data,            // a_i - b_i
                           blobAbsDiff.mutable_gpu_data);  // |a_i - b_i|

                m_cuda.gemv(false,
                            m_blobPrimary.num,
                            m_blobPrimary.channels,
                            1.0,
                            blobAbsDiff.gpu_data,           // |a_i - b_i|
                            m_blobSummerVec.gpu_data,
                            0.0,
                            blobDist.mutable_gpu_data);     // \Sum |a_i - b_i|

                if ((bUseCentroidLearning || !bCentroidLearningEnabled) && m_param.contrastive_loss_param.matching_distance_scale != 1.0)
                {
                    m_cuda.scale(m_blobDistScale.count(), m_param.contrastive_loss_param.matching_distance_scale - 1.0, m_blobSimilar.gpu_data, m_blobDistScale.mutable_gpu_data);
                    m_cuda.add_scalar(m_blobDistScale.count(), 1.0, m_blobDistScale.mutable_gpu_data);
                    m_cuda.mul(blobDist.count(), blobDist.gpu_data, m_blobDistScale.gpu_data, blobDist.mutable_gpu_data);
                }

                rgDist = Utility.ConvertVecF<T>(blobDist.update_cpu_data());
            }
            else // default = EUCLIDEAN
            {
                m_cuda.sub(nCount,
                           m_blobPrimary.gpu_data,          // a
                           colBottom[1].gpu_data,           // b
                           m_blobDiff.mutable_gpu_data);    // a_i - b_i

                m_cuda.powx(nCount,
                            m_blobDiff.mutable_gpu_data,     // a_i - b_i
                            2.0,
                            m_blobDiffSq.mutable_gpu_data);  // (a_i - b_i)^2

                m_cuda.gemv(false,
                            m_blobPrimary.num,
                            m_blobPrimary.channels,
                            1.0,
                            m_blobDiffSq.gpu_data,           // (a_i - b_i)^2
                            m_blobSummerVec.gpu_data,
                            0.0,
                            m_blobDistSq.mutable_gpu_data);  // \Sum (a_i - b_i)^2

                if ((bUseCentroidLearning || !bCentroidLearningEnabled) && m_param.contrastive_loss_param.matching_distance_scale != 1.0)
                {
                    m_cuda.scale(m_blobDistScale.count(), m_param.contrastive_loss_param.matching_distance_scale - 1.0, m_blobSimilar.gpu_data, m_blobDistScale.mutable_gpu_data);
                    m_cuda.add_scalar(m_blobDistScale.count(), 1.0, m_blobDistScale.mutable_gpu_data);
                    m_cuda.mul(m_blobDistSq.count(), m_blobDistSq.gpu_data, m_blobDistScale.gpu_data, m_blobDistSq.mutable_gpu_data);
                }

                rgDist = Utility.ConvertVecF<T>(m_blobDistSq.update_cpu_data());
            }

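            // The loop below computes the contrastive loss of Hadsell et al. 2006:
            //   loss = 1/(2N) * Sum_i [ s_i * d_i^2 + (1 - s_i) * max(margin - d_i, 0)^2 ]
            // where d_i^2 = rgDist[i] and s_i = 1 for similar pairs; the legacy version
            // instead uses max(margin - d_i^2, 0), unsquared, for dissimilar pairs.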
            for (int i = 0; i < colBottom[0].num; i++)
            {
                double dfDist = (bLegacyVersion) ? dfMargin - rgDist[i] : dfMargin - Math.Sqrt(rgDist[i]);
                bool bSimilar = (rgSimPairs[i] != 0);

                if (bSimilar) // similar pairs
                {
                    if (m_rgMatches != null)
                    {
                        if (dfDist >= 0)
                            m_rgMatches[i] = m_tOne;
                        else
                            m_rgMatches[i] = m_tZero;
                    }

                    dfLoss += rgDist[i];
                }
                else // dissimilar pairs
                {
                    if (m_rgMatches != null)
                    {
                        if (dfDist >= 0)
                            m_rgMatches[i] = m_tZero;
                        else
                            m_rgMatches[i] = m_tOne;
                    }

                    dfDist = Math.Max(dfDist, 0);

                    if (bLegacyVersion)
                        dfLoss += dfDist;
                    else
                        dfLoss += dfDist * dfDist;
                }
            }

            dfLoss = dfLoss / (double)colBottom[0].num / 2.0;
            colTop[0].SetData(dfLoss, 0);

            if (colTop.Count > 1 && m_rgMatches != null)
                colTop[1].mutable_cpu_data = m_rgMatches;

            if (m_phase == Phase.TRAIN)
                m_nIteration++;
        }

        /// <summary>
        /// Computes the contrastive loss error gradient w.r.t. the inputs.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!rgbPropagateDown[0] && !rgbPropagateDown[1])
                return;

            double dfTopDiff = convertD(colTop[0].GetDiff(0)) / colBottom[0].num;

            m_log.CHECK_GT(m_blobSimilar.gpu_data, 0, "The similar data is not initialized - you must first run the forward pass under the Phase = TRAIN.");

            for (int i = 0; i < 2; i++)
            {
                if (rgbPropagateDown[i])
                {
                    int nCount = colBottom[0].count();
                    int nChannels = colBottom[0].channels;
                    double dfMargin = m_param.contrastive_loss_param.margin;
                    bool bLegacyVersion = m_param.contrastive_loss_param.legacy_version;
                    double dfSign = (i == 0) ? 1 : -1;
                    double dfAlpha = dfSign * dfTopDiff;

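                    // A sketch of the gradient the cll_bwd kernel is expected to apply (the
                    // standard contrastive-loss derivative): for similar pairs,
                    //   d(loss)/d(a_i) = alpha * (a_i - b_i);
                    // for dissimilar pairs with d < margin,
                    //   d(loss)/d(a_i) = -alpha * ((margin - d) / d) * (a_i - b_i),
                    // and 0 once d >= margin.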
                    m_cuda.cll_bwd(nCount,
                                   nChannels,
                                   dfMargin,
                                   bLegacyVersion,
                                   dfAlpha,
                                   m_blobSimilar.gpu_data,         // pair similarity, 0 or 1.
                                   m_blobDiff.gpu_data,            // the cached eltwise difference between a and b.
                                   m_blobDistSq.gpu_data,          // the cached squared distance between a and b.
                                   colBottom[i].mutable_gpu_diff);
                }
            }
        }
    }
}
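
Usage sketch

The snippet below is a minimal, hypothetical example of driving this layer directly. It assumes an existing CudaDnn<float> connection 'cuda' and Log 'log', the standard LayerParameter constructor taking a LayerType, and the Blob constructor taking explicit dimensions; it is a sketch under those assumptions, not part of the file above.

using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.layers;
using MyCaffe.param;

// Configure the layer: a margin of 1.0 plus the optional 'matches' output.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.CONTRASTIVE_LOSS);
p.contrastive_loss_param.margin = 1.0;          // margin for dissimilar pairs.
p.contrastive_loss_param.output_matches = true; // request the per-pair match flags.

ContrastiveLossLayer<float> layer = new ContrastiveLossLayer<float>(cuda, log, p);

// Bottom blobs: featA, featB (N x C x 1 x 1) and the 0/1 similarity (N x 1 x 1 x 1).
BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(new Blob<float>(cuda, log, 32, 128, 1, 1)); // featA encodings.
colBottom.Add(new Blob<float>(cuda, log, 32, 128, 1, 1)); // featB encodings.
colBottom.Add(new Blob<float>(cuda, log, 32, 1, 1, 1));   // similarity per pair.

// Top blobs: the scalar loss and the optional matches.
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(new Blob<float>(cuda, log));
colTop.Add(new Blob<float>(cuda, log));

layer.Setup(colBottom, colTop);
layer.Forward(colBottom, colTop); // colTop[0] now holds the contrastive loss.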