MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
BatchNormLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.fillers;
using MyCaffe.param;

namespace MyCaffe.layers
{
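    /// <summary>
    /// The BatchNormLayer normalizes the input to have 0-mean and/or unit (1) variance across the batch.
    /// This layer is initialized with the MyCaffe.param.BatchNormParameter.
    /// </summary>
    /// <typeparam name="T">Specifies the base type ('float' or 'double').</typeparam>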
    public class BatchNormLayer<T> : Layer<T>
    {
        Blob<T> m_blobMean;     // also used as save mean with cuDNN
        Blob<T> m_blobVariance; // also used as save var with cuDNN
        Blob<T> m_blobTemp;
        Blob<T> m_blobXNorm;
        bool m_bUseGlobalStats;
        double m_dfMovingAverageFraction;
        int m_nChannels;
        double m_dfEps;

        // extra temporary variables used to carry out sums/broadcasting using BLAS
        Blob<T> m_blobBatchSumMultiplier;
        Blob<T> m_blobNumByChans;
        Blob<T> m_blobSpaitalSumMultiplier;

        // cuDNN support
        bool m_bScaleBias = false;
        long m_hCuDnn = 0;
        long m_hFwdBottomDesc = 0;
        long m_hFwdTopDesc = 0;
        long m_hBwdBottomDesc = 0;
        long m_hBwdTopDesc = 0;
        long m_hFwdScaleBiasMeanVarDesc = 0;
        long m_hBwdScaleBiasMeanVarDesc = 0;
        BATCHNORM_MODE m_mode = BATCHNORM_MODE.SPATIAL;
        Blob<T> m_blobScaleOnes = null;
        Blob<T> m_blobBiasZeros = null;
        Blob<T> m_blobPrivateTop = null;
        Blob<T> m_blobPrivateBottom = null;
        const double CUDNN_BN_MIN_EPSILON = 1e-5;
        int m_nIteration = 0;

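        /// <summary>
        /// The BatchNormLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter of type BATCHNORM.</param>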
        public BatchNormLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.BATCHNORM;
            m_blobMean = new common.Blob<T>(cuda, log);
            m_blobMean.Name = m_param.name + " mean";
            m_blobVariance = new common.Blob<T>(cuda, log);
            m_blobVariance.Name = m_param.name + " variance";
            m_blobTemp = new common.Blob<T>(cuda, log);
            m_blobTemp.Name = m_param.name + " temp";
            m_blobXNorm = new common.Blob<T>(cuda, log);
            m_blobXNorm.Name = m_param.name + " xnorm";
            m_blobBatchSumMultiplier = new common.Blob<T>(cuda, log);
            m_blobBatchSumMultiplier.Name = m_param.name + " summult";
            m_blobNumByChans = new common.Blob<T>(cuda, log);
            m_blobNumByChans.Name = m_param.name + "numbychan";
            m_blobSpaitalSumMultiplier = new common.Blob<T>(cuda, log);
            m_blobSpaitalSumMultiplier.Name = m_param.name + "spatialsummult";

            if (m_param.batch_norm_param.useCudnn())
            {
                m_blobMean.Name = m_param.name + "save mean";
                m_blobVariance.Name = m_param.name + "save var";

                m_blobPrivateTop = new Blob<T>(cuda, log);
                m_blobPrivateTop.Name = m_param.name + "private top";
                m_blobPrivateBottom = new Blob<T>(cuda, log);
                m_blobPrivateBottom.Name = m_param.name + "private bottom";
                m_blobScaleOnes = new Blob<T>(cuda, log);
                m_blobScaleOnes.Name = m_param.name + "scale ones";
                m_blobBiasZeros = new Blob<T>(cuda, log);
                m_blobBiasZeros.Name = m_param.name + "bias zeros";
            }
        }

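        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>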
        protected override void dispose()
        {
            m_blobMean.Dispose();
            m_blobVariance.Dispose();
            m_blobTemp.Dispose();
            m_blobXNorm.Dispose();
            m_blobBatchSumMultiplier.Dispose();
            m_blobNumByChans.Dispose();
            m_blobSpaitalSumMultiplier.Dispose();

            // cuDNN cleanup
            if (m_blobPrivateTop != null)
            {
                m_blobPrivateTop.Dispose();
                m_blobPrivateTop = null;
            }

            if (m_blobPrivateBottom != null)
            {
                m_blobPrivateBottom.Dispose();
                m_blobPrivateBottom = null;
            }

            if (m_blobScaleOnes != null)
            {
                m_blobScaleOnes.Dispose();
                m_blobScaleOnes = null;
            }

            if (m_blobBiasZeros != null)
            {
                m_blobBiasZeros.Dispose();
                m_blobBiasZeros = null;
            }

            if (m_hBwdBottomDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBwdBottomDesc);
                m_hBwdBottomDesc = 0;
            }

            if (m_hBwdScaleBiasMeanVarDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBwdScaleBiasMeanVarDesc);
                m_hBwdScaleBiasMeanVarDesc = 0;
            }

            if (m_hBwdTopDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBwdTopDesc);
                m_hBwdTopDesc = 0;
            }

            if (m_hFwdBottomDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hFwdBottomDesc);
                m_hFwdBottomDesc = 0;
            }

            if (m_hFwdScaleBiasMeanVarDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hFwdScaleBiasMeanVarDesc);
                m_hFwdScaleBiasMeanVarDesc = 0;
            }

            if (m_hFwdTopDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hFwdTopDesc);
                m_hFwdTopDesc = 0;
            }

            if (m_hCuDnn != 0)
            {
                m_cuda.FreeCuDNN(m_hCuDnn);
                m_hCuDnn = 0;
            }

            base.dispose();
        }

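        /// <summary>
        /// Derivative layers should add all internal blobs to the 'col' provided.
        /// </summary>
        /// <param name="col">Specifies the collection of internal blobs.</param>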
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            col.Add(m_blobMean);
            col.Add(m_blobVariance);

            if (m_param.batch_norm_param.useCudnn())
            {
                col.Add(m_blobPrivateBottom);
                col.Add(m_blobPrivateTop);

                if (!m_bScaleBias)
                {
                    col.Add(m_blobScaleOnes);
                    col.Add(m_blobBiasZeros);
                }
            }
            else
            {
                col.Add(m_blobTemp);
                col.Add(m_blobXNorm);
                col.Add(m_blobBatchSumMultiplier);
                col.Add(m_blobNumByChans);
                col.Add(m_blobSpaitalSumMultiplier);
            }
        }

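        /// <summary>
        /// Returns the exact number of bottom (input) Blobs required: input
        /// </summary>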
        public override int ExactNumBottomBlobs
        {
            get { return 1; }
        }

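        /// <summary>
        /// Returns the exact number of top (output) Blobs required: batchnorm
        /// </summary>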
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

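        /// <summary>
        /// Re-initialize the parameters of the layer.
        /// </summary>
        /// <param name="target">Specifies the weights to target (e.g. weights, bias or both).</param>
        /// <returns>Returns 'true' when handled, 'false' otherwise.</returns>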
        public override bool ReInitializeParameters(WEIGHT_TARGET target)
        {
            base.ReInitializeParameters(target);

            if (target == WEIGHT_TARGET.BOTH || target == WEIGHT_TARGET.WEIGHTS)
            {
                for (int i = 0; i < 3; i++)
                {
                    m_colBlobs[i].SetData(0);
                }
            }

            return true;
        }

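        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>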
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            bool bUseCuDnn = m_param.batch_norm_param.useCudnn();

            m_dfMovingAverageFraction = m_param.batch_norm_param.moving_average_fraction;
            m_bUseGlobalStats = (m_phase == Phase.TEST || m_phase == Phase.RUN);

            if (m_param.batch_norm_param.use_global_stats.HasValue)
                m_bUseGlobalStats = m_param.batch_norm_param.use_global_stats.Value;

            if (colBottom[0].num_axes == 1)
                m_nChannels = 1;
            else
                m_nChannels = colBottom[0].shape(1);

            m_dfEps = m_param.batch_norm_param.eps;

            m_bScaleBias = m_param.batch_norm_param.scale_bias; // by default = false;
            if (m_param.batch_norm_param.scale_filler != null || // implicit set.
                m_param.batch_norm_param.bias_filler != null)
                m_bScaleBias = true;

            if (m_bScaleBias && !bUseCuDnn)
                m_bScaleBias = false;

            if (m_colBlobs.Count > 0)
            {
                m_log.WriteLine("Skipping parameter initialization.");
            }
            else
            {
                List<int> rgSize = new List<int>();
                rgSize.Add(m_nChannels);

                m_colBlobs.Clear(true);

                m_colBlobs.Add(new Blob<T>(m_cuda, m_log, rgSize, false)); // global mean
                m_colBlobs[0].Name = m_param.name + "_global_mean";
                m_colBlobs[0].type = BLOB_TYPE.INTERNAL;
                m_colBlobs[0].SetData(0.0);
                m_colBlobs.Add(new Blob<T>(m_cuda, m_log, rgSize, false)); // global var
                m_colBlobs[1].Name = m_param.name + "_global_var";
                m_colBlobs[1].type = BLOB_TYPE.INTERNAL;
                m_colBlobs[1].SetData(0.0);
                m_colBlobs.Add(new Blob<T>(m_cuda, m_log, rgSize, false)); // variance correction
                m_colBlobs[2].Name = m_param.name + "_var_corr";
                m_colBlobs[2].type = BLOB_TYPE.INTERNAL;
                m_colBlobs[2].SetData(1.0);

                if (m_bScaleBias)
                {
                    m_colBlobs.Add(new Blob<T>(m_cuda, m_log, rgSize)); // scale
                    m_colBlobs[3].type = BLOB_TYPE.INTERNAL;
                    m_colBlobs[3].Name = m_param.name + "_scale";

                    FillerParameter fpScale = m_param.batch_norm_param.scale_filler;
                    if (fpScale == null)
                        fpScale = new FillerParameter("constant", 1.0);

                    Filler<T> fillerScale = Filler<T>.Create(m_cuda, m_log, fpScale);
                    fillerScale.Fill(m_colBlobs[3]);

                    m_colBlobs.Add(new Blob<T>(m_cuda, m_log, rgSize)); // bias
                    m_colBlobs[4].Name = m_param.name + "_bias";
                    m_colBlobs[4].type = BLOB_TYPE.INTERNAL;

                    FillerParameter fpBias = m_param.batch_norm_param.bias_filler;
                    if (fpBias == null)
                        fpBias = new FillerParameter("constant", 0.0);

                    Filler<T> fillerBias = Filler<T>.Create(m_cuda, m_log, fpBias);
                    fillerBias.Fill(m_colBlobs[4]);
                }

                m_nIteration = 0;
            }

            // Mask statistics from optimization by setting local learning rates
            // for mean, variance, and variance correction to zero.
            for (int i = 0; i < 3; i++)
            {
                if (m_param.parameters.Count == i)
                {
                    m_param.parameters.Add(new ParamSpec(0.0, 0.0));
                }
                else
                {
                    m_param.parameters[i].lr_mult = 0;
                    m_param.parameters[i].decay_mult = 0;
                }
            }

            // Set lr for scale and bias to 1
            if (m_bScaleBias)
            {
                for (int i = 3; i < 5; i++)
                {
                    if (m_param.parameters.Count == i)
                    {
                        m_param.parameters.Add(new ParamSpec(1.0, 1.0));
                    }
                    else
                    {
                        m_param.parameters[i].lr_mult = 1;
                        m_param.parameters[i].decay_mult = 1;
                    }
                }
            }

            if (!bUseCuDnn)
                return;

            //-----------------------------------
            // Handle cuDNN setup
            //-----------------------------------

            // Setup the convert to half flags used by the Layer just before calling forward and backward.
            m_bUseHalfSize = m_param.use_halfsize;

            int nChannels = colBottom[0].channels;
            List<int> rgShape = new List<int>() { 1, nChannels, 1, 1 };

            if (!m_bScaleBias)
            {
                m_blobScaleOnes.Reshape(rgShape);
                m_blobScaleOnes.SetData(1.0);
                m_blobBiasZeros.Reshape(rgShape);
                m_blobBiasZeros.SetData(0.0);
            }

            m_hCuDnn = m_cuda.CreateCuDNN();
            m_hFwdBottomDesc = m_cuda.CreateTensorDesc();
            m_hFwdTopDesc = m_cuda.CreateTensorDesc();
            m_hFwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();
            m_hBwdBottomDesc = m_cuda.CreateTensorDesc();
            m_hBwdTopDesc = m_cuda.CreateTensorDesc();
            m_hBwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();
            m_mode = BATCHNORM_MODE.SPATIAL;
            m_dfEps = Math.Min(m_dfEps, CUDNN_BN_MIN_EPSILON);

            m_blobMean.Reshape(rgShape);
            m_blobVariance.Reshape(rgShape);

            if (colBottom[0] == colTop[0]) // CuDNN BN does not support in-place.
            {
                m_blobPrivateTop.ReshapeLike(colTop[0]);
                m_blobPrivateBottom.ReshapeLike(colBottom[0]);
            }
        }

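        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>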
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (m_bNetReshapeRequest)
            {
                m_nChannels = colBottom[0].channels;
            }
            else
            {
                if (!reshapeNeeded(colBottom, colTop))
                    return;
            }

            if (colBottom[0].num_axes >= 1)
                m_log.CHECK_EQ(colBottom[0].shape(1), m_nChannels, "The colBottom[0].shape(1) should equal the channel count '" + m_nChannels.ToString() + "'.");

            colTop[0].ReshapeLike(colBottom[0]);

            List<int> rgSize = new List<int>();
            rgSize.Add(m_nChannels);

            m_blobMean.Reshape(rgSize);
            m_blobVariance.Reshape(rgSize);

            if (!m_param.batch_norm_param.useCudnn())
            {
                m_blobTemp.ReshapeLike(colBottom[0]);
                m_blobXNorm.ReshapeLike(colBottom[0]);

                rgSize[0] = colBottom[0].shape(0);
                m_blobBatchSumMultiplier.Reshape(rgSize);

                int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));
                if (m_blobSpaitalSumMultiplier.num_axes == 0 ||
                    m_blobSpaitalSumMultiplier.shape(0) != nSpatialDim)
                {
                    rgSize[0] = nSpatialDim;
                    m_blobSpaitalSumMultiplier.Reshape(rgSize);
                    m_blobSpaitalSumMultiplier.SetData(1);
                }

                int nNumByChans = m_nChannels * colBottom[0].shape(0);
                if (m_blobNumByChans.num_axes == 0 ||
                    m_blobNumByChans.shape(0) != nNumByChans)
                {
                    rgSize[0] = nNumByChans;
                    m_blobNumByChans.Reshape(rgSize);
                    m_blobBatchSumMultiplier.SetData(1);
                }

                return;
            }

            //-----------------------------------
            // Handle cuDNN setup
            //-----------------------------------
            int N = colBottom[0].num;
            int C = colBottom[0].channels;
            int H = colBottom[0].height;
            int W = colBottom[0].width;

            // Setup the main tensors.
            m_cuda.SetTensorDesc(m_hFwdBottomDesc, N, C, H, W);
            m_cuda.SetTensorDesc(m_hFwdTopDesc, N, C, H, W);
            m_cuda.SetTensorDesc(m_hBwdBottomDesc, N, C, H, W);
            m_cuda.SetTensorDesc(m_hBwdTopDesc, N, C, H, W);

            // Setup auxiliary tensors for caching mean and inVar for the forward and backward passes.
            m_blobMean.Reshape(1, C, 1, 1);
            m_blobVariance.Reshape(1, C, 1, 1);

            if (!m_bScaleBias)
            {
                if (m_blobScaleOnes.channels != C) // scale
                {
                    m_blobScaleOnes.Reshape(1, C, 1, 1);
                    m_blobScaleOnes.SetData(1.0);
                }

                if (m_blobBiasZeros.channels != C) // bias
                {
                    m_blobBiasZeros.Reshape(1, C, 1, 1);
                    m_blobBiasZeros.SetData(0.0);
                }
            }

            m_cuda.DeriveBatchNormDesc(m_hFwdScaleBiasMeanVarDesc, m_hFwdBottomDesc, m_hBwdScaleBiasMeanVarDesc, m_hBwdBottomDesc, m_mode);

            if (colTop[0] == colBottom[0])
            {
                m_blobPrivateTop.ReshapeLike(colTop[0]);
                m_blobPrivateBottom.ReshapeLike(colBottom[0]);
            }
        }

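        /// <summary>
        /// Perform the forward computation.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>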
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (m_param.batch_norm_param.useCudnn())
                forward_cudnn(colBottom, colTop);
            else
                forward_cuda(colBottom, colTop);
        }

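        /// <summary>
        /// Perform the backward computation.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs, whose diff contains the error gradient.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate down to each bottom Blob.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>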
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (m_param.batch_norm_param.useCudnn())
                backward_cudnn(colTop, rgbPropagateDown, colBottom);
            else
                backward_cuda(colTop, rgbPropagateDown, colBottom);
        }

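        /// <summary>
        /// Perform the forward computation using the native Cuda version.
        /// </summary>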
        protected void forward_cuda(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;
            int nNum = colBottom[0].shape(0);
            int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));

            if (colBottom[0] != colTop[0])
                m_cuda.copy(colBottom[0].count(), hBottomData, hTopData);

            if (m_bUseGlobalStats)
            {
                // use the stored mean/variance estimates
                double dfScaleFactor = convertD(m_colBlobs[2].GetData(0));

                if (dfScaleFactor != 0)
                    dfScaleFactor = 1.0 / dfScaleFactor;

                int nCount = m_blobVariance.count();

                m_cuda.scale(nCount, dfScaleFactor, m_colBlobs[0].gpu_data, m_blobMean.mutable_gpu_data);
                m_cuda.scale(nCount, dfScaleFactor, m_colBlobs[1].gpu_data, m_blobVariance.mutable_gpu_data);
            }
            else
            {
                // compute mean
                m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0 / (nNum * nSpatialDim), hBottomData, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
                m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);
            }

            // subtract mean
            m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, -1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hTopData);

            if (!m_bUseGlobalStats)
            {
                // compute variance using var(X) = E((X-EX)^2)
                m_cuda.mul(colTop[0].count(), hTopData, hTopData, m_blobTemp.mutable_gpu_data); // (X-EX)^2
                m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0 / (nNum * nSpatialDim), m_blobTemp.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
                m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobVariance.mutable_gpu_data); // E((X-EX)^2)

                // compute and save moving average
                double dfVal = convertD(m_colBlobs[2].GetData(0));
                dfVal *= m_dfMovingAverageFraction;
                dfVal += 1.0;
                m_colBlobs[2].SetData(dfVal, 0);

                m_cuda.axpby(m_blobMean.count(), 1.0, m_blobMean.gpu_data, m_dfMovingAverageFraction, m_colBlobs[0].mutable_gpu_data);
                int nM = colBottom[0].count() / m_nChannels;
                double dfBiasCorrectionFactor = (nM > 1) ? ((double)nM / (double)(nM - 1)) : 1.0;
                m_cuda.axpby(m_blobVariance.count(), dfBiasCorrectionFactor, m_blobVariance.gpu_data, m_dfMovingAverageFraction, m_colBlobs[1].mutable_gpu_data);
            }

            // normalize variance
            m_cuda.add_scalar(m_blobVariance.count(), m_dfEps, m_blobVariance.mutable_gpu_data);
            m_cuda.sqrt(m_blobVariance.count(), m_blobVariance.gpu_data, m_blobVariance.mutable_gpu_data);

            // replicate variance to input size
            m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobVariance.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobTemp.mutable_gpu_data);
            m_cuda.div(m_blobTemp.count(), hTopData, m_blobTemp.gpu_data, hTopData);
            // The caching is only needed because later in-place layers
            // might clobber the data. Can we skip this if they won't?
            m_cuda.copy(m_blobXNorm.count(), hTopData, m_blobXNorm.mutable_gpu_data);
        }

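        /// <summary>
        /// Perform the backward computation using the native Cuda version.
        /// </summary>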
        protected void backward_cuda(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopDiff = 0;

            if (colBottom[0] != colTop[0])
            {
                hTopDiff = colTop[0].gpu_diff;
            }
            else
            {
                m_cuda.copy(m_blobXNorm.count(), colTop[0].gpu_diff, m_blobXNorm.mutable_gpu_diff);
                hTopDiff = m_blobXNorm.gpu_diff;
            }

            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            if (m_bUseGlobalStats)
            {
                m_cuda.div(m_blobTemp.count(), hTopDiff, m_blobTemp.gpu_data, hBottomDiff);
                return;
            }

            long hTopData = m_blobXNorm.gpu_data;
            int nNum = colBottom[0].shape()[0];
            int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));
            // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
            //
            // dE(Y)/dX =
            //   (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y)
            //     ./ sqrt(var(X) + eps)
            //
            // where \cdot and ./ are hadamard product and elementwise division,
            // respectively, dE/dY is the top diff, and mean/var/sum are all computed
            // along all dimensions except the channels dimension. In the above
            // equation, the operations allow for expansion (i.e. broadcast) along all
            // dimensions except the channels dimension where required.

            // sum(dE/dY \cdot Y)
            m_cuda.mul(m_blobTemp.count(), hTopData, hTopDiff, hBottomDiff);
            m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0, hBottomDiff, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

            // reshape (broadcast) the above
            m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 0.0, hBottomDiff);

            // sum(dE/dY \cdot Y) \cdot Y
            m_cuda.mul(m_blobTemp.count(), hTopData, hBottomDiff, hBottomDiff);

            // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
            m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0, hTopDiff, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

            // reshape (broadcast) the above to make
            // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
            m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
            m_cuda.gemm(false, false, nNum * m_nChannels, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hBottomDiff);

            // dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y
            m_cuda.axpby(m_blobTemp.count(), 1.0, hTopDiff, -1.0 / (double)(nNum * nSpatialDim), hBottomDiff);

            // Note: blobTemp still contains sqrt(var(X) + eps), computed during the forward
            // pass.
            m_cuda.div(m_blobTemp.count(), hBottomDiff, m_blobTemp.gpu_data, hBottomDiff);
        }

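        /// <summary>
        /// Perform the forward computation using cuDNN.
        /// </summary>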
        protected void forward_cudnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;

            if (colTop[0] == colBottom[0])
                hTopData = m_blobPrivateTop.mutable_gpu_data;

            double dfEps = m_dfEps;
            long hGlobalMean = m_colBlobs[0].gpu_data;
            long hGlobalVar = m_colBlobs[1].gpu_data;
            long hScaleData = (m_bScaleBias) ? m_colBlobs[3].gpu_data : m_blobScaleOnes.gpu_data;
            long hBiasData = (m_bScaleBias) ? m_colBlobs[4].gpu_data : m_blobBiasZeros.gpu_data;

            if (!m_bUseGlobalStats)
            {
                long hSaveMean = m_blobMean.mutable_gpu_data;
                long hSaveVar = m_blobVariance.mutable_gpu_data;

                hGlobalMean = m_colBlobs[0].mutable_gpu_data;
                hGlobalVar = m_colBlobs[1].mutable_gpu_data;

                double dfFactor = 1.0;

                if (m_nIteration > 0)
                    dfFactor = 1 - m_dfMovingAverageFraction;

                m_cuda.BatchNormForward(m_hCuDnn, m_mode, m_tOne, m_tZero,
                    m_hFwdBottomDesc, hBottomData,
                    m_hFwdTopDesc, hTopData,
                    m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
                    dfFactor, hGlobalMean, hGlobalVar, dfEps, hSaveMean, hSaveVar, true);
                m_nIteration++;
            }
            else
            {
                m_cuda.BatchNormForward(m_hCuDnn, BATCHNORM_MODE.SPATIAL, m_tOne, m_tZero,
                    m_hFwdBottomDesc, hBottomData,
                    m_hFwdTopDesc, hTopData,
                    m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
                    1.0, hGlobalMean, hGlobalVar, dfEps, 0, 0, false);
            }

            if (colTop[0] == colBottom[0])
            {
                m_blobPrivateBottom.CopyFrom(colBottom[0]);
                colTop[0].CopyFrom(m_blobPrivateTop);
            }
        }

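        /// <summary>
        /// Perform the backward computation using cuDNN.
        /// </summary>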
        protected void backward_cudnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopDiff = colTop[0].gpu_diff;
            long hBottomData = colBottom[0].gpu_data;
            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            double dfEps = m_dfEps;
            long hMean = (m_bUseGlobalStats) ? 0 : m_blobMean.gpu_data;
            long hVariance = (m_bUseGlobalStats) ? 0 : m_blobVariance.gpu_data;
            long hScaleData = (m_bScaleBias) ? m_colBlobs[3].gpu_data : m_blobScaleOnes.gpu_data;
            long hScaleDiff = (m_bScaleBias) ? m_colBlobs[3].mutable_gpu_diff : m_blobScaleOnes.mutable_gpu_diff;
            long hBiasDiff = (m_bScaleBias) ? m_colBlobs[4].mutable_gpu_diff : m_blobBiasZeros.mutable_gpu_diff;

            if (colTop[0] == colBottom[0])
            {
                // copy diff from top to private top.
                m_blobPrivateTop.CopyFrom(colTop[0], true);
                hTopDiff = m_blobPrivateTop.gpu_diff;
                hBottomData = m_blobPrivateBottom.gpu_data;
            }

            m_cuda.BatchNormBackward(m_hCuDnn, m_mode, m_tOne, m_tZero, m_tOne, m_tOne,
                m_hBwdBottomDesc, hBottomData,
                m_hBwdBottomDesc, hTopDiff,
                m_hBwdBottomDesc, hBottomDiff,
                m_hBwdScaleBiasMeanVarDesc, hScaleData, hScaleDiff, hBiasDiff,
                dfEps, hMean, hVariance);
        }
    }
}
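As a rough stand-alone usage sketch (not part of BatchNormLayer.cs; the exact CudaDnn, Blob, and Layer.Create overloads are assumptions that should be verified against the MyCaffe API), the layer is typically created from a LayerParameter of type BATCHNORM and driven through the public Setup/Forward interface:

// Hypothetical stand-alone usage sketch; blob shapes and parameter
// values are illustrative only.
CudaDnn<float> cuda = new CudaDnn<float>(0);  // connect to GPU device 0
Log log = new Log("BatchNormExample");

LayerParameter p = new LayerParameter(LayerParameter.LayerType.BATCHNORM);
p.batch_norm_param.eps = 1e-5;
p.batch_norm_param.use_global_stats = false;  // normalize with mini-batch statistics

Layer<float> layer = Layer<float>.Create(cuda, log, p, null);

// N=2, C=3, H=4, W=4 input; the layer reshapes the top to match.
Blob<float> bottom = new Blob<float>(cuda, log, 2, 3, 4, 4);
Blob<float> top = new Blob<float>(cuda, log);

BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(bottom);
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(top);

layer.Setup(colBottom, colTop);
layer.Forward(colBottom, colTop);  // top now holds the normalized input

layer.Dispose();
bottom.Dispose();
top.Dispose();
cuda.Dispose();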