MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
BaseConvolutionLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6using MyCaffe.param;
7using MyCaffe.fillers;
8using MyCaffe.common;
9
10namespace MyCaffe.layers
11{
17 public abstract class BaseConvolutionLayer<T> : Layer<T>
18 {
22 protected Blob<T> m_blobKernelShape;
26 protected Blob<T> m_blobStride;
30 protected Blob<T> m_blobPad;
34 protected Blob<T> m_blobDilation;
38 protected Blob<T> m_blobConvInputShape;
42 protected List<int> m_rgColBufferShape;
46 protected List<int> m_rgOutputShape = new List<int>();
50 protected List<int> m_rgBottomShape = new List<int>();
54 protected int m_nNumSpatialAxes;
58 protected int m_nBottomDim;
62 protected int m_nTopDim;
66 protected int m_nChannelAxis;
70 protected int m_nNum;
74 protected int m_nChannels;
78 protected int m_nGroup;
82 protected int m_nOutSpatialDim;
86 protected int m_nWeightOffset;
90 protected int m_nNumOutput;
94 protected bool m_bBiasTerm;
98 protected bool m_bIs1x1;
102 protected bool m_bForceNDim2col;
103
104 int m_nNumKernelsIm2col;
105 int m_nNumKernelsCol2im;
106 int m_nConvOutChannels;
107 int m_nConvInChannels;
108 int m_nConvOutSpatialDim;
109 int m_nKernelDim;
110 int m_nColOffset;
111 int m_nOutputOffset;
112
113 Blob<T> m_blobColBuffer;
114 Blob<T> m_blobBiasMultiplier;
115
116 long m_hWorkspaceData = 0;
117 ulong m_lWorkspaceSize = 0;
118 bool m_bWorkspaceOwner = false;
119
120
127 public BaseConvolutionLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
128 : base(cuda, log, p)
129 {
130 m_blobKernelShape = new Blob<T>(cuda, log);
131 m_blobKernelShape.Name = m_param.name + " kernel_shape";
132
133 m_blobStride = new Blob<T>(cuda, log);
134 m_blobStride.Name = m_param.name + " stride";
135
136 m_blobPad = new Blob<T>(cuda, log);
137 m_blobPad.Name = m_param.name + " pad";
138
139 m_blobDilation = new Blob<T>(cuda, log);
140 m_blobDilation.Name = m_param.name + " dilation";
141
142 m_blobConvInputShape = new Blob<T>(cuda, log);
143 m_blobConvInputShape.Name = m_param.name + " conv_input_shape";
144
145 m_blobColBuffer = new Blob<T>(cuda, log);
146 m_blobColBuffer.Name = m_param.name + " conv_col_buffer";
147
148 m_blobBiasMultiplier = new Blob<T>(cuda, log);
149 m_blobBiasMultiplier.Name = m_param.name + " conv_bias_mult";
150 }
151
153 protected override void dispose()
154 {
155 m_blobKernelShape.Dispose();
156 m_blobStride.Dispose();
157 m_blobPad.Dispose();
158 m_blobDilation.Dispose();
159 m_blobConvInputShape.Dispose();
160
161 m_blobColBuffer.Dispose();
162 m_blobBiasMultiplier.Dispose();
163
164 if (m_bWorkspaceOwner && m_hWorkspaceData != 0)
165 {
166 m_cuda.DisableGhostMemory();
167 m_cuda.FreeMemory(m_hWorkspaceData);
168 m_cuda.ResetGhostMemory();
169 m_hWorkspaceData = 0;
170 m_bWorkspaceOwner = false;
171 }
172
173 base.dispose();
174 }
175
187 protected ulong getWorkspaceLimitInBytes(bool bUseTensorCores = false)
188 {
189 // Specify workspace limit for kernels directly until we have a
190 // planning strategy and a rewrite of Caffe's GPU memory management.
191 // default = 1024 * 1024 * 16;
192 ulong lWorkspaceLimitBytes = ulong.MaxValue;
193 if (m_param.convolution_param.cudnn_workspace_limit >= 0)
194 lWorkspaceLimitBytes = (ulong)m_param.convolution_param.cudnn_workspace_limit;
195
196 if (lWorkspaceLimitBytes != ulong.MaxValue)
197 lWorkspaceLimitBytes *= 16;
198
199 // When using Half Size memory, let CUDA pick the fastest workspace size and algorithm.
200 if (m_bUseHalfSize || bUseTensorCores)
201 lWorkspaceLimitBytes = ulong.MaxValue;
202
203 // BUG Work Around
204 // With cuDNN 7.0.5 and above we are seeing memory overwrite errors (from CUDA)
205 // when using more than 1 group and the workspace.
206 // * also confirmed in cuDNN 7.1.4 and CUDA 9.2 on driver 397.64, 398.36
207 if (m_nGroup > 1 && !m_param.convolution_param.cudnn_workspace_allow_on_groups)
208 lWorkspaceLimitBytes = 0; // sets option to NO_WORKSPACE for Bwd Filter and Data
209
210 return lWorkspaceLimitBytes;
211 }
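// Illustrative note (not from the original source): with the default
// cudnn_workspace_limit of 1024 * 1024 * 16 bytes, the scaling above yields
//
//    16,777,216 * 16 = 268,435,456 bytes (256 MB)
//
// as the cap handed to cuDNN. Half-size or tensor-core runs lift the cap
// entirely (ulong.MaxValue), while the group work-around that forces the
// limit to 0 requests NO_WORKSPACE algorithms instead.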
212
214 protected override void setup_internal_blobs(BlobCollection<T> col)
215 {
216 if (col.Count > 0)
217 return;
218
219 if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
220 {
221 col.Add(m_blobColBuffer);
222 col.Add(m_blobBiasMultiplier);
223 }
224 }
225
230 protected override WorkspaceArgs getWorkspace()
231 {
232 WorkspaceArgs args = base.getWorkspace();
233
234 if (args != null)
235 return args;
236
237 m_bWorkspaceOwner = true;
238 return new common.WorkspaceArgs(m_hWorkspaceData, m_lWorkspaceSize);
239 }
240
246 protected override bool setWorkspace(ulong lSizeInBytes)
247 {
248 if (!m_bWorkspaceOwner && base.setWorkspace(lSizeInBytes))
249 return true;
250
251 m_bWorkspaceOwner = true;
252
253 if (lSizeInBytes < m_lWorkspaceSize)
254 return true;
255
256 m_lWorkspaceSize = lSizeInBytes;
257 m_cuda.DisableGhostMemory();
258
259 if (m_hWorkspaceData != 0)
260 m_cuda.FreeMemory(m_hWorkspaceData);
261
262 if (m_lWorkspaceSize > 0)
263 m_hWorkspaceData = m_cuda.AllocMemory((long)m_lWorkspaceSize);
264
265 m_cuda.ResetGhostMemory();
266
267 return true;
268 }
269
275 public override bool ReInitializeParameters(WEIGHT_TARGET target)
276 {
277 base.ReInitializeParameters(target);
278
279 if (target == WEIGHT_TARGET.WEIGHTS || target == WEIGHT_TARGET.BOTH)
280 {
281 Filler<T> filler = Filler<T>.Create(m_cuda, m_log, m_param.convolution_param.weight_filler);
282 filler.Fill(m_colBlobs[0]);
283 }
284
285 if (m_param.convolution_param.bias_term && m_colBlobs.Count > 1 && (target == WEIGHT_TARGET.BOTH || target == WEIGHT_TARGET.BIAS))
286 {
287 Filler<T> fillerBias = Filler<T>.Create(m_cuda, m_log, m_param.convolution_param.bias_filler);
288 fillerBias.Fill(m_colBlobs[1]);
289 }
290
291 return true;
292 }
293
299 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
300 {
301 if (!reshapeNeeded(colBottom, colTop))
302 return;
303
304 // Configure the kernel size, padding, stride and inputs.
305 ConvolutionParameter p = m_param.convolution_param;
306
307 m_bForceNDim2col = p.force_nd_im2col;
308 m_nChannelAxis = colBottom[0].CanonicalAxisIndex(p.axis);
309
310 int nFirstSpatialAxis = m_nChannelAxis + 1;
311 int nNumAxes = colBottom[0].num_axes;
312
313 m_nNumSpatialAxes = nNumAxes - nFirstSpatialAxis;
314
315 m_log.CHECK_GE(m_nNumSpatialAxes, 0, "The number of spatial axes must be zero or greater.");
316
317 List<int> rgBottomDimBlobShape = new List<int>() { m_nNumSpatialAxes + 1 };
318 List<int> rgSpaitalDimBlobShape = new List<int>() { Math.Max(m_nNumSpatialAxes, 1) };
319
320 // Setup filter kernel dimensions (blobKernelShape)
321 m_blobKernelShape.Reshape(rgSpaitalDimBlobShape);
322 T[] rgKernelShape = m_blobKernelShape.mutable_cpu_data;
323
324 if (p.kernel_h.HasValue || p.kernel_w.HasValue)
325 {
326 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "kernel_h & kernel_w can only be used in 2D convolution.");
327 m_log.CHECK_EQ(0, p.kernel_size.Count, "Either kernel_size or kernel_h/w should be specified; not both.");
328 rgKernelShape[0] = (T)Convert.ChangeType(p.kernel_h.Value, typeof(T));
329 rgKernelShape[1] = (T)Convert.ChangeType(p.kernel_w.Value, typeof(T));
330 }
331 else
332 {
333 int nNumKernelDims = p.kernel_size.Count;
334 m_log.CHECK(nNumKernelDims == 1 || nNumKernelDims == m_nNumSpatialAxes, "Kernel size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
335
336 for (int i = 0; i < m_nNumSpatialAxes; i++)
337 {
338 int nIdx = (nNumKernelDims == 1) ? 0 : i;
339 rgKernelShape[i] = (T)Convert.ChangeType(p.kernel_size[nIdx], typeof(T));
340 }
341 }
342
343 for (int i = 0; i < m_nNumSpatialAxes; i++)
344 {
345 m_log.CHECK_GT((int)Convert.ChangeType(rgKernelShape[i], typeof(int)), 0, "Filter dimension must be non-zero.");
346 }
347
348 m_blobKernelShape.mutable_cpu_data = rgKernelShape;
349
350
351 // Setup stride dimensions (blobStride)
352 m_blobStride.Reshape(rgSpaitalDimBlobShape);
353 T[] rgStrideData = m_blobStride.mutable_cpu_data;
354
355 if (p.stride_h.HasValue || p.stride_w.HasValue)
356 {
357 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "stride_h & stride_w can only be used in 2D convolution.");
358 m_log.CHECK_EQ(0, p.stride.Count, "Either stride_size or stride_h/w should be specified; not both.");
359 rgStrideData[0] = (T)Convert.ChangeType(p.stride_h.Value, typeof(T));
360 rgStrideData[1] = (T)Convert.ChangeType(p.stride_w.Value, typeof(T));
361 }
362 else
363 {
364 int nNumStrideDims = p.stride.Count;
365 m_log.CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == m_nNumSpatialAxes, "Stride size must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
366 int nDefaultStride = 1;
367
368 for (int i = 0; i < m_nNumSpatialAxes; i++)
369 {
370 if (nNumStrideDims == 0)
371 {
372 rgStrideData[i] = (T)Convert.ChangeType(nDefaultStride, typeof(T));
373 }
374 else
375 {
376 int nIdx = (nNumStrideDims == 1) ? 0 : i;
377 rgStrideData[i] = (T)Convert.ChangeType(p.stride[nIdx], typeof(T));
378 }
379 m_log.CHECK_GT((int)Convert.ChangeType(rgStrideData[i], typeof(int)), 0, "Stride dimension must be non-zero.");
380 }
381 }
382
383 m_blobStride.mutable_cpu_data = rgStrideData;
384
385
386 // Setup pad dimensions (blobPad)
387 m_blobPad.Reshape(rgSpaitalDimBlobShape);
388 T[] rgPadData = m_blobPad.mutable_cpu_data;
389
390 if (p.pad_h.HasValue || p.pad_w.HasValue)
391 {
392 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "pad_h & pad_w can only be used in 2D convolution.");
393 m_log.CHECK_EQ(0, p.pad.Count, "Either pad_size or pad_h/w should be specified; not both.");
394 rgPadData[0] = (T)Convert.ChangeType(p.pad_h.Value, typeof(T));
395 rgPadData[1] = (T)Convert.ChangeType(p.pad_w.Value, typeof(T));
396 }
397 else
398 {
399 int nNumPadDims = p.pad.Count;
400 m_log.CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == m_nNumSpatialAxes, "Pad size must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
401 int nDefaultPad = 0;
402
403 for (int i = 0; i < m_nNumSpatialAxes; i++)
404 {
405 if (nNumPadDims == 0)
406 {
407 rgPadData[i] = (T)Convert.ChangeType(nDefaultPad, typeof(T));
408 }
409 else
410 {
411 int nIdx = (nNumPadDims == 1) ? 0 : i;
412 rgPadData[i] = (T)Convert.ChangeType(p.pad[nIdx], typeof(T));
413 }
414 }
415 }
416
417 m_blobPad.mutable_cpu_data = rgPadData;
418
419
420 // Setup dilation dimensions (blobDilation)
421 m_blobDilation.Reshape(rgSpaitalDimBlobShape);
422 T[] rgDilationData = m_blobDilation.mutable_cpu_data;
423 int nNumDilationDims = p.dilation.Count;
424
425 m_log.CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == m_nNumSpatialAxes, "Dilation size must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
426 int nDefaultDilation = 1;
427
428 for (int i = 0; i < m_nNumSpatialAxes; i++)
429 {
430 if (nNumDilationDims == 0)
431 {
432 rgDilationData[i] = (T)Convert.ChangeType(nDefaultDilation, typeof(T));
433 }
434 else
435 {
436 int nIdx = (nNumDilationDims == 1) ? 0 : i;
437 rgDilationData[i] = (T)Convert.ChangeType(p.dilation[nIdx], typeof(T));
438 }
439 }
440
441 m_blobDilation.mutable_cpu_data = rgDilationData;
442
443
444 // Special case: im2col is the identity for 1x1 convolution with stride 1
445 // and no padding, so flag for skipping the buffer and transformation.
446 m_bIs1x1 = true;
447
448 for (int i = 0; i < m_nNumSpatialAxes; i++)
449 {
450 if (!(val_at(rgKernelShape, i) == 1 &&
451 val_at(rgStrideData, i) == 1 &&
452 val_at(rgPadData, i) == 0))
453 {
454 m_bIs1x1 = false;
455 break;
456 }
457 }
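// Illustrative note (not from the original source): a layer configured with
// kernel_size = 1, stride = 1 and pad = 0 on every spatial axis sets
// m_bIs1x1 = true, so the gemm helpers below read the input directly and
// never touch m_blobColBuffer; any other setting (e.g. a 3x3 kernel) leaves
// m_bIs1x1 = false and routes data through conv_im2col/conv_col2im.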
458
459 // Configure output channels and groups.
460 m_nChannels = colBottom[0].shape(m_nChannelAxis);
461 m_nNumOutput = (int)p.num_output;
462 m_log.CHECK_GT(m_nNumOutput, 0, "Output count must be greater than zero.");
463
464 m_nGroup = (int)p.group;
465 m_log.CHECK_EQ(m_nChannels % m_nGroup, 0, "The channels must span evenly across the groups.");
466 m_log.CHECK_EQ(m_nNumOutput % m_nGroup, 0, "The number of outputs must be a multiple of the group size.");
467
468 if (reverse_dimensions())
469 {
470 m_nConvOutChannels = m_nChannels;
471 m_nConvInChannels = m_nNumOutput;
472 }
473 else
474 {
475 m_nConvOutChannels = m_nNumOutput;
476 m_nConvInChannels = m_nChannels;
477 }
478
479 // Handle the parameters: weights and biases
480 // - blobs[0] holds the filter weights.
481 // - blobs[1] holds the biases (optional)
482
483 List<int> rgWeightShape = new List<int>();
484 rgWeightShape.Add(m_nConvOutChannels);
485 rgWeightShape.Add(m_nConvInChannels / m_nGroup);
486
487 for (int i = 0; i < m_nNumSpatialAxes; i++)
488 {
489 rgWeightShape.Add(val_at(rgKernelShape, i));
490 }
491
492 m_bBiasTerm = p.bias_term;
493
494 List<int> rgBiasShape = new List<int>() { m_nNumOutput };
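// Illustrative note (not from the original source): for a 2D convolution layer
// (reverse_dimensions() == false) with num_output = 64, 64 input channels,
// group = 1 and a 3x3 kernel, the shapes built above are
//
//    rgWeightShape = { 64, 64, 3, 3 }
//    rgBiasShape   = { 64 }
//
// With group = 2 the second dimension drops to 32, since each group only sees
// m_nConvInChannels / m_nGroup input channels.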
495
496 // Setup the convert to half flags used by the Layer just before calling forward and backward.
499
500 if (m_colBlobs.Count > 0)
501 {
502 m_log.CHECK_EQ(1 + ((m_bBiasTerm) ? 1 : 0), m_colBlobs.Count, "Incorrect number of weight blobs.");
503
504 if (!Utility.Compare<int>(rgWeightShape, m_colBlobs[0].shape()))
505 {
506 Blob<T> b = new Blob<T>(m_cuda, m_log, rgWeightShape);
507 m_log.FAIL("Incorrect weight shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[0].shape_string);
508 }
509
510 if (m_bBiasTerm && !Utility.Compare<int>(rgBiasShape, m_colBlobs[1].shape()))
511 {
512 Blob<T> b = new Blob<T>(m_cuda, m_log, rgBiasShape);
513 m_log.FAIL("Incorrect bias shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[1].shape_string);
514 }
515
516 m_log.WriteLine("Skipping parameter initialization.");
517 }
518 else
519 {
520 m_colBlobs.Clear();
521
522 // Initialize and fill the weights:
523 // output channels x input channels per-group x kernel height x kernel width.
524 Blob<T> blobWts = new Blob<T>(m_cuda, m_log, true, m_bUseHalfSize);
525 blobWts.Name = m_param.name + " weights";
526 blobWts.type = BLOB_TYPE.WEIGHT;
527
528 if (m_bUseHalfSize || !shareParameter(blobWts, rgWeightShape))
529 {
530 blobWts.Reshape(rgWeightShape, m_bUseHalfSize);
531 Filler<T> wtFiller = Filler<T>.Create(m_cuda, m_log, m_param.convolution_param.weight_filler);
532
533 Blob<T> blobWts1 = blobWts;
534
535 if (m_bUseHalfSize)
536 {
537 blobWts1 = new Blob<T>(m_cuda, m_log, false, false);
538 blobWts1.ReshapeLike(blobWts);
539 }
540
541 wtFiller.Fill(blobWts1);
542
543 if (m_bUseHalfSize)
544 {
545 blobWts.CopyFrom(blobWts1);
546 blobWts1.Dispose();
547 }
548 }
549
550 m_colBlobs.Add(blobWts);
551
552 // If necessary, initialize and fill the biases:
553 if (m_bBiasTerm)
554 {
555 Blob<T> blobBias = new Blob<T>(m_cuda, m_log, true, m_bUseHalfSize);
556 blobBias.Name = m_param.name + " bias";
557 blobBias.type = BLOB_TYPE.WEIGHT;
558
559 if (m_bUseHalfSize || !shareParameter(blobBias, rgBiasShape))
560 {
561 blobBias.Reshape(rgBiasShape, m_bUseHalfSize);
562 Filler<T> biasFiller = Filler<T>.Create(m_cuda, m_log, m_param.convolution_param.bias_filler);
563
564 Blob<T> blobBias1 = blobBias;
565
566 if (m_bUseHalfSize)
567 {
568 blobBias1 = new Blob<T>(m_cuda, m_log, false, false);
569 blobBias1.ReshapeLike(blobBias);
570 }
571
572 biasFiller.Fill(blobBias1);
573
574 if (m_bUseHalfSize)
575 {
576 blobBias.CopyFrom(blobBias1);
577 blobBias1.Dispose();
578 }
579 }
580
581 m_colBlobs.Add(blobBias);
582 }
583 }
584
585 m_nKernelDim = m_colBlobs[0].count(1);
586 m_nWeightOffset = m_nConvOutChannels * m_nKernelDim / m_nGroup;
587
588 // Propagate gradients to the parameters (as directed by backward pass).
589 m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count, true);
590 }
591
597 public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
598 {
599 if (!reshapeNeeded(colBottom, colTop))
600 return;
601
602 int nFirstSpatialAxis = m_nChannelAxis + 1;
603 m_log.CHECK_EQ(colBottom[0].num_axes, nFirstSpatialAxis + m_nNumSpatialAxes, "bottom num_axes may not change.");
604
605 m_nNum = colBottom[0].count(0, m_nChannelAxis);
606 m_log.CHECK_EQ(colBottom[0].shape(m_nChannelAxis), m_nChannels, "Input size incompatible with convolution kernel.");
607
608 // TODO: generalize to handle inputs of different shapes.
609 for (int i = 1; i < colBottom.Count; i++)
610 {
611 m_log.CHECK(Utility.Compare<int>(colBottom[0].shape(), colBottom[i].shape()), "Shape mismatch - bottom[0]: '" + colBottom[0].shape_string + "' vs. bottom[" + i.ToString() + "]: '" + colBottom[i].shape_string + "'");
612 }
613
614 // Shape the tops.
615 m_rgBottomShape = Utility.Clone<int>(colBottom[0].shape());
616 compute_output_shape();
617
618 List<int> rgTopShape = new List<int>();
619
620 for (int i = 0; i < m_nChannelAxis; i++)
621 {
622 rgTopShape.Add(colBottom[0].shape(i));
623 }
624
625 rgTopShape.Add(m_nNumOutput);
626
627 for (int i = 0; i < m_nNumSpatialAxes; i++)
628 {
629 rgTopShape.Add(m_rgOutputShape[i]);
630 }
631
632 for (int i = 0; i < colTop.Count; i++)
633 {
634 colTop[i].Reshape(rgTopShape, m_bUseHalfSize);
635 }
636
637 if (reverse_dimensions())
638 m_nConvOutSpatialDim = colBottom[0].count(nFirstSpatialAxis);
639 else
640 m_nConvOutSpatialDim = colTop[0].count(nFirstSpatialAxis);
641
642 m_nColOffset = m_nKernelDim * m_nConvOutSpatialDim;
643 m_nOutputOffset = m_nConvOutChannels * m_nConvOutSpatialDim / m_nGroup;
644
645 if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
646 {
647 // Setup input dimensions (blobConvInputShape)
648 List<int> rgBottomDimBlobShape = new List<int>() { m_nNumSpatialAxes + 1 };
649 m_blobConvInputShape.Reshape(rgBottomDimBlobShape);
650
651 T[] rgConvInputShapeData = m_blobConvInputShape.mutable_cpu_data;
652 for (int i = 0; i < m_nNumSpatialAxes + 1; i++)
653 {
654 if (reverse_dimensions())
655 rgConvInputShapeData[i] = (T)Convert.ChangeType(colTop[0].shape(m_nChannelAxis + i), typeof(T));
656 else
657 rgConvInputShapeData[i] = (T)Convert.ChangeType(colBottom[0].shape(m_nChannelAxis + i), typeof(T));
658 }
659 m_blobConvInputShape.mutable_cpu_data = rgConvInputShapeData;
660
661 // The im2col result buffer will only hold one image at a time to avoid
662 // overly large memory usage. In the special case of 1x1 convolution
663 // it goes lazily unused to save memory.
664 m_rgColBufferShape = new List<int>();
665 m_rgColBufferShape.Add(m_nKernelDim * m_nGroup);
666
667 for (int i = 0; i < m_nNumSpatialAxes; i++)
668 {
669 if (reverse_dimensions())
670 m_rgColBufferShape.Add(input_shape(i + 1));
671 else
672 m_rgColBufferShape.Add(m_rgOutputShape[i]);
673 }
674
675 shareLayerBlob(m_blobColBuffer, m_rgColBufferShape);
676 m_blobColBuffer.Reshape(m_rgColBufferShape);
677 }
678
679 m_nBottomDim = colBottom[0].count(m_nChannelAxis);
680 m_nTopDim = colTop[0].count(m_nChannelAxis);
681 m_nNumKernelsIm2col = m_nConvInChannels * m_nConvOutSpatialDim;
682 m_nNumKernelsCol2im = (reverse_dimensions()) ? m_nTopDim : m_nBottomDim;
683
684 // Set up the all-ones 'bias_multiplier' for adding biases via BLAS
685 m_nOutSpatialDim = colTop[0].count(nFirstSpatialAxis);
686
687 if (m_bBiasTerm)
688 {
689 if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
690 {
691 List<int> rgBiasMultShape = new List<int>() { m_nOutSpatialDim };
692 shareLayerBlob(m_blobBiasMultiplier, rgBiasMultShape);
693 m_blobBiasMultiplier.Reshape(rgBiasMultShape);
694 m_blobBiasMultiplier.SetData(1.0);
695 }
696 }
697 }
698
702 public override int MinBottomBlobs
703 {
704 get { return 1; }
705 }
706
710 public override int MinTopBlobs
711 {
712 get { return 1; }
713 }
714
718 public override bool EqualNumBottomTopBlobs
719 {
720 get { return true; }
721 }
722
735 protected void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col = false)
736 {
737 long hColBuff = hInput;
738 int nColBuffOffset = nInputOffset;
739
740 if (!m_bIs1x1)
741 {
742 if (!bSkipIm2Col)
743 conv_im2col(hInput, nInputOffset, m_blobColBuffer.mutable_gpu_data, 0);
744
745 hColBuff = m_blobColBuffer.gpu_data;
746 nColBuffOffset = 0;
747 }
748
749 //m_cuda.gemm(false, false, m_nConvOutChannels / m_nGroup, m_nConvOutSpatialDim, m_nKernelDim, m_tOne, hWeights, hColBuff, m_tZero, hOutput, m_nWeightOffset * g, nColBuffOffset + m_nColOffset * g, nOutputOffset + m_nOutputOffset * g);
750 m_cuda.gemm(false, false, m_nConvOutChannels / m_nGroup, m_nConvOutSpatialDim, m_nKernelDim, m_tOne, hWeights, hColBuff, m_tZero, hOutput, 0, nColBuffOffset, nOutputOffset, m_nGroup, m_nWeightOffset, m_nColOffset, m_nOutputOffset);
751 }
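// Illustrative note (not from the original source): per group g, the call
// above is a plain matrix product of the filter matrix and the im2col buffer:
//
//    output[M x N] = weights[M x K] * colBuffer[K x N]
//
// where M = m_nConvOutChannels / m_nGroup, K = m_nKernelDim and
// N = m_nConvOutSpatialDim; m_nWeightOffset, m_nColOffset and m_nOutputOffset
// step each operand to the g-th group, as the commented-out call shows.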
752
762 protected void forward_bias(long hOutput, int nOutputOffset, long hBias)
763 {
764 m_cuda.gemm(false, false, m_nNumOutput, m_nOutSpatialDim, 1, m_tOne, hBias, m_blobBiasMultiplier.gpu_data, m_tOne, hOutput, 0, 0, nOutputOffset);
765 }
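// Illustrative note (not from the original source): the bias add is a rank-1
// update using the all-ones bias multiplier:
//
//    output[m_nNumOutput x m_nOutSpatialDim] += bias[m_nNumOutput x 1] * ones[1 x m_nOutSpatialDim]
//
// i.e. each output channel's bias value is broadcast across its spatial map.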
766
778 protected void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
779 {
780 long hColBuff = m_blobColBuffer.mutable_gpu_data;
781 int nColBuffOffset = 0;
782
783 if (m_bIs1x1)
784 {
785 hColBuff = hInput;
786 nColBuffOffset = nInputOffset;
787 }
788
789 //for (int g = 0; g < m_nGroup; g++)
790 //{
791 // m_cuda.gemm(true, false, m_nKernelDim, m_nConvOutSpatialDim, m_nConvOutChannels / m_nGroup, m_tOne, hWeights, hOutput, m_tZero, hColBuff, m_nWeightOffset * g, nOutputOffset + m_nOutputOffset * g, nColBuffOffset + m_nColOffset * g);
792 //}
793
794 m_cuda.gemm(true, false, m_nKernelDim, m_nConvOutSpatialDim, m_nConvOutChannels / m_nGroup, m_tOne, hWeights, hOutput, m_tZero, hColBuff, 0, nOutputOffset, nColBuffOffset, m_nGroup, m_nWeightOffset, m_nOutputOffset, m_nColOffset);
795
796 if (!m_bIs1x1)
797 conv_col2im(hColBuff, nColBuffOffset, hInput, nInputOffset);
798 }
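// Illustrative note (not from the original source): the transposed gemm above
// computes the gradient with respect to the layer input in column form,
//
//    colDiff[K x N] = weights[M x K]^T * outputDiff[M x N]
//
// (per group, with M, K, N as in forward_gemm), and conv_col2im then scatters
// the overlapping column entries back into the input gradient.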
799
811 protected void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
812 {
813 long hColBuff = hInput;
814 int nColBuffOffset = nInputOffset;
815
816 if (!m_bIs1x1)
817 {
818 conv_im2col(hInput, nInputOffset, m_blobColBuffer.mutable_gpu_data, 0);
819 hColBuff = m_blobColBuffer.gpu_data;
820 nColBuffOffset = 0;
821 }
822
823 //for (int g = 0; g < m_nGroup; g++)
824 //{
825 // m_cuda.gemm(false, true, m_nConvOutChannels / m_nGroup, m_nKernelDim, m_nConvOutSpatialDim, m_tOne, hOutput, hColBuff, m_tOne, hWeights, nOutputOffset + m_nOutputOffset * g, nColBuffOffset + m_nColOffset * g, m_nWeightOffset * g);
826 //}
827
828 m_cuda.gemm(false, true, m_nConvOutChannels / m_nGroup, m_nKernelDim, m_nConvOutSpatialDim, m_tOne, hOutput, hColBuff, m_tOne, hWeights, nOutputOffset, nColBuffOffset, 0, m_nGroup, m_nOutputOffset, m_nColOffset, m_nWeightOffset);
829 }
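// Illustrative note (not from the original source): the weight gradient is the
// product of the output gradient with the transposed im2col buffer,
//
//    weightDiff[M x K] += outputDiff[M x N] * colBuffer[K x N]^T
//
// per group; the m_tOne beta term makes the call accumulate, so contributions
// from the items of a batch sum rather than overwrite each other.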
830
840 protected void backward_bias(long hBias, long hInput, int nInputOffset)
841 {
842 m_cuda.gemv(false, m_nNumOutput, m_nOutSpatialDim, m_tOne, hInput, m_blobBiasMultiplier.gpu_data, m_tOne, hBias, nInputOffset, 0, 0);
843 }
844
850 protected int input_shape(int i)
851 {
852 return m_rgBottomShape[m_nChannelAxis + i];
853 }
854
860 protected abstract bool reverse_dimensions();
861
865 protected abstract void compute_output_shape();
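// Illustrative note (not from the original source): in the standard Caffe
// convolution case the derived layer computes each spatial output dimension as
//
//    outDim = (inDim + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
//
// e.g. a 224 input with a 3x3 kernel, pad 1, stride 1, dilation 1 stays 224,
// while stride 2 gives 112; deconvolution (reverse_dimensions() == true)
// inverts this relationship.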
866
867 private void conv_im2col(long hData, int nDataOffset, long hColBuff, int nColBuffOffset)
868 {
869 if (!m_bForceNDim2col && m_nNumSpatialAxes == 2)
870 {
871 T[] rgConvInputShape = m_blobConvInputShape.update_cpu_data();
872 T[] rgKernelShape = m_blobKernelShape.update_cpu_data();
873 T[] rgPad = m_blobPad.update_cpu_data();
874 T[] rgStride = m_blobStride.update_cpu_data();
875 T[] rgDilation = m_blobDilation.update_cpu_data();
876
877 m_cuda.im2col(hData,
878 nDataOffset,
879 m_nConvInChannels,
880 val_at(rgConvInputShape, 1),
881 val_at(rgConvInputShape, 2),
882 val_at(rgKernelShape, 0),
883 val_at(rgKernelShape, 1),
884 val_at(rgPad, 0),
885 val_at(rgPad, 1),
886 val_at(rgStride, 0),
887 val_at(rgStride, 1),
888 val_at(rgDilation, 0),
889 val_at(rgDilation, 1),
890 hColBuff,
891 nColBuffOffset);
892 }
893 else
894 {
895 m_cuda.im2col_nd(hData,
896 nDataOffset,
897 m_nNumSpatialAxes,
898 m_nNumKernelsIm2col,
899 0,
900 m_blobConvInputShape.gpu_data,
901 m_blobColBuffer.gpu_shape,
902 m_blobKernelShape.gpu_data,
903 m_blobPad.gpu_data,
904 m_blobStride.gpu_data,
905 m_blobDilation.gpu_data,
906 hColBuff,
907 nColBuffOffset);
908 }
909 }
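// Illustrative note (not from the original source): for the 2D path above, the
// column buffer laid out by im2col has
//
//    rows    = m_nConvInChannels * kernelH * kernelW   (= m_nKernelDim * m_nGroup)
//    columns = outH * outW                             (= m_nConvOutSpatialDim)
//
// e.g. 64 input channels with a 3x3 kernel over a 56x56 output give a
// 576 x 3,136 buffer per image, which is why only one image is expanded at a time.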
910
911 private void conv_col2im(long hColBuff, int nColBuffOffset, long hData, int nDataOffset)
912 {
913 if (!m_bForceNDim2col && m_nNumSpatialAxes == 2)
914 {
915 T[] rgConvInputShape = m_blobConvInputShape.update_cpu_data();
916 T[] rgKernelShape = m_blobKernelShape.update_cpu_data();
917 T[] rgPad = m_blobPad.update_cpu_data();
918 T[] rgStride = m_blobStride.update_cpu_data();
919 T[] rgDilation = m_blobDilation.update_cpu_data();
920
921 m_cuda.col2im(hColBuff,
922 nColBuffOffset,
923 m_nConvInChannels,
924 val_at(rgConvInputShape, 1),
925 val_at(rgConvInputShape, 2),
926 val_at(rgKernelShape, 0),
927 val_at(rgKernelShape, 1),
928 val_at(rgPad, 0),
929 val_at(rgPad, 1),
930 val_at(rgStride, 0),
931 val_at(rgStride, 1),
932 val_at(rgDilation, 0),
933 val_at(rgDilation, 1),
934 hData,
935 nDataOffset);
936 }
937 else
938 {
939 m_cuda.col2im_nd(hColBuff,
940 nColBuffOffset,
941 m_nNumSpatialAxes,
942 m_nNumKernelsCol2im,
943 0,
944 m_blobConvInputShape.gpu_data,
945 m_blobColBuffer.gpu_shape,
946 m_blobKernelShape.gpu_data,
947 m_blobPad.gpu_data,
948 m_blobStride.gpu_data,
949 m_blobDilation.gpu_data,
950 hData,
951 nDataOffset);
952 }
953 }
954 }
955}
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
The Utility class provides general utility functions.
Definition: Utility.cs:35
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Definition: Blob.cs:1922
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
string shape_string
Returns a string describing the Blob's shape.
Definition: Blob.cs:657
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
Definition: Blob.cs:2761
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
long gpu_shape
Returns the shape GPU handle used by the CudaDnn connection. The shape data contains the shape inform...
Definition: Blob.cs:1565
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
Definition: EventArgs.cs:17
WorkspaceArgs(long hData, ulong lSize)
The WorkspaceArgs constructor.
Definition: EventArgs.cs:26
Abstract Filler class used to fill blobs with values.
Definition: Filler.cs:19
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
Definition: Filler.cs:50
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
Definition: Filler.cs:79
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLa...
bool m_bIs1x1
Whether or not the kernel is 1x1.
int m_nNumOutput
The number of outputs.
List< int > m_rgOutputShape
The spatial dimensions of the output.
override void dispose()
Releases all GPU and host resources used by the Layer.
List< int > m_rgColBufferShape
The spatial dimensions of the col_buffer.
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int m_nOutSpatialDim
The output spatial dimension.
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
int m_nChannels
The number of channels in each item.
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
abstract bool reverse_dimensions()
reverse_dimensions should return true iff we are implementing deconv, so that conv helpers know which...
int m_nNumSpatialAxes
The number of spatial axes.
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
override int MinBottomBlobs
Returns the minimum number of required bottom Blobs: input
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: output
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
override bool EqualNumBottomTopBlobs
Returns that there are an equal number of top and bottom Blobs.
Blob< T > m_blobStride
The spatial dimensions of the stride.
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
List< int > m_rgBottomShape
The bottom shape.
BaseConvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The BaseConvolutionLayer constructor.
int m_nWeightOffset
The weight offset used.
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' collection provided.
Blob< T > m_blobPad
The spatial dimensions of the padding.
int m_nNum
The number of items in the batch.
abstract void compute_output_shape()
Compute height_out and width_out from other parameters.
Blob< T > m_blobConvInputShape
The spatial dimensions of the convolution input.
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
bool m_bForceNDim2col
Whether or not to force the n-dimensional im2col path.
int input_shape(int i)
Returns the spatial dimensions of the input.
bool m_bBiasTerm
Whether or not to use bias.
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
Definition: Layer.cs:1434
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is fo...
Definition: Layer.cs:1170
T m_tZero
Specifies a generic type equal to 0.0.
Definition: Layer.cs:76
T m_tOne
Specifies a generic type equal to 1.0.
Definition: Layer.cs:72
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
Definition: Layer.cs:1152
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
Definition: Layer.cs:84
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
Definition: Layer.cs:622
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
Definition: Layer.cs:55
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
Definition: Layer.cs:63
Specifies the parameters for the ConvolutionLayer. The default weight filler is set to the XavierFill...
FillerParameter weight_filler
The filler for the weight. The default is set to use the 'xavier' filler.
uint group
The group size for group convolution.
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
bool force_nd_im2col
Whether to force use of the general ND convolution, even if a specific implementation for blobs of th...
int axis
The axis to interpret as 'channels' when performing convolution. Preceding dimensions are treated as ...
FillerParameter bias_filler
The filler for the bias. The default is set to use the 'constant = 0.1' filler.
bool bias_term
Whether to have bias terms or not.
int cudnn_workspace_limit
Specifies the workspace limit used by cuDnn. A value of 0 directs cuDNN to use the fastest algorithm ...
uint num_output
The number of outputs for the layer.
bool cudnn_workspace_allow_on_groups
When true, allows workspace usage on groups > 1 (default = false).
uint? stride_h
The stride height (2D only)
List< uint > kernel_size
Kernel size is given as a single value for equal dimensions in all spatial dimensions,...
List< uint > dilation
Factor used to dilate the kernel, (implicitly) zero-filling the resulting holes. (Kernel dilation is ...
uint? stride_w
The stride width (2D only)
uint? pad_h
The padding height (2D only)
uint? kernel_h
The kernel height (2D only)
List< uint > stride
Stride is given as a single value for equal dimensions in all spatial dimensions, or once per spatial...
uint? kernel_w
The kernel width (2D only)
uint? pad_w
The padding width (2D only)
List< uint > pad
Pad is given as a single value for equal dimensions in all spatial dimensions, or once per spatial di...
Specifies the base parameter for all layers.
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
string name
Specifies the name of this LayerParameter.
bool use_halfsize
Specifies whether or not to use half sized memory or not.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
BLOB_TYPE
Defines the type of data held by a given Blob.
Definition: Interfaces.cs:62
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
Definition: Interfaces.cs:38
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12