MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
ConvolutionLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using System.Drawing;

namespace MyCaffe.layers
{
    /// <summary>
    /// The ConvolutionLayer convolves the input image with a bank of learned filters, and (optionally) adds biases.
    /// </summary>
    public class ConvolutionLayer<T> : BaseConvolutionLayer<T>
    {
        const int CUDNN_STREAMS_PER_GROUP = 3;

        long[] m_rghCudnn = null;
        long[] m_rghStream = null;

        // Algorithms for forward and backward convolutions.
        CONV_FWD_ALGO[] m_rgfwdAlgo = null;
        CONV_BWD_FILTER_ALGO[] m_rgbwdFilterAlgo = null;
        CONV_BWD_DATA_ALGO[] m_rgbwdDataAlgo = null;

        List<long> m_rghBottomDesc = new List<long>();
        List<long> m_rghTopDesc = new List<long>();
        long m_hBiasDesc = 0;
        long m_hFilterDesc = 0;
        List<long> m_rghConvDesc = new List<long>();
        int m_nBottomOffset = 0;
        int m_nTopOffset = 0;
        int m_nBiasOffset = 0;

        ulong[] m_rglWorkspaceFwdSizes = null;
        ulong[] m_rglWorkspaceBwdFilterSizes = null;
        ulong[] m_rglWorkspaceBwdDataSizes = null;
        ulong[] m_rglWorkspaceFwdOffsets = null;       // offsets into workspace fwd data.
        ulong[] m_rglWorkspaceBwdFilterOffsets = null; // offsets into workspace bwd filter data.
        ulong[] m_rglWorkspaceBwdDataOffsets = null;   // offsets into workspace bwd data.
        bool m_bUseTensorCores = false;
        /// <summary>
        /// The ConvolutionLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>
        public ConvolutionLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.CONVOLUTION;
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            for (int i = 0; i < m_rghBottomDesc.Count; i++)
            {
                m_cuda.FreeTensorDesc(m_rghBottomDesc[i]);
                m_cuda.FreeTensorDesc(m_rghTopDesc[i]);
                m_cuda.FreeConvolutionDesc(m_rghConvDesc[i]);
            }

            m_rghBottomDesc.Clear();
            m_rghTopDesc.Clear();
            m_rghConvDesc.Clear();

            if (m_hBiasDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBiasDesc);
                m_hBiasDesc = 0;
            }

            if (m_hFilterDesc != 0)
            {
                m_cuda.FreeFilterDesc(m_hFilterDesc);
                m_hFilterDesc = 0;
            }

            for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
            {
                if (m_rghStream != null && m_rghStream[g] != 0)
                    m_cuda.FreeStream(m_rghStream[g]);

                if (m_rghCudnn != null && m_rghCudnn[g] != 0)
                    m_cuda.FreeCuDNN(m_rghCudnn[g]);
            }

            m_rghStream = null;
            m_rghCudnn = null;

            base.dispose();
        }

        /// <summary>
        /// Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                return;

            // Initialize CUDA streams and cuDNN.
            m_rghStream = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rghCudnn = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];

            // Initialize algorithm arrays.
            m_rgfwdAlgo = new CONV_FWD_ALGO[colBottom.Count];
            m_rgbwdFilterAlgo = new CONV_BWD_FILTER_ALGO[colBottom.Count];
            m_rgbwdDataAlgo = new CONV_BWD_DATA_ALGO[colBottom.Count];

            // Initialize the size arrays.
            m_rglWorkspaceFwdSizes = new ulong[colBottom.Count];
            m_rglWorkspaceBwdFilterSizes = new ulong[colBottom.Count];
            m_rglWorkspaceBwdDataSizes = new ulong[colBottom.Count];
            m_rglWorkspaceFwdOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rglWorkspaceBwdFilterOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rglWorkspaceBwdDataOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];

            for (int i = 0; i < colBottom.Count; i++)
            {
                // Initialize all to default algorithms.
                m_rgfwdAlgo[i] = (CONV_FWD_ALGO)0;
                m_rgbwdFilterAlgo[i] = (CONV_BWD_FILTER_ALGO)0;
                m_rgbwdDataAlgo[i] = (CONV_BWD_DATA_ALGO)0;

                // Default algorithms don't require a workspace.
                m_rglWorkspaceFwdSizes[i] = 0;
                m_rglWorkspaceBwdFilterSizes[i] = 0;
                m_rglWorkspaceBwdDataSizes[i] = 0;
            }

            for (int g = 0; g < m_nGroup * CUDNN_STREAMS_PER_GROUP; g++)
            {
                m_rghStream[g] = m_cuda.CreateStream(false, g);
                m_rghCudnn[g] = m_cuda.CreateCuDNN(m_rghStream[g]);
                m_rglWorkspaceFwdOffsets[g] = 0;
                m_rglWorkspaceBwdFilterOffsets[g] = 0;
                m_rglWorkspaceBwdDataOffsets[g] = 0;
            }
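
            // Three streams per group allow the three backward stages (bias, filter and
            // data gradients) to run concurrently: backward_cudnn below uses cuDNN handle
            // [0 * m_nGroup + g] for bias, [1 * m_nGroup + g] for filters and
            // [2 * m_nGroup + g] for data of group g.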

            // Enable tensor cores when requested by the layer parameter.
            m_bUseTensorCores = m_param.convolution_param.cudnn_enable_tensor_cores;

            if (typeof(T) == typeof(double))
            {
                m_log.WriteLine("WARNING: Tensor cores are only supported with the 'float' base type. Tensor core use will be disabled for the 'double' base type.");
                m_bUseTensorCores = false;
            }

            // Set the indexing parameters.
            m_nBiasOffset = m_nNumOutput / m_nGroup;

            // Create the filter descriptor.
            Size szKernel = size_at(m_blobKernelShape);
            m_hFilterDesc = m_cuda.CreateFilterDesc();
            m_cuda.SetFilterDesc(m_hFilterDesc, m_nNumOutput / m_nGroup, m_nChannels / m_nGroup, szKernel.Height, szKernel.Width, m_bUseHalfSize);

            // Create tensor descriptor(s) for data and corresponding convolution(s).
            for (int i = 0; i < colBottom.Count; i++)
            {
                m_rghBottomDesc.Add(m_cuda.CreateTensorDesc());
                m_rghTopDesc.Add(m_cuda.CreateTensorDesc());
                m_rghConvDesc.Add(m_cuda.CreateConvolutionDesc());
            }

            // Tensor descriptor for bias.
            if (m_bBiasTerm)
                m_hBiasDesc = m_cuda.CreateTensorDesc();
        }

        /// <summary>
        /// Tests the shapes of both the bottom and top blobs and, if they are the same as the
        /// previous sizing, returns false indicating that no reshape is needed.
        /// </summary>
        protected override bool reshapeNeeded(BlobCollection<T> colBottom, BlobCollection<T> colTop, bool bReset = true)
        {
            // Memory optimizations require reshaping now on each pass.
            if (!bReset)
                return true;

            if (!compareShapes(colBottom, colTop))
            {
                m_bReshapeOnForwardNeeded = true;
                return true;
            }
            else
            {
                m_bReshapeOnForwardNeeded = false;
                return false;
            }
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);
            if (!reshapeNeeded(colBottom, colTop, false))
                return;

            setShapes(colBottom, colTop);

            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                return;

            m_log.CHECK_EQ(2, m_nNumSpatialAxes, "cuDNN Convolution input must have 2 spatial axes (e.g., height and width). Use 'engine: CAFFE' for general ND convolution.");

            m_nBottomOffset = m_nBottomDim / m_nGroup;
            m_nTopOffset = m_nTopDim / m_nGroup;

            int nHeight = colBottom[0].shape(m_nChannelAxis + 1);
            int nWidth = colBottom[0].shape(m_nChannelAxis + 2);
            int nHeightOut = colTop[0].shape(m_nChannelAxis + 1);
            int nWidthOut = colTop[0].shape(m_nChannelAxis + 2);

            Size szPad = size_at(m_blobPad);
            Size szStride = size_at(m_blobStride);
            Size szDilation = size_at(m_blobDilation);

            ulong lWorkspaceLimitBytes = getWorkspaceLimitInBytes(m_bUseTensorCores);

            for (int i = 0; i < colBottom.Count; i++)
            {
                m_cuda.SetTensorDesc(m_rghBottomDesc[i], m_nNum, m_nChannels / m_nGroup, nHeight, nWidth, m_nChannels * nHeight * nWidth, nHeight * nWidth, nWidth, 1, m_bUseHalfSize);
                m_cuda.SetTensorDesc(m_rghTopDesc[i], m_nNum, m_nNumOutput / m_nGroup, nHeightOut, nWidthOut, m_nNumOutput * m_nOutSpatialDim, m_nOutSpatialDim, nWidthOut, 1, m_bUseHalfSize);
                m_cuda.SetConvolutionDesc(m_rghConvDesc[i], szPad.Height, szPad.Width, szStride.Height, szStride.Width, szDilation.Height, szDilation.Width, m_bUseTensorCores, m_bUseHalfSize);

                // Get the algorithms and workspace sizes needed.
                CONV_FWD_ALGO algoFwd = (CONV_FWD_ALGO)0;
                CONV_BWD_FILTER_ALGO algoBwdFilter = (CONV_BWD_FILTER_ALGO)0;
                CONV_BWD_DATA_ALGO algoBwdData = (CONV_BWD_DATA_ALGO)0;
                ulong lWsSizeFwd = 0;
                ulong lWsSizeBwdFilter = 0;
                ulong lWsSizeBwdData = 0;

                m_cuda.GetConvolutionInfo(m_rghCudnn[0], m_rghBottomDesc[i], m_hFilterDesc, m_rghConvDesc[i], m_rghTopDesc[i], lWorkspaceLimitBytes, m_bUseTensorCores, out algoFwd, out lWsSizeFwd, out algoBwdFilter, out lWsSizeBwdFilter, out algoBwdData, out lWsSizeBwdData);
                m_rgfwdAlgo[i] = algoFwd;
                m_rglWorkspaceFwdSizes[i] = lWsSizeFwd;
                m_rgbwdFilterAlgo[i] = algoBwdFilter;
                m_rglWorkspaceBwdFilterSizes[i] = lWsSizeBwdFilter;
                m_rgbwdDataAlgo[i] = algoBwdData;
                m_rglWorkspaceBwdDataSizes[i] = lWsSizeBwdData;
            }

            // Reduce over all workspace sizes to get a maximum to allocate / reallocate.
            ulong lTotalWsFwd = 0;
            ulong lTotalWsBwdFilter = 0;
            ulong lTotalWsBwdData = 0;

            for (int i = 0; i < colBottom.Count; i++)
            {
                lTotalWsFwd = Math.Max(lTotalWsFwd, m_rglWorkspaceFwdSizes[i]);
                lTotalWsBwdFilter = Math.Max(lTotalWsBwdFilter, m_rglWorkspaceBwdFilterSizes[i]);
                lTotalWsBwdData = Math.Max(lTotalWsBwdData, m_rglWorkspaceBwdDataSizes[i]);
            }

            // Get the max over all operations.
            ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData));

            // Ensure all groups have enough workspace.
            ulong lTotalMaxWorkspace = (ulong)lMaxWorkspace * (ulong)m_nGroup * (ulong)CUDNN_STREAMS_PER_GROUP;
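
            // Worked example (hypothetical sizes): with lMaxWorkspace = 8 MB, m_nGroup = 2
            // and CUDNN_STREAMS_PER_GROUP = 3, the shared allocation is 8 MB * 2 * 3 = 48 MB.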

            // Initialize the workspace data.
            WorkspaceArgs wsArgs = getWorkspace();

            // This is the total amount of storage needed over all groups + streams.
            setWorkspace(lTotalMaxWorkspace);

            // If we succeed in the allocation, set the offsets for the workspaces.
            for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
            {
                m_rglWorkspaceFwdOffsets[g] = (ulong)g * lTotalWsFwd;
                m_rglWorkspaceBwdFilterOffsets[g] = (ulong)g * lTotalWsBwdFilter;
                m_rglWorkspaceBwdDataOffsets[g] = (ulong)g * lTotalWsBwdData;
            }

            // Tensor descriptor for bias.
            if (m_bBiasTerm)
                m_cuda.SetTensorDesc(m_hBiasDesc, 1, m_nNumOutput / m_nGroup, 1, 1, m_bUseHalfSize);
        }

        /// <summary>
        /// Returns false, for we want convolution, not deconvolution.
        /// </summary>
        protected override bool reverse_dimensions()
        {
            return false;
        }

        /// <summary>
        /// Computes the output shape used by the BaseConvolutionLayer.
        /// </summary>
        protected override void compute_output_shape()
        {
            T[] rgKernelShapeData = m_blobKernelShape.cpu_data;
            T[] rgStrideData = m_blobStride.cpu_data;
            T[] rgPadData = m_blobPad.cpu_data;
            T[] rgDilationData = m_blobDilation.cpu_data;

            m_rgOutputShape.Clear();

            for (int i = 0; i < m_nNumSpatialAxes; i++)
            {
                int nKernel = val_at(rgKernelShapeData, i);
                int nStride = val_at(rgStrideData, i);
                int nPad = val_at(rgPadData, i);
                int nDilation = val_at(rgDilationData, i);

                // i + 1 to skip the channel axis.
                int nInputDim = input_shape(i + 1);
                int nKernelExtent = nDilation * (nKernel - 1) + 1;
                int nOutputDim = (nInputDim + 2 * nPad - nKernelExtent) / nStride + 1;
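                // Worked example (hypothetical values): with nInputDim = 224, nKernel = 7,
                // nStride = 2, nPad = 3 and nDilation = 1, nKernelExtent = 1 * (7 - 1) + 1 = 7
                // and nOutputDim = (224 + 2 * 3 - 7) / 2 + 1 = 112 (integer division).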

                if (nOutputDim == 0)
                    nOutputDim = 1;

                m_rgOutputShape.Add(nOutputDim);
            }
        }

        /// <summary>
        /// Run the Forward computation using either the Engine.CAFFE or Engine.CUDNN mode as specified in the LayerParameter.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                forward_cuda(colBottom, colTop);
            else
                forward_cudnn(colBottom, colTop);
        }

        /// <summary>
        /// Run the Backward computation using either the Engine.CAFFE or Engine.CUDNN mode as specified in the LayerParameter.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                backward_cuda(colTop, rgbPropagateDown, colBottom);
            else
                backward_cudnn(colTop, rgbPropagateDown, colBottom);
        }

        /// <summary>
        /// Run the Forward computation using the Engine.CAFFE mode as specified in the LayerParameter.
        /// </summary>
        protected void forward_cuda(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hWeight = m_colBlobs[0].gpu_data;

            for (int i = 0; i < colBottom.Count; i++)
            {
                long hBottomData = colBottom[i].gpu_data;
                long hTopData = colTop[i].mutable_gpu_data;

                for (int n = 0; n < m_nNum; n++)
                {
                    forward_gemm(hBottomData, n * m_nBottomDim, hWeight, hTopData, n * m_nTopDim);

                    if (m_bBiasTerm)
                        forward_bias(hTopData, n * m_nTopDim, m_colBlobs[1].gpu_data);
                }
            }
        }

        /// <summary>
        /// Run the Backward computation using the Engine.CAFFE mode as specified in the LayerParameter.
        /// </summary>
        protected void backward_cuda(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hWeight = m_colBlobs[0].gpu_data;
            long hWeightDiff = m_colBlobs[0].mutable_gpu_diff;

            for (int i = 0; i < colTop.Count; i++)
            {
                long hTopDiff = colTop[i].gpu_diff;

                // Bias gradient, if necessary.
                if (m_bBiasTerm && m_rgbParamPropagateDown[1])
                {
                    long hBiasDiff = m_colBlobs[1].mutable_gpu_diff;

                    for (int n = 0; n < m_nNum; n++)
                    {
                        backward_bias(hBiasDiff, hTopDiff, n * m_nTopDim);
                    }
                }

                if (m_rgbParamPropagateDown[0] || rgbPropagateDown[i])
                {
                    long hBottomData = colBottom[i].gpu_data;
                    long hBottomDiff = colBottom[i].mutable_gpu_diff;

                    for (int n = 0; n < m_nNum; n++)
                    {
                        // Gradient w.r.t. weight. Note that we will accumulate diffs.
                        if (m_rgbParamPropagateDown[0])
                            weight_gemm(hBottomData, n * m_nBottomDim, hTopDiff, n * m_nTopDim, hWeightDiff);

                        // Gradient w.r.t. bottom data, if necessary.
                        if (rgbPropagateDown[i])
                            backward_gemm(hTopDiff, n * m_nTopDim, hWeight, hBottomDiff, n * m_nBottomDim);
                    }
                }
            }
        }

        /// <summary>
        /// Run the Forward computation using the Engine.CUDNN mode as specified in the LayerParameter.
        /// </summary>
        protected void forward_cudnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hWeight = m_colBlobs[0].gpu_data;
            WorkspaceArgs wsArgs = getWorkspace();

            for (int i = 0; i < colBottom.Count; i++)
            {
                long hBottomData = colBottom[i].gpu_data;
                long hTopData = colTop[i].mutable_gpu_data;

                // Forward through cuDNN in parallel over groups.
                for (int g = 0; g < m_nGroup; g++)
                {
                    // Filters.
                    m_cuda.ConvolutionForward(m_rghCudnn[g],
                        m_tOne,
                        m_rghBottomDesc[i],
                        hBottomData, m_nBottomOffset * g,
                        m_hFilterDesc,
                        hWeight, m_nWeightOffset * g,
                        m_rghConvDesc[i],
                        m_rgfwdAlgo[i],
                        wsArgs.WorkspaceData, (int)m_rglWorkspaceFwdOffsets[g], m_rglWorkspaceFwdSizes[i],
                        m_tZero,
                        m_rghTopDesc[i],
                        hTopData, m_nTopOffset * g,
                        false);
                }

                // Synchronize the work across groups, each of which went into its own stream.
                for (int g = 0; g < m_nGroup; g++)
                {
                    m_cuda.SynchronizeStream(m_rghStream[g]);
                }

                // Bias.
                if (m_bBiasTerm)
                {
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        long hBiasData = m_colBlobs[1].gpu_data;

                        m_cuda.AddTensor(m_rghCudnn[g],
                            m_tOne,
                            m_hBiasDesc,
                            hBiasData, m_nBiasOffset * g,
                            m_tOne,
                            m_rghTopDesc[i],
                            hTopData, m_nTopOffset * g);
                    }

                    // Synchronize the work across groups, each of which went into its own stream.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.SynchronizeStream(m_rghStream[g]);
                    }
                }
            }
        }

        /// <summary>
        /// Run the Backward computation using the Engine.CUDNN mode as specified in the LayerParameter.
        /// </summary>
        protected void backward_cudnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            WorkspaceArgs wsArgs = getWorkspace();

            // Gradient w.r.t. bias.
            if (m_bBiasTerm && m_rgbParamPropagateDown[1])
            {
                long hBiasDiff = m_colBlobs[1].mutable_gpu_diff;

                for (int i = 0; i < colTop.Count; i++)
                {
                    long hTopDiff = colTop[i].mutable_gpu_diff;

                    // Backward through cuDNN in parallel over groups and gradients.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.ConvolutionBackwardBias(m_rghCudnn[0 * m_nGroup + g],
                            m_tOne, m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
                            m_tOne, m_hBiasDesc, hBiasDiff, m_nBiasOffset * g,
                            false);
                    }

                    // Synchronize the work across groups, each of which went into its own stream.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.SynchronizeStream(m_rghStream[0 * m_nGroup + g]);
                    }
                }
            }

            // Gradient w.r.t. weights.
            if (m_rgbParamPropagateDown[0])
            {
                long hWeightDiff = m_colBlobs[0].mutable_gpu_diff;

                for (int i = 0; i < colTop.Count; i++)
                {
                    long hTopDiff = colTop[i].mutable_gpu_diff;
                    long hBottomData = colBottom[i].gpu_data;

                    // Backward through cuDNN in parallel over groups and gradients.
                    // Note: beta = m_tOne, so the filter gradients accumulate across bottom blobs.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.ConvolutionBackwardFilter(m_rghCudnn[1 * m_nGroup + g],
                            m_tOne,
                            m_rghBottomDesc[i], hBottomData, m_nBottomOffset * g,
                            m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
                            m_rghConvDesc[i],
                            m_rgbwdFilterAlgo[i],
                            wsArgs.WorkspaceData, (int)m_rglWorkspaceBwdFilterOffsets[1 * m_nGroup + g],
                            m_rglWorkspaceBwdFilterSizes[i],
                            m_tOne,
                            m_hFilterDesc, hWeightDiff, m_nWeightOffset * g,
                            false);
                    }

                    // Synchronize the work across groups, each of which went into its own stream.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.SynchronizeStream(m_rghStream[1 * m_nGroup + g]);
                    }
                }
            }

            // Gradient w.r.t. bottom data.
            long hWeight = m_colBlobs[0].gpu_data;

            for (int i = 0; i < colTop.Count; i++)
            {
                if (rgbPropagateDown[i])
                {
                    long hTopDiff = colTop[i].mutable_gpu_diff;
                    long hBottomDiff = colBottom[i].mutable_gpu_diff;

                    // Backward through cuDNN in parallel over groups and gradients.
                    // Note: beta = m_tZero, so the bottom diff is overwritten, not accumulated.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.ConvolutionBackwardData(m_rghCudnn[2 * m_nGroup + g],
                            m_tOne,
                            m_hFilterDesc, hWeight, m_nWeightOffset * g,
                            m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
                            m_rghConvDesc[i],
                            m_rgbwdDataAlgo[i],
                            wsArgs.WorkspaceData, (int)m_rglWorkspaceBwdDataOffsets[2 * m_nGroup + g],
                            m_rglWorkspaceBwdDataSizes[i],
                            m_tZero,
                            m_rghBottomDesc[i], hBottomDiff, m_nBottomOffset * g,
                            false);
                    }

                    // Synchronize the work across groups, each of which went into its own stream.
                    for (int g = 0; g < m_nGroup; g++)
                    {
                        m_cuda.SynchronizeStream(m_rghStream[2 * m_nGroup + g]);
                    }
                }
            }
        }
    }
}
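
A minimal usage sketch (hypothetical, not taken from the MyCaffe sources): it assumes an existing CudaDnn<float> connection 'cuda', a Log 'log' and pre-shaped 'colBottom'/'colTop' blob collections, and that the ConvolutionParameter fields (num_output, kernel_size, stride, pad) mirror Caffe's; only the constructor, LayerSetUp and Reshape signatures come from this file.

    // Hypothetical setup of a 3x3, 16-filter convolution layer.
    LayerParameter p = new LayerParameter(LayerParameter.LayerType.CONVOLUTION);
    p.convolution_param.num_output = 16;    // sixteen learned filters.
    p.convolution_param.kernel_size.Add(3); // 3x3 kernel.
    p.convolution_param.stride.Add(1);
    p.convolution_param.pad.Add(1);

    ConvolutionLayer<float> layer = new ConvolutionLayer<float>(cuda, log, p);
    layer.LayerSetUp(colBottom, colTop); // creates cuDNN handles and descriptors when cuDNN is used.
    layer.Reshape(colBottom, colTop);    // selects algorithms and sizes the shared workspace.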