MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DeconvolutionLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.param;
using MyCaffe.fillers;
using MyCaffe.common;
using MyCaffe.layers;
using System.Drawing;

namespace MyCaffe.layers
{
    /// <summary>
    /// The DeconvolutionLayer convolves the input with a bank of learned filters, and (optionally)
    /// adds biases, reversing the forward and backward passes of the ConvolutionLayer.
    /// </summary>
    /// <typeparam name="T">Specifies the base type of <i>float</i> or <i>double</i>.</typeparam>
    public class DeconvolutionLayer<T> : BaseConvolutionLayer<T>
    {
        // Set to three for the benefit of the backward pass, which
        // can use separate streams for calculating the gradient w.r.t.
        // bias, filter weights, and bottom data for each group independently.
        const int CUDNN_STREAMS_PER_GROUP = 3;
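        // Handle layout (as used by backward_cudnn below): with m_nGroup groups, stream and
        // cuDNN handle [k * m_nGroup + g] serves operation k for group g, where k = 0 is the
        // bias gradient, k = 1 the filter gradient, and k = 2 the data gradient.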
41
42 long[] m_rghCudnn = null;
43 long[] m_rghStream = null;
44
45 // algorithms for forward and backward convolutions
46 CONV_FWD_ALGO[] m_rgfwdAlgo = null;
47 CONV_BWD_FILTER_ALGO[] m_rgbwdFilterAlgo = null;
48 CONV_BWD_DATA_ALGO[] m_rgbwdDataAlgo = null;
49
50 List<long> m_rghBottomDesc = new List<long>();
51 List<long> m_rghTopDesc = new List<long>();
52 long m_hBiasDesc = 0;
53 long m_hFilterDesc = 0;
54 List<long> m_rghConvDesc = new List<long>();
55 int m_nBottomOffset = 0;
56 int m_nTopOffset = 0;
57 int m_nBiasOffset = 0;
58
59 ulong[] m_rglWorkspaceFwdSizes = null;
60 ulong[] m_rglWorkspaceBwdFilterSizes = null;
61 ulong[] m_rglWorkspaceBwdDataSizes = null;
62 ulong[] m_rglWorkspaceFwdOffsets = null; // offsets into workspace fwd data.
63 ulong[] m_rglWorkspaceBwdFilterOffsets = null; // offsets into workspace bwd filter data.
64 ulong[] m_rglWorkspaceBwdDataOffsets = null; // offsets into workspace bwd data.
65 bool m_bUseTensorCores = false;
66
67
        /// <summary>
        /// The DeconvolutionLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>
        public DeconvolutionLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.DECONVOLUTION;
        }

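        // A minimal usage sketch (not from this file; 'cuda' and 'log' are assumed to be a
        // configured CudaDnn<T> connection and Log, the Layer<T>.Create factory is assumed,
        // and all parameter values below are illustrative only):
        //
        //   LayerParameter p = new LayerParameter(LayerParameter.LayerType.DECONVOLUTION);
        //   p.convolution_param.num_output = 64;     // hypothetical output channels
        //   p.convolution_param.kernel_size.Add(4);  // hypothetical 4x4 kernel
        //   p.convolution_param.stride.Add(2);       // hypothetical x2 upsampling stride
        //   Layer<T> layer = Layer<T>.Create(cuda, log, p, null);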
        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            for (int i = 0; i < m_rghBottomDesc.Count; i++)
            {
                m_cuda.FreeTensorDesc(m_rghBottomDesc[i]);
                m_cuda.FreeTensorDesc(m_rghTopDesc[i]);
                m_cuda.FreeConvolutionDesc(m_rghConvDesc[i]);
            }

            m_rghBottomDesc.Clear();
            m_rghTopDesc.Clear();
            m_rghConvDesc.Clear();

            if (m_hBiasDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBiasDesc);
                m_hBiasDesc = 0;
            }

            if (m_hFilterDesc != 0)
            {
                m_cuda.FreeFilterDesc(m_hFilterDesc);
                m_hFilterDesc = 0;
            }

            for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
            {
                if (m_rghStream != null && m_rghStream[g] != 0)
                    m_cuda.FreeStream(m_rghStream[g]);

                if (m_rghCudnn != null && m_rghCudnn[g] != 0)
                    m_cuda.FreeCuDNN(m_rghCudnn[g]);
            }

            m_rghStream = null;
            m_rghCudnn = null;

            base.dispose();
        }

        /// <summary>
        /// Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
            {
                for (int i = 0; i < colBottom.Count; i++)
                {
                    if (colBottom[i].HalfSize)
                        m_log.FAIL("Half sizes are only supported with the CUDNN engine!");
                }

                return;
            }

            // Initialize CUDA streams and cuDNN.
            m_rghStream = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rghCudnn = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];

            // Initialize algorithm arrays.
            m_rgfwdAlgo = new CONV_FWD_ALGO[colBottom.Count];
            m_rgbwdFilterAlgo = new CONV_BWD_FILTER_ALGO[colBottom.Count];
            m_rgbwdDataAlgo = new CONV_BWD_DATA_ALGO[colBottom.Count];

            // Initialize the size arrays.
            m_rglWorkspaceFwdSizes = new ulong[colBottom.Count];
            m_rglWorkspaceBwdFilterSizes = new ulong[colBottom.Count];
            m_rglWorkspaceBwdDataSizes = new ulong[colBottom.Count];
            m_rglWorkspaceFwdOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rglWorkspaceBwdFilterOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
            m_rglWorkspaceBwdDataOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];

            for (int i = 0; i < colBottom.Count; i++)
            {
                // initialize all to default algorithms.
                m_rgfwdAlgo[i] = (CONV_FWD_ALGO)0;
                m_rgbwdFilterAlgo[i] = (CONV_BWD_FILTER_ALGO)0;
                m_rgbwdDataAlgo[i] = (CONV_BWD_DATA_ALGO)0;

                // default algorithms don't require workspace.
                m_rglWorkspaceFwdSizes[i] = 0;
                m_rglWorkspaceBwdFilterSizes[i] = 0;
                m_rglWorkspaceBwdDataSizes[i] = 0;
            }

            for (int g = 0; g < m_nGroup * CUDNN_STREAMS_PER_GROUP; g++)
            {
                m_rghStream[g] = m_cuda.CreateStream();
                m_rghCudnn[g] = m_cuda.CreateCuDNN(m_rghStream[g]);
                m_rglWorkspaceFwdOffsets[g] = 0;
                m_rglWorkspaceBwdFilterOffsets[g] = 0;
                m_rglWorkspaceBwdDataOffsets[g] = 0;
            }

            m_bUseTensorCores = m_param.convolution_param.cudnn_enable_tensor_cores;

            if (typeof(T) == typeof(double))
            {
                m_log.WriteLine("WARNING: Tensor cores are only supported with the 'float' base type. Tensor core use will be disabled for the 'double' base type.");
                m_bUseTensorCores = false;
            }

            // Set the indexing parameters.
            m_nBiasOffset = m_nNumOutput / m_nGroup;

            // Create filter descriptor.
            Size szKernel = size_at(m_blobKernelShape);
            m_hFilterDesc = m_cuda.CreateFilterDesc();
            m_cuda.SetFilterDesc(m_hFilterDesc, m_nChannels / m_nGroup, m_nNumOutput / m_nGroup, szKernel.Height, szKernel.Width);
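            // Note the argument order: the filter is sized (channels/group) x (num_output/group),
            // the reverse of ConvolutionLayer, since deconvolution runs the convolution backward.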

            // Create tensor descriptor(s) for data and corresponding convolution(s).
            for (int i = 0; i < colBottom.Count; i++)
            {
                m_rghBottomDesc.Add(m_cuda.CreateTensorDesc());
                m_rghTopDesc.Add(m_cuda.CreateTensorDesc());
                m_rghConvDesc.Add(m_cuda.CreateConvolutionDesc());
            }

            // Tensor descriptor for bias.
            if (m_bBiasTerm)
                m_hBiasDesc = m_cuda.CreateTensorDesc();
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);
            if (!m_bReshapeOnForwardNeeded)
                return;

            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
            {
                for (int i = 0; i < colBottom.Count; i++)
                {
                    if (colBottom[i].HalfSize)
                        m_log.FAIL("Half sizes are only supported with the CUDNN engine!");
                }

                return;
            }

            m_log.CHECK_EQ(2, m_nNumSpatialAxes, "cuDNN Deconvolution input must have 2 spatial axes (e.g., height and width). Use 'engine: CAFFE' for general ND deconvolution.");

            m_nBottomOffset = m_nBottomDim / m_nGroup;
            m_nTopOffset = m_nTopDim / m_nGroup;

            int nHeight = colBottom[0].shape(m_nChannelAxis + 1);
            int nWidth = colBottom[0].shape(m_nChannelAxis + 2);
            int nHeightOut = colTop[0].shape(m_nChannelAxis + 1);
            int nWidthOut = colTop[0].shape(m_nChannelAxis + 2);

            Size szPad = size_at(m_blobPad);
            Size szStride = size_at(m_blobStride);
            Size szDilation = size_at(m_blobDilation);

            ulong lWorkspaceLimitBytes = getWorkspaceLimitInBytes(m_bUseTensorCores);

            for (int i = 0; i < colBottom.Count; i++)
            {
                m_cuda.SetTensorDesc(m_rghBottomDesc[i], m_nNum, m_nChannels / m_nGroup, nHeight, nWidth, m_nChannels * nHeight * nWidth, nHeight * nWidth, nWidth, 1);
                m_cuda.SetTensorDesc(m_rghTopDesc[i], m_nNum, m_nNumOutput / m_nGroup, nHeightOut, nWidthOut, m_nNumOutput * nHeightOut * nWidthOut, nHeightOut * nWidthOut, nWidthOut, 1);
                m_cuda.SetConvolutionDesc(m_rghConvDesc[i], szPad.Height, szPad.Width, szStride.Height, szStride.Width, szDilation.Height, szDilation.Width, m_bUseTensorCores, m_bUseHalfSize);

                // NOTE: The native Caffe team has found that CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is
                // buggy for deconvolution. Thus, if cuDNN chooses that algo, we attempt to use Winograd
                // instead; if Winograd is not supported, or its workspace exceeds the limit, we fall
                // back to implicit gemm.
                CONV_FWD_ALGO algoFwdPreferred = CONV_FWD_ALGO.ALGO_WINOGRAD;

                // Get the algorithms and workspace sizes needed.
                CONV_FWD_ALGO algoFwd = (CONV_FWD_ALGO)0;
                CONV_BWD_FILTER_ALGO algoBwdFilter = (CONV_BWD_FILTER_ALGO)0;
                CONV_BWD_DATA_ALGO algoBwdData = (CONV_BWD_DATA_ALGO)0;
                ulong lWsSizeFwd = 0;
                ulong lWsSizeBwdFilter = 0;
                ulong lWsSizeBwdData = 0;

                m_cuda.GetConvolutionInfo(m_rghCudnn[0], m_rghTopDesc[i], m_hFilterDesc, m_rghConvDesc[i], m_rghBottomDesc[i], lWorkspaceLimitBytes, m_bUseTensorCores, out algoFwd, out lWsSizeFwd, out algoBwdFilter, out lWsSizeBwdFilter, out algoBwdData, out lWsSizeBwdData, algoFwdPreferred);
                m_rgfwdAlgo[i] = algoFwd;
                m_rglWorkspaceFwdSizes[i] = lWsSizeFwd;
                m_rgbwdFilterAlgo[i] = algoBwdFilter;
                m_rglWorkspaceBwdFilterSizes[i] = lWsSizeBwdFilter;
                m_rgbwdDataAlgo[i] = algoBwdData;
                m_rglWorkspaceBwdDataSizes[i] = lWsSizeBwdData;
            }

            // Reduce over all workspace sizes to get a maximum to allocate / reallocate.
            ulong lTotalWsFwd = 0;
            ulong lTotalWsBwdFilter = 0;
            ulong lTotalWsBwdData = 0;

            for (int i = 0; i < colBottom.Count; i++)
            {
                lTotalWsFwd = Math.Max(lTotalWsFwd, m_rglWorkspaceFwdSizes[i]);
                lTotalWsBwdFilter = Math.Max(lTotalWsBwdFilter, m_rglWorkspaceBwdFilterSizes[i]);
                lTotalWsBwdData = Math.Max(lTotalWsBwdData, m_rglWorkspaceBwdDataSizes[i]);
            }

            // Get the max over all operations.
            ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData));

            // Ensure all groups have enough workspace.
            ulong lTotalMaxWorkspace = (ulong)lMaxWorkspace * (ulong)m_nGroup * CUDNN_STREAMS_PER_GROUP;
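            // Illustrative sizing (hypothetical numbers): with m_nGroup = 2 and per-operation
            // maxima of 8 MB (fwd), 4 MB (bwd filter) and 16 MB (bwd data), lMaxWorkspace = 16 MB
            // and lTotalMaxWorkspace = 16 MB * 2 groups * 3 streams = 96 MB, letting every
            // group/stream pair run its largest operation without sharing workspace memory.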

            // Initialize the workspace data.
            WorkspaceArgs wsArgs = getWorkspace();

            // This is the total amount of storage needed over all groups + streams.
            setWorkspace(lTotalMaxWorkspace);

            // If we succeed in the allocation, set the offsets for the workspaces.
            for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
            {
                m_rglWorkspaceFwdOffsets[g] = (ulong)g * lTotalWsFwd;
                m_rglWorkspaceBwdFilterOffsets[g] = (ulong)g * lTotalWsBwdFilter;
                m_rglWorkspaceBwdDataOffsets[g] = (ulong)g * lTotalWsBwdData;
            }

            // Tensor descriptor for bias.
            if (m_bBiasTerm)
                m_cuda.SetTensorDesc(m_hBiasDesc, 1, m_nNumOutput / m_nGroup, 1, 1);
        }

        /// <summary>
        /// Returns true, for we want deconvolution, not convolution.
        /// </summary>
        protected override bool reverse_dimensions()
        {
            return true;
        }

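        // Note on reverse_dimensions above: returning true tells BaseConvolutionLayer to swap
        // the roles of bottom and top when sizing its im2col/gemm buffers, so the top (output)
        // takes the place the input holds in a normal convolution, which is the defining
        // trait of a transposed convolution.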
        /// <summary>
        /// Computes the output shape used by the BaseConvolutionLayer.
        /// </summary>
        protected override void compute_output_shape()
        {
            T[] rgKernelShape = m_blobKernelShape.update_cpu_data();
            T[] rgStrideData = m_blobStride.update_cpu_data();
            T[] rgPadData = m_blobPad.update_cpu_data();
            T[] rgDilationData = m_blobDilation.update_cpu_data();

            m_rgOutputShape = new List<int>();

            for (int i = 0; i < m_nNumSpatialAxes; i++)
            {
                int nStride = val_at(rgStrideData, i);
                int nKernel = val_at(rgKernelShape, i);
                int nPad = val_at(rgPadData, i);
                int nDilation = val_at(rgDilationData, i);

                // i + 1 to skip the channel axis.
                int nInputDim = input_shape(i + 1);
                int nKernelExtent = nDilation * (nKernel - 1) + 1;
                int nOutputDim = nStride * (nInputDim - 1) + nKernelExtent - 2 * nPad;
                m_rgOutputShape.Add(nOutputDim);
            }
        }

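        // A worked example of the output-shape formula above (illustrative numbers): with
        // nInputDim = 4, nStride = 2, nKernel = 3, nPad = 1 and nDilation = 1:
        //   nKernelExtent = 1 * (3 - 1) + 1 = 3
        //   nOutputDim    = 2 * (4 - 1) + 3 - 2 * 1 = 7
        // which inverts a stride-2, pad-1, 3x3 convolution that maps a 7-wide input to 4.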
        /// <summary>
        /// Run the Forward computation.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                forward_cuda(colBottom, colTop);
            else
                forward_cudnn(colBottom, colTop);
        }

        /// <summary>
        /// Run the Backward computation.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes))
                backward_cuda(colTop, rgbPropagateDown, colBottom);
            else
                backward_cudnn(colTop, rgbPropagateDown, colBottom);
        }

        /// <summary>
        /// Run the Forward computation with Engine.CAFFE.
        /// </summary>
        protected void forward_cuda(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hWeight = m_colBlobs[0].gpu_data;

            for (int i = 0; i < colBottom.Count; i++)
            {
                if (colBottom[i].HalfSize)
                    m_log.FAIL("The CAFFE engine does not support half sizes!");

                long hBottomData = colBottom[i].gpu_data;
                long hTopData = colTop[i].mutable_gpu_data;

                for (int n = 0; n < m_nNum; n++)
                {
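                    // Deconvolution runs the convolution gemm helpers in reverse: backward_gemm
                    // (normally the gradient path) maps the smaller bottom up to the larger top.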
                    backward_gemm(hBottomData, n * m_nBottomDim, hWeight, hTopData, n * m_nTopDim);

                    if (m_bBiasTerm)
                        forward_bias(hTopData, n * m_nTopDim, m_colBlobs[1].gpu_data);
                }
            }
        }

        /// <summary>
        /// Run the Backward computation using Engine.CAFFE.
        /// </summary>
        protected void backward_cuda(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hWeight = m_colBlobs[0].gpu_data;
            long hWeightDiff = m_colBlobs[0].mutable_gpu_diff;

            for (int i = 0; i < colTop.Count; i++)
            {
                if (colTop[i].HalfSize || colBottom[i].HalfSize)
                    m_log.FAIL("The CAFFE engine does not support half sizes!");

                long hTopDiff = colTop[i].gpu_diff;
                long hBottomData = colBottom[i].gpu_data;
                long hBottomDiff = colBottom[i].mutable_gpu_diff;

                // Bias gradient, if necessary.
                if (m_bBiasTerm && m_rgbParamPropagateDown[1])
                {
                    long hBiasDiff = m_colBlobs[1].mutable_gpu_diff;

                    for (int n = 0; n < m_nNum; n++)
                    {
                        backward_bias(hBiasDiff, hTopDiff, n * m_nTopDim);
                    }
                }

                if (m_rgbParamPropagateDown[0] || rgbPropagateDown[i])
                {
                    for (int n = 0; n < m_nNum; n++)
                    {
                        // Gradient w.r.t. weight. Note that we will accumulate diffs.
                        if (m_rgbParamPropagateDown[0])
                            weight_gemm(hTopDiff, n * m_nTopDim, hBottomData, n * m_nBottomDim, hWeightDiff);

                        // Gradient w.r.t. bottom data, if necessary.
                        if (rgbPropagateDown[i])
                            forward_gemm(hTopDiff, n * m_nTopDim, hWeight, hBottomDiff, n * m_nBottomDim, m_rgbParamPropagateDown[0]);
                    }
                }
            }
        }

        /// <summary>
        /// Run the Forward computation with Engine.CUDNN.
        /// </summary>
        protected void forward_cudnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hWeight = m_colBlobs[0].gpu_data;
            WorkspaceArgs wsArgs = getWorkspace();

            for (int i = 0; i < colBottom.Count; i++)
            {
                long hBottomData = colBottom[i].gpu_data;
                long hTopData = colTop[i].mutable_gpu_data;

                // Forward through cuDNN in parallel over groups.
                for (int g = 0; g < m_nGroup; g++)
                {
                    // Filters. The deconvolution forward pass is cuDNN's convolution
                    // backward-data pass (a transposed convolution).
                    m_cuda.ConvolutionBackwardData(m_rghCudnn[g],
                        m_tOne,
                        m_hFilterDesc,
                        hWeight, m_nWeightOffset * g,
                        m_rghBottomDesc[i],
                        hBottomData, m_nBottomOffset * g,
                        m_rghConvDesc[i],
                        m_rgbwdDataAlgo[i],
                        wsArgs.WorkspaceData, (int)m_rglWorkspaceBwdDataOffsets[g], m_rglWorkspaceBwdDataSizes[i],
                        m_tZero,
                        m_rghTopDesc[i],
                        hTopData, m_nTopOffset * g);
                    m_cuda.SynchronizeStream(m_rghStream[g]);

                    // Bias.
                    if (m_bBiasTerm)
                    {
                        long hBiasData = m_colBlobs[1].gpu_data;

                        m_cuda.AddTensor(m_rghCudnn[g],
                            m_tOne,
                            m_hBiasDesc,
                            hBiasData, m_nBiasOffset * g,
                            m_tOne,
                            m_rghTopDesc[i],
                            hTopData, m_nTopOffset * g);
                        m_cuda.SynchronizeStream(m_rghStream[g]);
                    }
                }

                // Synchronize the work across groups, each of which went into its own
                // stream, by launching an empty kernel into the default (null) stream.
                m_cuda.SynchronizeThread();
            }
        }

        /// <summary>
        /// Run the Backward computation using Engine.CUDNN.
        /// </summary>
        protected void backward_cudnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hWeight = 0;
            long hWeightDiff = 0;
            WorkspaceArgs wsArgs = getWorkspace();

            if (m_rgbParamPropagateDown[0])
            {
                hWeight = m_colBlobs[0].gpu_data;
                hWeightDiff = m_colBlobs[0].mutable_gpu_diff;
            }

            long hBiasDiff = 0;

            if (m_bBiasTerm && m_rgbParamPropagateDown[1])
                hBiasDiff = m_colBlobs[1].mutable_gpu_diff;

            for (int i = 0; i < colTop.Count; i++)
            {
                long hTopDiff = colTop[i].gpu_diff;

                // Backward through cuDNN in parallel over groups and gradients.
                for (int g = 0; g < m_nGroup; g++)
                {
                    // Gradient w.r.t. bias.
                    if (m_bBiasTerm && m_rgbParamPropagateDown[1])
                    {
                        m_cuda.ConvolutionBackwardBias(m_rghCudnn[0 * m_nGroup + g],
                            m_tOne,
                            m_rghTopDesc[i],
                            hTopDiff, m_nTopOffset * g,
                            m_tOne,
                            m_hBiasDesc,
                            hBiasDiff, m_nBiasOffset * g);
                    }

                    // Gradient w.r.t. weights.
                    if (m_rgbParamPropagateDown[0])
                    {
                        long hBottomData = colBottom[i].gpu_data;

                        m_cuda.ConvolutionBackwardFilter(m_rghCudnn[1 * m_nGroup + g],
                            m_tOne,
                            m_rghTopDesc[i],
                            hTopDiff, m_nTopOffset * g,
                            m_rghBottomDesc[i],
                            hBottomData, m_nBottomOffset * g,
                            m_rghConvDesc[i],
                            m_rgbwdFilterAlgo[i],
                            wsArgs.WorkspaceData, (int)m_rglWorkspaceBwdFilterOffsets[1 * m_nGroup + g],
                            m_rglWorkspaceBwdFilterSizes[i],
                            m_tOne,
                            m_hFilterDesc,
                            hWeightDiff, m_nWeightOffset * g);
                    }

                    // Gradient w.r.t. bottom data. For deconvolution this is cuDNN's convolution
                    // forward pass, again because the dimensions are reversed.
                    if (rgbPropagateDown[i])
                    {
                        // Fetch the weights if they were not retrieved above.
                        if (hWeight == 0)
                            hWeight = m_colBlobs[0].gpu_data;

                        long hBottomDiff = colBottom[i].mutable_gpu_diff;

                        m_cuda.ConvolutionForward(m_rghCudnn[2 * m_nGroup + g],
                            m_tOne,
                            m_rghTopDesc[i],
                            hTopDiff, m_nTopOffset * g,
                            m_hFilterDesc,
                            hWeight, m_nWeightOffset * g,
                            m_rghConvDesc[i],
                            m_rgfwdAlgo[i],
                            wsArgs.WorkspaceData, (int)m_rglWorkspaceFwdOffsets[2 * m_nGroup + g],
                            m_rglWorkspaceFwdSizes[i],
                            m_tZero,
                            m_rghBottomDesc[i],
                            hBottomDiff, m_nBottomOffset * g);
                    }
                }

                // Synchronize the work across groups, each of which went into its own
                // stream, by launching an empty kernel into the default (null) stream.
                m_cuda.SynchronizeThread();

                for (int g = 0; g < m_nGroup; g++)
                {
                    m_cuda.SynchronizeStream(m_rghStream[0 * m_nGroup + g]);
                    m_cuda.SynchronizeStream(m_rghStream[1 * m_nGroup + g]);
                    m_cuda.SynchronizeStream(m_rghStream[2 * m_nGroup + g]);
                }
            }
        }
    }
}
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
Definition: EventArgs.cs:17
long WorkspaceData
Get/set the handle to workspace data in GPU memory.
Definition: EventArgs.cs:36
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLa...
int m_nNumOutput
The number of outputs.
List< int > m_rgOutputShape
The spatial dimensions of the output.
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int m_nChannels
The number of channels in each item.
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
int m_nNumSpatialAxes
The number of spatial axes.
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
Blob< T > m_blobStride
The spatial dimensions of the stride.
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
int m_nWeightOffset
The weight offset used.
Blob< T > m_blobPad
The spatial dimensions of the padding.
int m_nNum
The number of items in the batch.
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int input_shape(int i)
Returns the spatial dimensions of the input.
bool m_bBiasTerm
Whether or not to use bias.
The DeconvolutionLayer convolves the input with a bank of learned filters, and (optionally) adds bias...
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation.
DeconvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The DeconvolutionLayer constructor.
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using Engine.CAFFE.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes.
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using Engine.CUDNN.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override bool reverse_dimensions()
Returns true, for we want deconvolution, not convolution.
override void compute_output_shape()
Computes the output shape used by the BaseConvolutionLayer.
override void dispose()
Releases all GPU and host resources used by the Layer.
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation with Engine.CUDNN.
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation with Engine.CAFFE.
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
Definition: Layer.cs:1434
T m_tZero
Specifies a generic type equal to 0.0.
Definition: Layer.cs:76
T m_tOne
Specifies a generic type equal to 1.0.
Definition: Layer.cs:72
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
Definition: Layer.cs:84
Size size_at(Blob< T > b)
Returns the Size of a given two element Blob, such as one that stores Blob size information.
Definition: Layer.cs:1444
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
bool m_bReshapeOnForwardNeeded
Specifies whether or not the reshape on forward is needed.
Definition: Layer.cs:100
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
Definition: Layer.cs:55
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
Definition: Layer.cs:63
bool cudnn_enable_tensor_cores
Specifies to enable the CUDA tensor cores when performing the convolution which is faster but not sup...
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
Specifies the base parameter for all layers.
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
CONV_BWD_FILTER_ALGO
Specifies the cuDnn convolution backward filter algorithm to use.
Definition: CudaDnn.cs:305
CONV_FWD_ALGO
Specifies the cuDnn convolution forward algorithm to use.
Definition: CudaDnn.cs:259
CONV_BWD_DATA_ALGO
Specifies the cuDnn convolution backward data algorithm to use.
Definition: CudaDnn.cs:331
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12