MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
Im2colLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Drawing;
6using MyCaffe.basecode;
7using MyCaffe.common;
8using MyCaffe.param;
9
10namespace MyCaffe.layers
11{
23 public class Im2colLayer<T> : Layer<T>
24 {
28 Blob<T> m_blobKernelShape;
32 Blob<T> m_blobStride;
36 Blob<T> m_blobPad;
40 Blob<T> m_blobDilation;
41 int m_nNumSpatialAxes;
42 int m_nBottomDim;
43 int m_nTopDim;
44 int m_nChannelAxis;
45 int m_nNum;
46 int m_nChannels;
47 bool m_bForceNDIm2Col;
48
56 : base(cuda, log, p)
57 {
59 log.CHECK(p.type == LayerParameter.LayerType.IM2COL, "The layer type should be IM2COL.");
60
61 m_blobKernelShape = new Blob<T>(cuda, log);
62 m_blobStride = new Blob<T>(cuda, log);
63 m_blobPad = new Blob<T>(cuda, log);
64 m_blobDilation = new Blob<T>(cuda, log);
65 }
66
68 protected override void dispose()
69 {
70 m_blobKernelShape.Dispose();
71 m_blobStride.Dispose();
72 m_blobPad.Dispose();
73 m_blobDilation.Dispose();
74 }
75
79 public override int ExactNumBottomBlobs
80 {
81 get { return 1; }
82 }
83
87 public override int ExactNumTopBlobs
88 {
89 get { return 1; }
90 }
91
97 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
98 {
100
101 m_bForceNDIm2Col = p.force_nd_im2col;
102 int nInputNumDims = colBottom[0].shape().Count;
103 m_nChannelAxis = colBottom[0].CanonicalAxisIndex(p.axis);
104 int nFirstSpatialDim = m_nChannelAxis + 1;
105 m_nNumSpatialAxes = nInputNumDims - nFirstSpatialDim;
106
107 m_log.CHECK_GE(m_nNumSpatialAxes, 1, "The spatial axis count must be >= 1.");
108
109 List<int> rgDimBlobShape = new List<int>() { m_nNumSpatialAxes };
110
111 // Setup filter kernel dimensions (kernel_shape_).
112 m_blobKernelShape.Reshape(rgDimBlobShape);
113
114 T[] rgKernelShape = m_blobKernelShape.mutable_cpu_data;
115
116 if (p.kernel_h.HasValue || p.kernel_w.HasValue)
117 {
118 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "kernel_h & kernel_w can only be used for 2D convolution.");
119 m_log.CHECK_EQ(0, p.kernel_size.Count, "Either kernel_size or kernel_h/w should be specified; not both.");
120 rgKernelShape[0] = (T)Convert.ChangeType(p.kernel_h.Value, typeof(T));
121 rgKernelShape[1] = (T)Convert.ChangeType(p.kernel_w.Value, typeof(T));
122 }
123 else
124 {
125 int nNumKernelDims = p.kernel_size.Count;
126 m_log.CHECK(nNumKernelDims == 1 || nNumKernelDims == m_nNumSpatialAxes, "kernel_size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
127
128 for (int i = 0; i < m_nNumSpatialAxes; i++)
129 {
130 uint nKernel = p.kernel_size[(nNumKernelDims == 1) ? 0 : i];
131 rgKernelShape[i] = (T)Convert.ChangeType(nKernel, typeof(T));
132 }
133 }
134
135 for (int i = 0; i < m_nNumSpatialAxes; i++)
136 {
137 int nVal = (int)Convert.ChangeType(rgKernelShape[i], typeof(int));
138 m_log.CHECK_GT(nVal, 0, "Filter dimensions must be nonzero.");
139 }
140
141 m_blobKernelShape.mutable_cpu_data = rgKernelShape;
142
143
144 // Setup stride dimensions (stride_).
145 m_blobStride.Reshape(rgDimBlobShape);
146
147 T[] rgStrideData = m_blobStride.mutable_cpu_data;
148
149 if (p.stride_h.HasValue || p.stride_w.HasValue)
150 {
151 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "stride_h & stride_w can only be used for 2D convolution.");
152 m_log.CHECK_EQ(0, p.stride.Count, "Either stride or stride_h/w should be specified; not both.");
153 rgStrideData[0] = (T)Convert.ChangeType(p.stride_h.Value, typeof(T));
154 rgStrideData[1] = (T)Convert.ChangeType(p.stride_w.Value, typeof(T));
155 }
156 else
157 {
158 int nNumStrideDims = p.stride.Count;
159 m_log.CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == m_nNumSpatialAxes, "stride must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
160
161 uint nDefaultStride = 1;
162 for (int i = 0; i < m_nNumSpatialAxes; i++)
163 {
164 uint nStride = (nNumStrideDims == 0) ? nDefaultStride :
165 p.stride[(nNumStrideDims == 1) ? 0 : i];
166
167 rgStrideData[i] = (T)Convert.ChangeType(nStride, typeof(T));
168 }
169 }
170
171 m_blobStride.mutable_cpu_data = rgStrideData;
172
173
174 // Setup pad dimensions (pad_).
175 m_blobPad.Reshape(rgDimBlobShape);
176
177 T[] rgPadData = m_blobPad.mutable_cpu_data;
178
179 if (p.pad_h.HasValue || p.pad_w.HasValue)
180 {
181 m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "pad_h & pad_w can only be used for 2D convolution.");
182 m_log.CHECK_EQ(0, p.pad.Count, "Either pad or pad_h/w should be specified; not both.");
183 rgPadData[0] = (T)Convert.ChangeType(p.pad_h.Value, typeof(T));
184 rgPadData[1] = (T)Convert.ChangeType(p.pad_w.Value, typeof(T));
185 }
186 else
187 {
188 int nNumPadDims = p.pad.Count;
189 m_log.CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == m_nNumSpatialAxes, "pad must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
190
191 uint nDefaultPad = 0;
192 for (int i = 0; i < m_nNumSpatialAxes; i++)
193 {
194 uint nPad = (nNumPadDims == 0) ? nDefaultPad :
195 p.pad[(nNumPadDims == 1) ? 0 : i];
196
197 rgPadData[i] = (T)Convert.ChangeType(nPad, typeof(T));
198 }
199 }
200
201 m_blobPad.mutable_cpu_data = rgPadData;
202
203
204 // Setup dilation dimensions (dilation_).
205 m_blobDilation.Reshape(rgDimBlobShape);
206
207 T[] rgDilationData = m_blobDilation.mutable_cpu_data;
208
209 int nNumDilationDims = p.dilation.Count;
210 m_log.CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == m_nNumSpatialAxes, "dilation must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims);");
211
212 uint nDefaultDilation = 1;
213 for (int i = 0; i < m_nNumSpatialAxes; i++)
214 {
215 uint nPad = (nNumDilationDims == 0) ? nDefaultDilation :
216 p.dilation[(nNumDilationDims == 1) ? 0 : i];
217
218 rgDilationData[i] = (T)Convert.ChangeType(nPad, typeof(T));
219 }
220
221 m_blobDilation.mutable_cpu_data = rgDilationData;
222 }
223
229 public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
230 {
231 List<int> rgTopShape = Utility.Clone<int>(colBottom[0].shape());
232 T[] rgKernelShapeData = m_blobKernelShape.update_cpu_data();
233 T[] rgStrideData = m_blobStride.update_cpu_data();
234 T[] rgPadData = m_blobPad.update_cpu_data();
235 T[] rgDilationData = m_blobDilation.update_cpu_data();
236
237 for (int i = 0; i < m_nNumSpatialAxes; i++)
238 {
239 int nKernel = val_at(rgKernelShapeData, i);
240 int nStride = val_at(rgStrideData, i);
241 int nPad = val_at(rgPadData, i);
242 int nDilation = val_at(rgDilationData, i);
243
244 rgTopShape[m_nChannelAxis] *= nKernel;
245 int nInputDim = colBottom[0].shape()[m_nChannelAxis + i + 1];
246 int nKernelExtent = nDilation * (nKernel - 1) + 1;
247 int nOutputDim = (nInputDim + 2 * nPad - nKernelExtent) / nStride + 1;
248 rgTopShape[m_nChannelAxis + i + 1] = nOutputDim;
249 }
250
251 colTop[0].Reshape(rgTopShape);
252 m_nNum = colBottom[0].count(0, m_nChannelAxis);
253 m_nBottomDim = colBottom[0].count(m_nChannelAxis);
254 m_nTopDim = colTop[0].count(m_nChannelAxis);
255 m_nChannels = colBottom[0].shape(m_nChannelAxis);
256 }
257
268 protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
269 {
270 long hBottomData = colBottom[0].gpu_data;
271 long hTopData = colTop[0].mutable_gpu_data;
272
273 if (!m_bForceNDIm2Col && m_nNumSpatialAxes == 2)
274 {
275 Size szKernel = size_at(m_blobKernelShape);
276 Size szStride = size_at(m_blobStride);
277 Size szPad = size_at(m_blobPad);
278 Size szDilation = size_at(m_blobDilation);
279
280 for (int n = 0; n < m_nNum; n++)
281 {
282 m_cuda.im2col(hBottomData,
283 n * m_nBottomDim,
284 m_nChannels,
285 colBottom[0].shape(m_nChannelAxis + 1),
286 colBottom[0].shape(m_nChannelAxis + 2),
287 szKernel.Height, szKernel.Width,
288 szPad.Height, szPad.Width,
289 szStride.Height, szStride.Width,
290 szDilation.Height, szDilation.Width,
291 hTopData,
292 n * m_nTopDim);
293 }
294 }
295 else
296 {
297 int nNumKernels = m_nChannels * colTop[0].count(m_nChannelAxis + 1);
298 long hKernelShape = m_blobKernelShape.gpu_data;
299 long hStride = m_blobStride.gpu_data;
300 long hPad = m_blobPad.gpu_data;
301 long hDilation = m_blobDilation.gpu_data;
302
303 for (int n = 0; n < m_nNum; n++)
304 {
305 m_cuda.im2col_nd(hBottomData,
306 n * m_nBottomDim,
307 m_nNumSpatialAxes,
308 nNumKernels,
309 m_nChannelAxis,
310 colBottom[0].gpu_shape,
311 colTop[0].gpu_shape,
312 hKernelShape,
313 hPad,
314 hStride,
315 hDilation,
316 hTopData,
317 n * m_nTopDim);
318 }
319 }
320 }
321
330 protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
331 {
332 long hTopDiff = colTop[0].gpu_diff;
333 long hBottomDiff = colBottom[0].mutable_gpu_diff;
334
335 if (!m_bForceNDIm2Col && m_nNumSpatialAxes == 2)
336 {
337 Size szKernel = size_at(m_blobKernelShape);
338 Size szStride = size_at(m_blobStride);
339 Size szPad = size_at(m_blobPad);
340 Size szDilation = size_at(m_blobDilation);
341
342 for (int n = 0; n < m_nNumSpatialAxes; n++)
343 {
344 m_cuda.col2im(hTopDiff,
345 n * m_nTopDim,
346 m_nChannels,
347 colBottom[0].shape()[m_nChannelAxis + 1],
348 colBottom[0].shape()[m_nChannelAxis + 2],
349 szKernel.Height, szKernel.Width,
350 szPad.Height, szPad.Width,
351 szStride.Height, szStride.Width,
352 szDilation.Height, szDilation.Width,
353 hBottomDiff,
354 n * m_nBottomDim);
355 }
356 }
357 else
358 {
359 long hKernelShape = m_blobKernelShape.gpu_data;
360 long hStride = m_blobStride.gpu_data;
361 long hPad = m_blobPad.gpu_data;
362 long hDilation = m_blobDilation.gpu_data;
363
364 for (int n = 0; n < m_nNumSpatialAxes; n++)
365 {
366 m_cuda.col2im_nd(hTopDiff,
367 n * m_nTopDim,
368 m_nNumSpatialAxes,
369 m_nBottomDim,
370 m_nChannelAxis,
371 colBottom[0].gpu_shape,
372 colTop[0].gpu_shape,
373 hKernelShape,
374 hPad,
375 hStride,
376 hDilation,
377 hBottomDiff,
378 n * m_nBottomDim);
379 }
380 }
381 }
382 }
383}
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
The Utility class provides general utility functions.
Definition: Utility.cs:35
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
Definition: Blob.cs:1461
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
T[] update_cpu_data()
Update the CPU data by transferring the GPU data over to the Host.
Definition: Blob.cs:1470
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The Im2ColLayer is a helper layer for image operations that rearranges image regions into column vect...
Definition: Im2colLayer.cs:24
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the forwarded inputs.
Definition: Im2colLayer.cs:330
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
Definition: Im2colLayer.cs:97
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
Definition: Im2colLayer.cs:229
Im2colLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The Im2col constructor.
Definition: Im2colLayer.cs:55
override void dispose()
Releases all GPU and host resources used by the Layer.
Definition: Im2colLayer.cs:68
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
Definition: Im2colLayer.cs:268
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: im2col.
Definition: Im2colLayer.cs:88
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: input.
Definition: Im2colLayer.cs:80
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
Definition: Layer.cs:1434
Size size_at(Blob< T > b)
Returns the Size of a given two element Blob, such as one that stores Blob size information.
Definition: Layer.cs:1444
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
Specifies the parameters for the ConvolutionLayer. The default weight filler is set to the XavierFill...
bool force_nd_im2col
Whether to force use of the general ND convolution, even if a specific implementation for blobs of th...
int axis
The axis to interpret as 'channels' when performing convolution. Preceding dimensions are treated as ...
uint? stride_h
The stride height (2D only)
List< uint > kernel_size
Kernel size is given as a single value for equal dimensions in all spatial dimensions,...
List< uint > dilation
Factor used to dilate the kernel, (implicitly) zero-filling the resulting holes. (Kernel dilation is ...
uint? stride_w
The stride width (2D only)
uint? pad_h
The padding height (2D only)
uint? kernel_h
The kernel height (2D only)
List< uint > stride
Stride is given as a single value for equal dimensions in all spatial dimensions, or once per spatial...
uint? kernel_w
The kernel width (2D only)
uint? pad_w
The padding width (2D only)
List< uint > pad
Pad is given as a single value for equal dimensions in all spatial dimensions, or once per spatial di...
Specifies the base parameter for all layers.
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
LayerType type
Specifies the type of this LayerParameter.
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12