MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
InnerProductLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.fillers;

namespace MyCaffe.layers
{
    /// <summary>
    /// The InnerProductLayer, also known as a 'fully-connected' layer, computes the inner
    /// product of the input with a learned weight matrix, optionally adding a bias term.
    /// </summary>
    public class InnerProductLayer<T> : Layer<T>
    {
        int m_nM;
        int m_nK;
        int m_nN;
        bool m_bBiasTerm;
        Blob<T> m_blobBiasMultiplier;
        bool m_bTranspose;
        bool m_bEnableNoise = false;
        double m_dfSigmaInit = 0;
        Blob<T> m_blobEpsilonWeight = null;
        Blob<T> m_blobEpsilonBias = null;
        Filler<T> m_fillerEpsilon = null;
        double m_dfBiasGradScale = 1.0;
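        // Dimension notation used below: the layer computes Y = X * W^T (+ bias), where
        // X is (M x K), W is (N x K) (or (K x N) when 'transpose' is set), and Y is
        // (M x N); m_nM = outer (batch) count, m_nK = flattened input dimension, and
        // m_nN = num_output.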

        /// <summary>
        /// The InnerProductLayer constructor.
        /// </summary>
        public InnerProductLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.INNERPRODUCT;
            m_blobBiasMultiplier = new Blob<T>(cuda, log);
            m_blobBiasMultiplier.Name = m_param.name + " biasmult";

            if (m_param.inner_product_param.enable_noise)
            {
                m_blobEpsilonWeight = new Blob<T>(cuda, log);
                m_blobEpsilonWeight.Name = m_param.name + " epsilon_wt";

                if (m_param.inner_product_param.bias_term)
                {
                    m_blobEpsilonBias = new Blob<T>(cuda, log);
                    m_blobEpsilonBias.Name = m_param.name + " epsilon_bias";
                }
            }

            setup_internal_blobs(m_colInternalBlobs);
        }

        protected override void dispose()
        {
            dispose(ref m_blobBiasMultiplier);
            dispose(ref m_blobEpsilonWeight);
            dispose(ref m_blobEpsilonBias);

            base.dispose();
        }

        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            col.Add(m_blobBiasMultiplier);

            if (m_blobEpsilonWeight != null)
                col.Add(m_blobEpsilonWeight);

            if (m_blobEpsilonBias != null)
                col.Add(m_blobEpsilonBias);
        }

        /// <summary>
        /// Returns the minimum number of required bottom (input) Blobs: input.
        /// </summary>
        public override int MinBottomBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of allowed bottom (input) Blobs: input, num_output (optional).
        /// </summary>
        public override int MaxBottomBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs: ip.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        public override bool ReInitializeParameters(WEIGHT_TARGET target)
        {
            base.ReInitializeParameters(target);

            if (target == WEIGHT_TARGET.BOTH || target == WEIGHT_TARGET.WEIGHTS)
            {
                Filler<T> weight_filler = Filler<T>.Create(m_cuda, m_log, m_param.inner_product_param.weight_filler);
                weight_filler.Fill(m_colBlobs[0]);
            }

            if (m_param.inner_product_param.bias_term && m_colBlobs.Count > 1 && (target == WEIGHT_TARGET.BOTH || target == WEIGHT_TARGET.BIAS))
            {
                Filler<T> bias_filler = Filler<T>.Create(m_cuda, m_log, m_param.inner_product_param.bias_filler);
                bias_filler.Fill(m_colBlobs[1]);
            }

            return true;
        }

        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (colBottom.Count > 1)
                m_param.inner_product_param.num_output = (uint)convertF(colBottom[1].GetData(0));

            int nNumOutput = (int)m_param.inner_product_param.num_output;
            m_bBiasTerm = m_param.inner_product_param.bias_term;
            m_bTranspose = m_param.inner_product_param.transpose;
            m_bEnableNoise = m_param.inner_product_param.enable_noise;
            m_dfSigmaInit = m_param.inner_product_param.sigma_init;
            m_dfBiasGradScale = m_param.inner_product_param.bias_grad_scale;
            m_nN = nNumOutput;

            List<int> rgShape = colBottom[0].shape();
            int nShapeCount = rgShape.Count;
            for (int i = nShapeCount; i <= m_param.inner_product_param.axis; i++)
            {
                rgShape.Add(1);
            }

            if (nShapeCount != rgShape.Count)
                colBottom[0].Reshape(rgShape);

            int nAxis = colBottom[0].CanonicalAxisIndex(m_param.inner_product_param.axis);

            // Dimensions starting from 'axis' are 'flattened' into a single
            // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
            // and axis == 1, N inner products with dimension CHW are performed.
            m_nK = colBottom[0].count(nAxis);

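            // Worked example: a bottom of shape (64, 3, 28, 28) with axis = 1 gives
            // m_nK = 3 * 28 * 28 = 2352, and Reshape later computes m_nM = 64, so the
            // layer performs 64 inner products of length 2352 per output unit.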
            // Check if we need to set up the weights.
            if (m_colBlobs.Count > 0)
            {
                m_log.WriteLine("Skipping parameter initialization.");
            }
            else
            {
                // Initialize the weight.
                List<int> rgWeightShape = Utility.Create<int>(2, 0);

                if (m_bTranspose)
                {
                    rgWeightShape[0] = m_nK;
                    rgWeightShape[1] = m_nN;
                }
                else
                {
                    rgWeightShape[0] = m_nN;
                    rgWeightShape[1] = m_nK;
                }

                double dfNoiseRange = 1.0 / Math.Sqrt(rgWeightShape[1]);
                Blob<T> blobWeight = new Blob<T>(m_cuda, m_log);
                blobWeight.Name = m_param.name + " weights";
                blobWeight.type = BLOB_TYPE.IP_WEIGHT;

                if (!shareParameter(blobWeight, rgWeightShape, true))
                {
                    blobWeight.Reshape(rgWeightShape);
                    Filler<T> weight_filler = Filler<T>.Create(m_cuda, m_log, m_param.inner_product_param.weight_filler);
                    weight_filler.Fill(blobWeight);

                    if (m_bEnableNoise)
                        blobWeight.scale_data(dfNoiseRange);
                }
                m_colBlobs.Add(blobWeight);

                // If necessary, initialize and fill the bias term.
                if (m_bBiasTerm)
                {
                    List<int> rgBiasShape = Utility.Create<int>(1, m_nN);
                    Blob<T> blobBias = new Blob<T>(m_cuda, m_log);
                    blobBias.Name = m_param.name + " bias";
                    blobBias.type = BLOB_TYPE.IP_WEIGHT;

                    if (!shareParameter(blobBias, rgBiasShape, true))
                    {
                        blobBias.Reshape(rgBiasShape);
                        Filler<T> bias_filler = Filler<T>.Create(m_cuda, m_log, m_param.inner_product_param.bias_filler);
                        bias_filler.Fill(blobBias);

                        if (m_bEnableNoise)
                            blobBias.scale_data(dfNoiseRange);
                    }
                    m_colBlobs.Add(blobBias);
                }

                // Add noise sigma weight and bias.
                if (m_bEnableNoise)
                {
                    FillerParameter fp = new FillerParameter("uniform");
                    fp.min = -1;
                    fp.max = 1;
                    m_fillerEpsilon = Filler<T>.Create(m_cuda, m_log, fp);

                    Blob<T> blobSigmaWeight = new Blob<T>(m_cuda, m_log);
                    blobSigmaWeight.Name = m_param.name + " sigma_wt";
                    blobSigmaWeight.type = BLOB_TYPE.WEIGHT;
                    blobSigmaWeight.ReshapeLike(m_colBlobs[0]);
                    blobSigmaWeight.SetData(m_dfSigmaInit / Math.Sqrt(blobSigmaWeight.shape(1)));
                    m_colBlobs.Add(blobSigmaWeight);
                    m_blobEpsilonWeight.ReshapeLike(blobSigmaWeight);

                    if (m_bBiasTerm)
                    {
                        Blob<T> blobSigmaBias = new Blob<T>(m_cuda, m_log);
                        blobSigmaBias.Name = m_param.name + " sigma_bias";
                        blobSigmaBias.type = BLOB_TYPE.WEIGHT;
                        blobSigmaBias.ReshapeLike(m_colBlobs[1]);
                        blobSigmaBias.SetData(m_dfSigmaInit / Math.Sqrt(blobSigmaBias.shape(0)));
                        m_colBlobs.Add(blobSigmaBias);
                        m_blobEpsilonBias.ReshapeLike(blobSigmaBias);
                    }

                    ResetNoise();
                }
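                // Note: sigma is initialized to sigma_init / sqrt(fan-in) and epsilon is
                // resampled uniformly in [-1, 1]; during training the forward pass uses
                // the effective weight w + sigma * epsilon, in the style of a NoisyNet
                // noisy linear layer.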
            }

            m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count, true);
        }

        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            List<int> rgShape = new List<int>(colBottom[0].shape());

            // Figure out the dimensions.
            while (rgShape.Count <= m_param.inner_product_param.axis)
            {
                rgShape.Add(1);
            }

            colBottom[0].Reshape(rgShape);

            int nAxis = colBottom[0].CanonicalAxisIndex(m_param.inner_product_param.axis);
            int nNewK = colBottom[0].count(nAxis);

            m_log.CHECK_EQ(m_nK, nNewK, "Input size incompatible with inner product parameters.");

            // The first 'axis' dimensions are independent of the inner products; the total
            // number of these is M_, the product over these dimensions.
            m_nM = colBottom[0].count(0, nAxis);

            // The top shape is the bottom shape with the flattened axes dropped and
            // replaced by a single axis of dimension num_output (N_).
            List<int> rgTopShape = Utility.Clone<int>(colBottom[0].shape(), nAxis + 1);
            rgTopShape[nAxis] = m_nN;

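            // Example: bottom (64, 3, 28, 28) with axis = 1 and num_output = 100
            // produces a top shape of (64, 100).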
            // Deconvolution Layer requires min_top_axes = 4.
            for (int i = rgTopShape.Count; i < m_param.inner_product_param.min_top_axes; i++)
            {
                rgTopShape.Add(1);
            }

            colTop[0].Reshape(rgTopShape);
            if (m_param.inner_product_param.output_contains_predictions)
                colTop[0].type = BLOB_TYPE.PREDICTION;

            // Set up the bias multiplier.
            if (m_bBiasTerm)
            {
                List<int> rgBiasShape = Utility.Create<int>(1, m_nM);
                m_blobBiasMultiplier.Reshape(rgBiasShape);
                m_blobBiasMultiplier.SetData(1.0);
            }
        }

        public void ResetNoise()
        {
            if (m_bEnableNoise)
            {
                // Resample the noise vector for the weights.
                m_fillerEpsilon.Fill(m_blobEpsilonWeight);

                if (m_bBiasTerm)
                {
                    // Resample the noise vector for the bias.
                    m_fillerEpsilon.Fill(m_blobEpsilonBias);
                }
            }
        }

        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;
            long hWeight = m_colBlobs[0].gpu_data;
            long hBias = (m_bBiasTerm) ? m_colBlobs[1].gpu_data : 0;

            if (m_bEnableNoise && m_phase == Phase.TRAIN)
            {
                // Multiply the sigma weight by the noise vector.
                m_cuda.mul(m_colBlobs[2].count(), m_colBlobs[2].gpu_data, m_blobEpsilonWeight.gpu_data, m_blobEpsilonWeight.mutable_gpu_diff);
                // Add the sigma noise to the weights.
                m_cuda.add(m_colBlobs[0].count(), m_colBlobs[0].gpu_data, m_blobEpsilonWeight.gpu_diff, m_blobEpsilonWeight.mutable_gpu_diff);
                hWeight = m_blobEpsilonWeight.gpu_diff;

                if (m_bBiasTerm)
                {
                    // Multiply the sigma bias by the noise vector.
                    m_cuda.mul(m_colBlobs[3].count(), m_colBlobs[3].gpu_data, m_blobEpsilonBias.gpu_data, m_blobEpsilonBias.mutable_gpu_diff);
                    // Add the sigma noise to the bias.
                    m_cuda.add(m_colBlobs[1].count(), m_colBlobs[1].gpu_data, m_blobEpsilonBias.gpu_diff, m_blobEpsilonBias.mutable_gpu_diff);
                    hBias = m_blobEpsilonBias.gpu_diff;
                }
            }

            if (m_nM == 1)
            {
                m_cuda.gemv(false, m_nN, m_nK, m_tOne, hWeight, hBottomData, m_tZero, hTopData);

                if (m_bBiasTerm)
                    m_cuda.axpy(m_nN, m_blobBiasMultiplier.GetData(0), hBias, hTopData);
            }
            else
            {
                m_cuda.gemm(false, !m_bTranspose, m_nM, m_nN, m_nK, m_tOne, hBottomData, hWeight, m_tZero, hTopData);

                if (m_bBiasTerm)
                    m_cuda.gemm(false, false, m_nM, m_nN, 1, m_tOne, m_blobBiasMultiplier.gpu_data, hBias, m_tOne, hTopData);
            }
        }
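        // Math summary for the forward pass above: with M > 1 the first gemm computes
        // Y = X * W^T when the weight is stored as (N x K), or Y = X * W when
        // 'transpose' stores it as (K x N); the second gemm adds the bias as the outer
        // product of the (M x 1) all-ones bias multiplier with the (1 x N) bias vector.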
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopDiff = colTop[0].gpu_diff;

            // Gradient with respect to weight.
            if (m_rgbParamPropagateDown[0])
            {
                long hBottomData = colBottom[0].gpu_data;

                if (m_bTranspose)
                    m_cuda.gemm(true, false, m_nK, m_nN, m_nM, m_tOne, hBottomData, hTopDiff, m_tOne, m_colBlobs[0].mutable_gpu_diff);
                else
                    m_cuda.gemm(true, false, m_nN, m_nK, m_nM, m_tOne, hTopDiff, hBottomData, m_tOne, m_colBlobs[0].mutable_gpu_diff);
            }

            // Gradient with respect to bias.
            if (m_bBiasTerm && m_rgbParamPropagateDown[1])
            {
                if (m_dfBiasGradScale != 1)
                    m_blobBiasMultiplier.scale_data(m_dfBiasGradScale);

                m_cuda.gemv(true, m_nM, m_nN, m_tOne, hTopDiff, m_blobBiasMultiplier.gpu_data, m_tOne, m_colBlobs[1].mutable_gpu_diff);

                if (m_dfBiasGradScale != 1)
                {
                    double dfUnScale = 1.0 / m_dfBiasGradScale;
                    m_blobBiasMultiplier.scale_data(dfUnScale);
                    m_colBlobs[1].scale_diff(dfUnScale);
                }
            }

            // Gradient with respect to bottom data.
            if (rgbPropagateDown[0])
            {
                if (m_bTranspose)
                    m_cuda.gemm(false, true, m_nM, m_nK, m_nN, m_tOne, hTopDiff, m_colBlobs[0].gpu_data, m_tZero, colBottom[0].mutable_gpu_diff);
                else
                    m_cuda.gemm(false, false, m_nM, m_nK, m_nN, m_tOne, hTopDiff, m_colBlobs[0].gpu_data, m_tZero, colBottom[0].mutable_gpu_diff);
            }

            if (m_bEnableNoise && m_phase == Phase.TRAIN)
            {
                // Gradient with respect to the sigma weight.
                m_cuda.mul(m_colBlobs[2].count(), m_colBlobs[0].gpu_diff, m_blobEpsilonWeight.gpu_data, m_colBlobs[2].mutable_gpu_diff);

                if (m_bBiasTerm)
                {
                    // Gradient with respect to the sigma bias.
                    m_cuda.mul(m_colBlobs[3].count(), m_colBlobs[1].gpu_diff, m_blobEpsilonBias.gpu_data, m_colBlobs[3].mutable_gpu_diff);
                }
            }
        }
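        // Gradient summary for the backward pass above: dW = dY^T * X (or X^T * dY in the
        // transposed layout), db = dY^T * 1 via gemv with the all-ones multiplier, and
        // dX = dY * W; the sigma gradients are the weight/bias gradients scaled
        // elementwise by the sampled epsilon noise.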
    }
}
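Usage sketch (illustrative only, not part of the source above): one plausible way to drive this layer directly, assuming an initialized CudaDnn<float> 'cuda' and Log 'log'; the Layer<float>.Create factory call and the Setup/Forward entry points are assumptions drawn from the wider MyCaffe API, not verified from this file.
// Hypothetical usage sketch; 'cuda' and 'log' are assumed to be valid, initialized objects.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
p.inner_product_param.num_output = 100;   // N
p.inner_product_param.bias_term = true;
p.inner_product_param.axis = 1;           // flatten C*H*W into K

Layer<float> ip = Layer<float>.Create(cuda, log, p, new CancelEvent());

Blob<float> bottom = new Blob<float>(cuda, log, 64, 3, 28, 28);  // M = 64, K = 2352
Blob<float> top = new Blob<float>(cuda, log);

BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(bottom);
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(top);

ip.Setup(colBottom, colTop);    // runs LayerSetUp + Reshape; top becomes (64, 100)
ip.Forward(colBottom, colTop);  // Y = X * W^T + b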
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The Utility class provides general utility functions.
Definition: Utility.cs:35
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
Definition: Utility.cs:721
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Definition: Blob.cs:1922
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
Definition: Blob.cs:2761
void scale_data(double df)
Scale the data by a scaling factor.
Definition: Blob.cs:1754
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
Definition: Blob.cs:684
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
Definition: Blob.cs:1893
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
Abstract Filler class used to fill blobs with values.
Definition: Filler.cs:19
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
Definition: Filler.cs:50
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
Definition: Filler.cs:79
The InnerProductLayer, also known as a 'fully-connected' layer, computes the inner product with a set ...
override int MaxBottomBlobs
Returns the maximum number of allowed bottom (input) Blobs: input, num_output (optional).
override void dispose()
Releases all GPU and host resources used by the Layer.
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
override int MinBottomBlobs
Returns the minimum number of required bottom (input) Blobs: input.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: ip
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
The forward computation.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the inner product loss error gradient w.r.t the outputs.
void ResetNoise()
Resample the noise for both weights and bias (if used).
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
InnerProductLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The InnerProductLayer constructor.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
T m_tZero
Specifies a generic type equal to 0.0.
Definition: Layer.cs:76
T m_tOne
Specifies a generic type equal to 1.0.
Definition: Layer.cs:72
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
Definition: Layer.cs:1152
float convertF(T df)
Converts a generic to a float value.
Definition: Layer.cs:1359
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
Definition: Layer.cs:59
Phase m_phase
Specifies the Phase under which the Layer is run.
Definition: Layer.cs:51
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
Definition: Layer.cs:55
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
Definition: Layer.cs:63
Specifies the filler parameters used to create each Filler.
double min
Specifies the minimum value to use with the 'uniform' filler.
double max
Specifies the maximum value to use with the 'uniform' filler.
double sigma_init
Specifies the initialization value for the sigma weight and sigma bias used when 'enable_noise' = tru...
FillerParameter weight_filler
The filler for the weights.
int axis
Specifies the first axis to be lumped into a single inner product computation; all preceding axes are...
bool enable_noise
Enable/disable noise in the inner-product layer (default = false).
double bias_grad_scale
Specifies a scaling value applied to the bias multiplier and then unapplied after calculating the bia...
int min_top_axes
Optionally, specifies the minimum top axes (default = -1, which ignores this setting).
bool transpose
Specifies whether to transpose the weight matrix or not. If transpose == true, any operations will be...
FillerParameter bias_filler
The filler for the bias.
uint num_output
The number of outputs for the layer.
bool output_contains_predictions
Specifies that the output contains predictions and that the output blob is marked as BLOB_TYPE....
bool bias_term
Whether to have bias terms or not.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
BLOB_TYPE
Defines the type of data held by a given Blob.
Definition: Interfaces.cs:62
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
Definition: Interfaces.cs:38
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12