MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
GrnLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using MyCaffe.basecode;
7using MyCaffe.common;
8using MyCaffe.param;
9
10namespace MyCaffe.layers.tft
11{
27 public class GrnLayer<T> : Layer<T>
28 {
29 Layer<T> m_ipSkipLayer = null;
30 Layer<T> m_ipFc1 = null;
31 Layer<T> m_ipContext = null;
32 Layer<T> m_act = null;
33 Layer<T> m_ipFc2 = null;
34 Layer<T> m_dropout = null;
35 Layer<T> m_gate = null;
36 Layer<T> m_layerNorm = null;
37 Blob<T> m_blobResidual = null;
38 Blob<T> m_blobIp1 = null;
39 Blob<T> m_blobContext = null;
40 Blob<T> m_blobContextAdd = null;
41 Blob<T> m_blobIp2 = null;
42 Blob<T> m_blobGate = null;
43 Blob<T> m_blobGatePlusResidual = null;
44 Blob<T> m_blobBtm = null;
45 BlobCollection<T> m_colTop = new BlobCollection<T>();
46 BlobCollection<T> m_colBtm = new BlobCollection<T>();
47
54 public GrnLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
55 : base(cuda, log, p)
56 {
57 m_type = LayerParameter.LayerType.GRN;
58
59 if (m_param.grn_param.input_dim != m_param.grn_param.output_dim)
60 m_blobResidual = new Blob<T>(cuda, log);
61
62 m_blobIp1 = new Blob<T>(cuda, log);
63 m_blobIp1.Name = p.name + ".ip1";
64 m_blobIp2 = new Blob<T>(cuda, log);
65 m_blobIp2.Name = p.name + ".ip2";
66 m_blobGate = new Blob<T>(cuda, log);
67 m_blobGate.Name = p.name + ".gate";
68 m_blobGatePlusResidual = new Blob<T>(cuda, log);
69 m_blobGatePlusResidual.Name = p.name + ".gate_p_res";
70 m_blobBtm = new Blob<T>(cuda, log);
71 m_blobBtm.Name = p.name + ".btm";
72 }
73
75 protected override void dispose()
76 {
77 dispose(ref m_blobResidual);
78 dispose(ref m_blobIp1);
79 dispose(ref m_blobContext);
80 dispose(ref m_blobContextAdd);
81 dispose(ref m_blobIp2);
82 dispose(ref m_blobGate);
83 dispose(ref m_blobGatePlusResidual);
84 dispose(ref m_blobBtm);
85
86 dispose(ref m_ipSkipLayer);
87 dispose(ref m_ipFc1);
88 dispose(ref m_ipContext);
89 dispose(ref m_act);
90 dispose(ref m_ipFc2);
91 dispose(ref m_dropout);
92 dispose(ref m_gate);
93 dispose(ref m_layerNorm);
94 }
95
97 protected override void setup_internal_blobs(BlobCollection<T> col)
98 {
99 if (col.Count > 0)
100 return;
101
102 if (m_blobContext != null)
103 col.Add(m_blobContext);
104 if (m_blobContextAdd != null)
105 col.Add(m_blobContextAdd);
106 if (m_blobResidual != null)
107 col.Add(m_blobResidual);
108 col.Add(m_blobIp1);
109 col.Add(m_blobIp2);
110 col.Add(m_blobGate);
111 col.Add(m_blobGatePlusResidual);
112 }
113
117 public override int MinBottomBlobs
118 {
119 get { return 1; }
120 }
121
125 public override int MaxBottomBlobs
126 {
127 get { return 2; }
128 }
129
133 public override int ExactNumTopBlobs
134 {
135 get { return 1; }
136 }
137
138 private void addBtmTop(Blob<T> btm, Blob<T> top)
139 {
140 m_colBtm.Clear();
141 m_colBtm.Add(btm);
142 m_colTop.Clear();
143 m_colTop.Add(top);
144 }
145
151 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
152 {
153 //-------------------------------------------------------
154 // Input conditioning components (Eq.4 in original paper)
155 //-------------------------------------------------------
156
157 // For a direct residual connection, the dimension of the input must match the dimension of the output;
158 // otherwise, the input must be projected to create the residual connection.
159 if (m_param.grn_param.input_dim != m_param.grn_param.output_dim)
160 {
161 if (m_ipSkipLayer == null)
162 {
163 LayerParameter ip = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".skip");
164 ip.inner_product_param.num_output = (uint)m_param.grn_param.output_dim;
168 m_ipSkipLayer = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip, m_param), null);
169
170 addBtmTop(colBottom[0], m_blobResidual);
171 m_ipSkipLayer.Setup(m_colBtm, m_colTop);
172 blobs.Add(m_ipSkipLayer.blobs);
173 }
174 }
175
176 // Create the linear layer for projecting the primary input (across time if necessary)
177 if (m_ipFc1 == null)
178 {
179 LayerParameter ip1 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".fc1");
180 ip1.inner_product_param.num_output = (uint)m_param.grn_param.hidden_dim;
184 m_ipFc1 = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip1, m_param), null);
185
186 addBtmTop(colBottom[0], m_blobIp1);
187 m_ipFc1.Setup(m_colBtm, m_colTop);
188 blobs.Add(m_ipFc1.blobs);
189 }
190 Blob<T> blobIp1 = m_blobIp1;
191
192 // If a context input exists, project the context as well.
193 if (colBottom.Count > 1)
194 {
195 if (m_ipContext == null)
196 {
197 LayerParameter ip = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".context");
198 ip.inner_product_param.num_output = (uint)m_param.grn_param.hidden_dim;
202 m_ipContext = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip, m_param), null);
203 m_blobContext = new Blob<T>(m_cuda, m_log);
204 m_blobContext.Name = m_param.name + ".ctx";
205 m_blobContextAdd = new Blob<T>(m_cuda, m_log);
206 m_blobContextAdd.Name = m_param.name + ".ctx_add";
207
208 addBtmTop(colBottom[1], m_blobContext);
209 m_ipContext.Setup(m_colBtm, m_colTop);
210 blobs.Add(m_ipContext.blobs);
211
212 m_cuda.add(m_blobContext.count(), m_blobContext.gpu_data, m_blobIp1.gpu_data, m_blobContext.mutable_gpu_data);
213 }
214 blobIp1 = m_blobContext;
215 }
216
217 // non-linear activation function applied to the sum of projections.
218 if (m_act == null)
219 {
220 if (m_param.grn_param.activation == param.tft.GrnParameter.ACTIVATION.RELU)
221 {
222 LayerParameter act = new LayerParameter(LayerParameter.LayerType.RELU, m_param.name + ".relu");
223 m_act = Layer<T>.Create(m_cuda, m_log, convertLayerParam(act, m_param), null);
224 }
225 else
226 {
227 LayerParameter act = new LayerParameter(LayerParameter.LayerType.ELU, m_param.name + ".elu");
229 act.elu_param.alpha = 1.0;
230 m_act = Layer<T>.Create(m_cuda, m_log, convertLayerParam(act, m_param), null);
231 }
232
233 addBtmTop(blobIp1, blobIp1);
234 m_act.Setup(m_colBtm, m_colTop);
235 }
236
237 //-------------------------------------------------------
238 // Further projection components (Eq.3 in original paper)
239 //-------------------------------------------------------
240
241 // Create the linear layer for projecting the output of the activation function.
242 if (m_ipFc2 == null)
243 {
244 LayerParameter ip2 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".fc2");
245 ip2.inner_product_param.num_output = (uint)m_param.grn_param.output_dim;
249 m_ipFc2 = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip2, m_param), null);
250
251 addBtmTop(blobIp1, m_blobIp2);
252 m_ipFc2.Setup(m_colBtm, m_colTop);
253 blobs.Add(m_ipFc2.blobs);
254 }
255
256 //-------------------------------------------------------
257 // Output gating components (Eq.2 in original paper)
258 //-------------------------------------------------------
259
260 if (m_param.grn_param.dropout_ratio > 0)
261 {
262 if (m_dropout == null)
263 {
264 LayerParameter drop = new LayerParameter(LayerParameter.LayerType.DROPOUT, m_param.name + ".drop");
265 drop.dropout_param.dropout_ratio = m_param.grn_param.dropout_ratio;
266 m_dropout = Layer<T>.Create(m_cuda, m_log, convertLayerParam(drop, m_param), null);
267
268 addBtmTop(m_blobIp2, m_blobIp2);
269 m_dropout.Setup(m_colBtm, m_colTop);
270 }
271 }
272
273 if (m_gate == null)
274 {
275 LayerParameter gate = new LayerParameter(LayerParameter.LayerType.GLU, m_param.name + ".gate");
276 gate.glu_param.input_dim = m_param.grn_param.output_dim;
277 gate.glu_param.axis = m_param.grn_param.axis;
278 gate.glu_param.weight_filler = m_param.grn_param.weight_filler;
279 gate.glu_param.bias_filler = m_param.grn_param.bias_filler;
280 m_gate = Layer<T>.Create(m_cuda, m_log, convertLayerParam(gate, m_param), null);
281
282 addBtmTop(m_blobIp2, m_blobGate);
283 m_gate.Setup(m_colBtm, m_colTop);
284 blobs.Add(m_gate.blobs);
285 }
286
287 if (m_layerNorm == null)
288 {
289 LayerParameter layerNorm = new LayerParameter(LayerParameter.LayerType.LAYERNORM, m_param.name + ".layernorm");
290 layerNorm.layer_norm_param.epsilon = 1e-10;
291 m_layerNorm = Layer<T>.Create(m_cuda, m_log, convertLayerParam(layerNorm, m_param), null);
292
293 addBtmTop(m_blobGate, colTop[0]);
294 m_layerNorm.Setup(m_colBtm, m_colTop);
295 }
296
297 setup_internal_blobs(m_colInternalBlobs);
298 }
299
305 public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
306 {
307 if (m_ipSkipLayer != null)
308 {
309 addBtmTop(colBottom[0], m_blobResidual);
310 m_ipSkipLayer.Reshape(m_colBtm, m_colTop);
311 }
312
313 addBtmTop(colBottom[0], m_blobIp1);
314 m_ipFc1.Reshape(m_colBtm, m_colTop);
315 Blob<T> blobIp1 = m_blobIp1;
316
317 if (colBottom.Count > 1)
318 {
319 addBtmTop(colBottom[1], m_blobContext);
320 m_ipContext.Reshape(m_colBtm, m_colTop);
321 m_blobContextAdd.ReshapeLike(m_blobContext);
322 blobIp1 = m_blobContext;
323 }
324
325 addBtmTop(blobIp1, blobIp1);
326 m_act.Reshape(m_colBtm, m_colTop);
327
328 addBtmTop(blobIp1, m_blobIp2);
329 m_ipFc2.Reshape(m_colBtm, m_colTop);
330
331 if (m_dropout != null)
332 {
333 addBtmTop(m_blobIp2, m_blobIp2);
334 m_dropout.Reshape(m_colBtm, m_colTop);
335 }
336
337 addBtmTop(m_blobIp2, m_blobGate);
338 m_gate.Reshape(m_colBtm, m_colTop);
339
340 m_blobGatePlusResidual.ReshapeLike(m_blobGate);
341
342 addBtmTop(m_blobGate, colTop[0]);
343 m_layerNorm.Reshape(m_colBtm, m_colTop);
344
345 m_blobBtm.ReshapeLike(colBottom[0]);
346 }
347
359 protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
360 {
361 Blob<T> blobResidual = colBottom[0];
362
363 if (m_ipSkipLayer != null)
364 {
365 addBtmTop(colBottom[0], m_blobResidual);
366 m_ipSkipLayer.Forward(m_colBtm, m_colTop);
367 blobResidual = m_blobResidual;
368 }
369
370 addBtmTop(colBottom[0], m_blobIp1);
371 m_ipFc1.Forward(m_colBtm, m_colTop);
372 Blob<T> blobIp1 = m_blobIp1;
373
374 if (colBottom.Count > 1)
375 {
376 addBtmTop(colBottom[1], m_blobContext);
377 m_ipContext.Forward(m_colBtm, m_colTop);
378
379 m_cuda.add(m_blobContext.count(), m_blobIp1.gpu_data, m_blobContext.gpu_data, m_blobContextAdd.mutable_gpu_data);
380 blobIp1 = m_blobContextAdd;
381 }
382
383 // act
384 addBtmTop(blobIp1, blobIp1);
385 m_act.Forward(m_colBtm, m_colTop);
386
387 // Fc2
388 addBtmTop(blobIp1, m_blobIp2);
389 m_ipFc2.Forward(m_colBtm, m_colTop);
390
391 // dropout
392 if (m_dropout != null)
393 {
394 addBtmTop(m_blobIp2, m_blobIp2);
395 m_dropout.Forward(m_colBtm, m_colTop);
396 }
397
398 // gate
399 addBtmTop(m_blobIp2, m_blobGate);
400 m_gate.Forward(m_colBtm, m_colTop);
401
402 // add residual
403 m_cuda.add(m_blobGatePlusResidual.count(), m_blobGate.gpu_data, blobResidual.gpu_data, m_blobGatePlusResidual.mutable_gpu_data);
404
405 // layernorm
406 addBtmTop(m_blobGatePlusResidual, colTop[0]);
407 m_layerNorm.Forward(m_colBtm, m_colTop);
408 }
409
424 protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
425 {
426 // layernorm
427 addBtmTop(m_blobGatePlusResidual, colTop[0]);
428 m_layerNorm.Backward(m_colTop, rgbPropagateDown, m_colBtm);
429
430 // add residual
431 if (m_ipSkipLayer != null)
432 m_blobResidual.CopyFrom(m_blobGatePlusResidual, true);
433 else
434 colBottom[0].CopyFrom(m_blobGatePlusResidual, true, false, 0, true);
435
436 m_blobGate.CopyFrom(m_blobGatePlusResidual, true, false, 0, true);
437
438 // gate
439 addBtmTop(m_blobIp2, m_blobGate);
440 m_gate.Backward(m_colTop, rgbPropagateDown, m_colBtm);
441
442 // dropout
443 if (m_dropout != null)
444 {
445 addBtmTop(m_blobIp2, m_blobIp2);
446 m_dropout.Backward(m_colTop, rgbPropagateDown, m_colBtm);
447 }
448
449 Blob<T> blobIp1 = m_blobIp1;
450 if (colBottom.Count > 1)
451 blobIp1 = m_blobContextAdd;
452
453 // Fc2
454 addBtmTop(blobIp1, m_blobIp2);
455 m_ipFc2.Backward(m_colTop, rgbPropagateDown, m_colBtm);
456
457 // act
458 addBtmTop(blobIp1, blobIp1);
459 m_act.Backward(m_colTop, rgbPropagateDown, m_colBtm);
460
461 if (colBottom.Count > 1)
462 {
463 m_blobContext.CopyFrom(m_blobContextAdd, true);
464 m_blobIp1.CopyFrom(m_blobContextAdd, true);
465
466 addBtmTop(colBottom[1], m_blobContext);
467 m_ipContext.Backward(m_colTop, rgbPropagateDown, m_colBtm);
468 }
469
470 m_blobBtm.CopyFrom(colBottom[0]);
471 addBtmTop(m_blobBtm, m_blobIp1);
472 m_ipFc1.Backward(m_colTop, rgbPropagateDown, m_colBtm);
473
474 m_cuda.add(colBottom[0].count(), colBottom[0].gpu_diff, m_blobBtm.gpu_diff, colBottom[0].mutable_gpu_diff);
475
476 if (m_ipSkipLayer != null)
477 {
478 addBtmTop(m_blobBtm, m_blobResidual);
479 m_ipSkipLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);
480 m_cuda.add(colBottom[0].count(), colBottom[0].gpu_diff, m_blobBtm.gpu_diff, colBottom[0].mutable_gpu_diff);
481 }
482 }
483 }
484}
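For reference, the Eq. 2, Eq. 3 and Eq. 4 comments in LayerSetUp above refer to the Gated Residual Network equations of the Temporal Fusion Transformer paper (Lim et al., "Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting"). With primary input a and optional context c, the GRN computes:

\eta_2 = \mathrm{ELU}(W_{2,\omega}\, a + W_{3,\omega}\, c + b_{2,\omega})   (Eq. 4)
\eta_1 = W_{1,\omega}\, \eta_2 + b_{1,\omega}   (Eq. 3)
\mathrm{GRN}_{\omega}(a, c) = \mathrm{LayerNorm}\big(a + \mathrm{GLU}_{\omega}(\eta_1)\big)   (Eq. 2)
\mathrm{GLU}_{\omega}(\gamma) = \sigma(W_{4,\omega}\, \gamma + b_{4,\omega}) \odot (W_{5,\omega}\, \gamma + b_{5,\omega})   (Eq. 5)

In the listing above, m_ipFc1 and m_ipContext produce the two projections of Eq. 4 and m_act applies the ELU (or ReLU) activation, m_ipFc2 implements Eq. 3, m_gate is the GLU of Eq. 5, and m_layerNorm applies the layer normalization of Eq. 2. When input_dim differs from output_dim, m_ipSkipLayer first projects the input so the residual addition in Eq. 2 is well-defined.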
The Log class provides general output in text form.
Definition: Log.cs:13
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
Definition: Layer.cs:815
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
Definition: Layer.cs:728
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
Definition: Layer.cs:59
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
Definition: Layer.cs:439
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
Definition: Layer.cs:1468
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
Definition: Layer.cs:875
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
Definition: Layer.cs:1134
The GrnLayer implements the Gated Residual Network (GRN) layer.
Definition: GrnLayer.cs:28
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
Definition: GrnLayer.cs:151
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: y
Definition: GrnLayer.cs:134
override int MinBottomBlobs
Returns the min number of required bottom (input) Blobs: x
Definition: GrnLayer.cs:118
override int MaxBottomBlobs
Returns the max number of required bottom (input) Blobs: x, context
Definition: GrnLayer.cs:126
GrnLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The constructor.
Definition: GrnLayer.cs:54
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the GRN inputs (the primary input and optional context).
Definition: GrnLayer.cs:424
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
Definition: GrnLayer.cs:305
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
Definition: GrnLayer.cs:97
override void dispose()
Releases all GPU and host resources used by the Layer.
Definition: GrnLayer.cs:75
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
Definition: GrnLayer.cs:359
double dropout_ratio
Specifies the dropout ratio. (e.g. the probability that values will be dropped out and set to zero....
double alpha
Described in Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) by Clevert,...
Definition: EluParameter.cs:64
Specifies whether to use the NVIDIA cuDnn version or Caffe version of a given forward/backward operat...
Engine engine
Specifies the Engine in use.
Engine
Defines the type of engine to use.
FillerParameter weight_filler
The filler for the weights.
int axis
Specifies the first axis to be lumped into a single inner product computation; all preceding axes are...
FillerParameter bias_filler
The filler for the bias.
uint num_output
The number of outputs for the layer.
bool bias_term
Whether to have bias terms or not.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
LayerNormParameter layer_norm_param
Returns the parameter set when initialized with LayerType.LAYERNORM
GluParameter glu_param
Returns the parameter set when initialized with LayerType.GLU
GrnParameter grn_param
Returns the parameter set when initialized with LayerType.GRN
EluParameter elu_param
Returns the parameter set when initialized with LayerType.ELU
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
LayerType
Specifies the layer type.
DropoutParameter dropout_param
Returns the parameter set when initialized with LayerType.DROPOUT
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.layers.tft namespace contains all TFT related layers.
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12
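As an illustration only (this snippet is not part of the generated documentation): a minimal sketch of driving a GrnLayer on its own, assuming LayerParameter.LayerType.GRN is the registered layer type, that a CudaDnn<float> connection can be opened on the default GPU with new CudaDnn<float>(0), and that Log accepts a simple source-name string. The layer name, dimensions and blob shapes are made up for the example.

// Hypothetical stand-alone use of GrnLayer<float>; see the assumptions noted above.
CudaDnn<float> cuda = new CudaDnn<float>(0);   // assumed: device 0, default flags
Log log = new Log("grn_test");                 // assumed: Log(name) constructor

LayerParameter p = new LayerParameter(LayerParameter.LayerType.GRN, "grn1"); // assumed type name
p.grn_param.input_dim = 64;       // width of the primary input x
p.grn_param.hidden_dim = 64;      // width after fc1 / context projection
p.grn_param.output_dim = 32;      // != input_dim, so the skip projection is created
p.grn_param.dropout_ratio = 0.1;  // enables the internal DROPOUT layer
p.grn_param.axis = 1;

Layer<float> grn = Layer<float>.Create(cuda, log, p, null);

Blob<float> x = new Blob<float>(cuda, log);
x.Reshape(16, 64, 1, 1);                       // batch of 16 vectors of input_dim = 64
Blob<float> y = new Blob<float>(cuda, log);

BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(x);                              // a second bottom blob would supply the context c
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(y);

grn.Setup(colBottom, colTop);                  // builds fc1, activation, fc2, GLU gate, LayerNorm and skip
grn.Forward(colBottom, colTop);                // y = LayerNorm(skip(x) + GLU(Dropout(fc2(ELU(fc1(x))))))

Because output_dim (32) differs from input_dim (64) in this sketch, the internal skip projection (m_ipSkipLayer) is created; with matching dimensions the residual is taken directly from the bottom blob.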