MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
CfcUnitLayer.cs
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.param.lnn;

namespace MyCaffe.layers.lnn
{
    /// <summary>
    /// The CfcUnitLayer implements the Closed form Continuous Cell (CfcCell) layer.
    /// </summary>
    /// <typeparam name="T">Specifies the base type of float or double.</typeparam>
    public class CfcUnitLayer<T> : LnnUnitLayer<T>
    {
        Layer<T> m_cat;
        Layer<T>[] m_rgLinearLayers = null;
        Layer<T>[] m_rgActivationLayers = null;
        Layer<T>[] m_rgDropoutLayers = null;
        BlobCollection<T> m_rgLinearBtms = new BlobCollection<T>();
        BlobCollection<T> m_rgLinearTops = new BlobCollection<T>();
        BlobCollection<T> m_rgActivationBtms = new BlobCollection<T>();
        BlobCollection<T> m_rgActivationTops = new BlobCollection<T>();
        BlobCollection<T> m_colTop = new BlobCollection<T>();
        BlobCollection<T> m_colBtm = new BlobCollection<T>();
        Layer<T> m_tanh;
        Layer<T> m_sigmoid;
        Layer<T> m_ff1;
        Layer<T> m_ff2;
        Layer<T> m_timeA;
        Layer<T> m_timeB;
        Blob<T> m_blobFF1;
        Blob<T> m_blobFF2;
        Blob<T> m_blobTimeA;
        Blob<T> m_blobTimeB;
        Blob<T> m_blobTInterp;
        Blob<T> m_blobTInterp1;
        Blob<T> m_blobTInterpInv;
        Blob<T> m_blobTInterpOnes;
        Blob<T> m_blobTs;
        Blob<T> m_blobX;
        Blob<T> m_blobTop1;
        Blob<T> m_blobTop2;
        int m_nNumLayers;
        int m_nNumUnits;

        /// <summary>
        /// The CfcUnitLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter of type CFC_UNIT with its cfc_unit_param settings.</param>
        public CfcUnitLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.CFC_UNIT;

            m_nNumLayers = p.cfc_unit_param.backbone_layers;
            m_nNumUnits = p.cfc_unit_param.backbone_units;

            if (m_nNumLayers < 1)
                m_nNumLayers = 1;

            LayerParameter concat = new LayerParameter(LayerParameter.LayerType.CONCAT, "concat");
            concat.concat_param.axis = 1;
            m_cat = Layer<T>.Create(m_cuda, m_log, convertLayerParam(concat, p), null);

            Blob<T> blobBtm = new Blob<T>(m_cuda, m_log);
            blobBtm.Name = "bb";

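            // Backbone: the concatenated (input, hx) tensor is run through 'backbone_layers'
            // blocks of InnerProduct -> activation (-> dropout on all but the first block when
            // backbone_dropout_ratio > 0).  The bottom/top blobs created below are chained so
            // that each block's activation output becomes the next block's linear input.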
            for (int i = 0; i < m_nNumLayers; i++)
            {
                // Linear Layer
                m_rgLinearBtms.Add(blobBtm);

                Blob<T> blobTop = new Blob<T>(m_cuda, m_log);
                blobTop.Name = "bb_" + i.ToString();

                m_rgLinearTops.Add(blobTop);

                // Activation Layer
                blobBtm = blobTop;
                m_rgActivationBtms.Add(blobBtm);

                blobTop = new Blob<T>(m_cuda, m_log);
                blobTop.Name = "bb_act_" + i.ToString();

                m_rgActivationTops.Add(blobTop);
                blobBtm = blobTop;
            }

            m_rgLinearLayers = new Layer<T>[m_nNumLayers];
            m_rgActivationLayers = new Layer<T>[m_nNumLayers];

            if (m_param.cfc_unit_param.backbone_dropout_ratio > 0)
                m_rgDropoutLayers = new Layer<T>[m_nNumLayers];

            for (int i = 0; i < m_nNumLayers; i++)
            {
                LayerParameter ip = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, "bb_" + i.ToString());
                ip.inner_product_param.num_output = (uint)m_nNumUnits;
                ip.inner_product_param.bias_term = true;
                ip.inner_product_param.weight_filler = new FillerParameter("xavier", 0.0, 0.01);
                ip.inner_product_param.bias_filler = new FillerParameter("constant", 0.1);
                m_rgLinearLayers[i] = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip, p), null);

                LayerParameter act;
                switch (m_param.cfc_unit_param.backbone_activation)
                {
                    case CfcUnitParameter.ACTIVATION.SILU:
                        act = new LayerParameter(LayerParameter.LayerType.SILU, "bb_act_" + i.ToString());
                        break;

                    case CfcUnitParameter.ACTIVATION.RELU:
                        act = new LayerParameter(LayerParameter.LayerType.RELU, "bb_act_" + i.ToString());
                        break;

                    case CfcUnitParameter.ACTIVATION.TANH:
                        act = new LayerParameter(LayerParameter.LayerType.TANH, "bb_act_" + i.ToString());
                        break;

                    case CfcUnitParameter.ACTIVATION.GELU:
                        act = new LayerParameter(LayerParameter.LayerType.GELU, "bb_act_" + i.ToString());
                        break;

                    case CfcUnitParameter.ACTIVATION.LECUN:
                        act = new LayerParameter(LayerParameter.LayerType.LECUN, "bb_act_" + i.ToString());
                        break;

                    default:
                        throw new Exception("Unknown activation type: " + m_param.cfc_unit_param.backbone_activation.ToString());
                }

                m_rgActivationLayers[i] = Layer<T>.Create(m_cuda, m_log, convertLayerParam(act, p), null);

                if (i > 0 && m_rgDropoutLayers != null)
                {
                    LayerParameter drop = new LayerParameter(LayerParameter.LayerType.DROPOUT, "bb_drop_" + i.ToString());
                    drop.dropout_param.dropout_ratio = m_param.cfc_unit_param.backbone_dropout_ratio;
                    m_rgDropoutLayers[i] = Layer<T>.Create(m_cuda, m_log, convertLayerParam(drop, p), null);
                }
            }

            m_blobX = new Blob<T>(m_cuda, m_log);
            m_blobX.Name = "x";

            // FF1 Layer
            LayerParameter ff1 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, "ff1");
            ff1.inner_product_param.num_output = (uint)m_param.cfc_unit_param.hidden_size;
            ff1.inner_product_param.bias_term = true;
            ff1.inner_product_param.weight_filler = new FillerParameter("xavier", 0.0, 0.01);
            ff1.inner_product_param.bias_filler = new FillerParameter("constant", 0.1);
            m_ff1 = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ff1, p), null);

            m_blobFF1 = new Blob<T>(m_cuda, m_log);
            m_blobFF1.Name = "ff1";

            // Tanh Layer
            LayerParameter tanh = new LayerParameter(LayerParameter.LayerType.TANH, "tanh");
            m_tanh = Layer<T>.Create(m_cuda, m_log, convertLayerParam(tanh, p), null);

            // FF2 Layer
            LayerParameter ff2 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, "ff2");
            ff2.inner_product_param.num_output = (uint)m_param.cfc_unit_param.hidden_size;
            ff2.inner_product_param.bias_term = true;
            ff2.inner_product_param.weight_filler = new FillerParameter("xavier", 0.0, 0.01);
            ff2.inner_product_param.bias_filler = new FillerParameter("constant", 0.1);
            m_ff2 = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ff2, p), null);

            m_blobFF2 = new Blob<T>(m_cuda, m_log);
            m_blobFF2.Name = "ff2";

            // Time A Layer
            LayerParameter timeA = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, "time_a");
            timeA.inner_product_param.num_output = (uint)m_param.cfc_unit_param.hidden_size;
            timeA.inner_product_param.bias_term = true;
            timeA.inner_product_param.weight_filler = new FillerParameter("xavier", 0.0, 0.01);
            timeA.inner_product_param.bias_filler = new FillerParameter("constant", 0.1);
            m_timeA = Layer<T>.Create(m_cuda, m_log, convertLayerParam(timeA, p), null);

            m_blobTimeA = new Blob<T>(m_cuda, m_log);
            m_blobTimeA.Name = "time_a";

            // Time B Layer
            LayerParameter timeB = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, "time_b");
            timeB.inner_product_param.num_output = (uint)m_param.cfc_unit_param.hidden_size;
            timeB.inner_product_param.bias_term = true;
            timeB.inner_product_param.weight_filler = new FillerParameter("xavier", 0.0, 0.01);
            timeB.inner_product_param.bias_filler = new FillerParameter("constant", 0.1);
            m_timeB = Layer<T>.Create(m_cuda, m_log, convertLayerParam(timeB, p), null);

            m_blobTimeB = new Blob<T>(m_cuda, m_log);
            m_blobTimeB.Name = "time_b";

            // Sigmoid Layer
            LayerParameter sigmoid = new LayerParameter(LayerParameter.LayerType.SIGMOID, "sigmoid");
            m_sigmoid = Layer<T>.Create(m_cuda, m_log, convertLayerParam(sigmoid, p), null);

            // T-Interp
            m_blobTInterp = new Blob<T>(m_cuda, m_log);
            m_blobTInterp.Name = "t-interp";

            m_blobTInterpInv = new Blob<T>(m_cuda, m_log);
            m_blobTInterpInv.Name = "t-interpinv";

            m_blobTInterp1 = new Blob<T>(m_cuda, m_log);
            m_blobTInterp1.Name = "t-interp1";
            m_blobTInterpOnes = new Blob<T>(m_cuda, m_log, true);
            m_blobTInterpOnes.Name = "t_interp_ones";

            m_blobTs = new Blob<T>(m_cuda, m_log);
            m_blobTs.Name = "ts";

            m_blobTop1 = new Blob<T>(m_cuda, m_log);
            m_blobTop1.Name = "top1";
            m_blobTop2 = new Blob<T>(m_cuda, m_log);
            m_blobTop2.Name = "top2";
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            base.dispose();

            if (m_rgLinearLayers != null)
            {
                for (int i = 0; i < m_rgLinearLayers.Length; i++)
                {
                    m_rgLinearLayers[i].Dispose();
                }
                m_rgLinearLayers = null;
            }

            if (m_bOwnInternalBlobs)
                dispose_internal_blobs();
            else
                clear_internal_blobs();

            dispose(ref m_blobTInterpOnes);

            dispose(ref m_cat);
            dispose(ref m_tanh);
            dispose(ref m_sigmoid);
            dispose(ref m_ff1);
            dispose(ref m_ff2);
            dispose(ref m_timeA);
            dispose(ref m_timeB);
        }

        private void dispose_internal_blobs(bool bSetToNull = true)
        {
            dispose(ref m_rgLinearBtms, bSetToNull);
            dispose(ref m_rgLinearTops, bSetToNull);
            dispose(ref m_rgActivationBtms, bSetToNull);
            dispose(ref m_rgActivationTops, bSetToNull);

            dispose(ref m_blobFF1);
            dispose(ref m_blobFF2);
            dispose(ref m_blobTimeA);
            dispose(ref m_blobTimeB);
            dispose(ref m_blobTInterp);
            dispose(ref m_blobTInterp1);
            dispose(ref m_blobTInterpInv);
            dispose(ref m_blobTs);
            dispose(ref m_blobX);
            dispose(ref m_blobTop1);
            dispose(ref m_blobTop2);
        }

        private void clear_internal_blobs()
        {
            m_rgLinearBtms.Clear();
            m_rgLinearTops.Clear();
            m_rgActivationBtms.Clear();
            m_rgActivationTops.Clear();
        }

        /// <summary>
        /// Create the internal shared blobs used by the layer for a given index.
        /// </summary>
        /// <param name="nIdx">Specifies the index used to name the shared blobs.</param>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <returns>The collection of newly created blobs is returned.</returns>
        public override BlobCollection<T> CreateInternalSharedBlobs(int nIdx, CudaDnn<T> cuda, Log log)
        {
            BlobCollection<T> col = new BlobCollection<T>();

            dispose_internal_blobs(false);

            Blob<T> blobFF1 = new Blob<T>(cuda, log);
            blobFF1.Name = "ff1_" + nIdx.ToString();
            col.Add(blobFF1);

            Blob<T> blobFF2 = new Blob<T>(cuda, log);
            blobFF2.Name = "ff2_" + nIdx.ToString();
            col.Add(blobFF2);

            Blob<T> blobTimeA = new Blob<T>(cuda, log);
            blobTimeA.Name = "timeA_" + nIdx.ToString();
            col.Add(blobTimeA);

            Blob<T> blobTimeB = new Blob<T>(cuda, log);
            blobTimeB.Name = "timeB_" + nIdx.ToString();
            col.Add(blobTimeB);

            Blob<T> blobTInterp = new Blob<T>(cuda, log);
            blobTInterp.Name = "tInterp_" + nIdx.ToString();
            col.Add(blobTInterp);

            Blob<T> blobTInterp1 = new Blob<T>(cuda, log);
            blobTInterp1.Name = "tInterp1_" + nIdx.ToString();
            col.Add(blobTInterp1);

            Blob<T> blobTInterpInv = new Blob<T>(cuda, log);
            blobTInterpInv.Name = "tInterpInv_" + nIdx.ToString();
            col.Add(blobTInterpInv);

            Blob<T> blobTs = new Blob<T>(cuda, log);
            blobTs.Name = "ts_" + nIdx.ToString();
            col.Add(blobTs);

            Blob<T> blobX = new Blob<T>(cuda, log);
            blobX.Name = "x_" + nIdx.ToString();
            col.Add(blobX);

            Blob<T> blobTop1 = new Blob<T>(cuda, log);
            blobTop1.Name = "top1_" + nIdx.ToString();
            col.Add(blobTop1);

            Blob<T> blobTop2 = new Blob<T>(cuda, log);
            blobTop2.Name = "top2_" + nIdx.ToString();
            col.Add(blobTop2);

            Blob<T> blobBb = new Blob<T>(cuda, log);
            blobBb.Name = "bb_" + nIdx.ToString();
            col.Add(blobBb);

            for (int i = 0; i < m_param.cfc_unit_param.backbone_layers; i++)
            {
                Blob<T> blobFc = new Blob<T>(cuda, log);
                blobFc.Name = "bb_fc" + (i + 1).ToString() + "_" + nIdx.ToString();
                col.Add(blobFc);

                Blob<T> blobAct = new Blob<T>(cuda, log);
                blobAct.Name = "bb_act" + (i + 1).ToString() + "_" + nIdx.ToString();
                col.Add(blobAct);
            }

            return col;
        }
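        // Sharing note: CreateInternalSharedBlobs() and SetInternalSharedBlobs() allow an owner
        // (for example, a layer that unrolls this unit over several steps) to allocate one set of
        // working blobs externally and hand them to this unit.  Once shared, m_bOwnInternalBlobs is
        // set to false and dispose() no longer frees those blobs; it only clears the references.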

        /// <summary>
        /// Set the internal shared blobs to a set of external blobs.
        /// </summary>
        /// <param name="col">Specifies the external blobs to use, in the order created by CreateInternalSharedBlobs.</param>
        public override void SetInternalSharedBlobs(BlobCollection<T> col)
        {
            int nIdx = 0;

            m_bOwnInternalBlobs = false;
            m_blobFF1 = col[nIdx];
            nIdx++;

            m_blobFF2 = col[nIdx];
            nIdx++;

            m_blobTimeA = col[nIdx];
            nIdx++;

            m_blobTimeB = col[nIdx];
            nIdx++;

            m_blobTInterp = col[nIdx];
            nIdx++;

            m_blobTInterp1 = col[nIdx];
            nIdx++;

            m_blobTInterpInv = col[nIdx];
            nIdx++;

            m_blobTs = col[nIdx];
            nIdx++;

            m_blobX = col[nIdx];
            nIdx++;

            m_blobTop1 = col[nIdx];
            nIdx++;

            m_blobTop2 = col[nIdx];
            nIdx++;

            BlobCollection<T> colLin = new BlobCollection<T>();
            while (nIdx < col.Count)
            {
                colLin.Add(col[nIdx]);
                nIdx++;
            }

            m_rgLinearBtms.Clear();
            m_rgLinearTops.Clear();
            m_rgActivationBtms.Clear();
            m_rgActivationTops.Clear();

            nIdx = 0;
            for (int i = 0; i < m_param.cfc_unit_param.backbone_layers; i++)
            {
                m_rgLinearBtms.Add(colLin[nIdx]);
                nIdx++;
                m_rgLinearTops.Add(colLin[nIdx]);
                m_rgActivationBtms.Add(colLin[nIdx]);
                nIdx++;
                m_rgActivationTops.Add(colLin[nIdx]);
            }

            colLin.Clear();
        }

        private void addBtmTop(Blob<T> btm, Blob<T> top)
        {
            m_colBtm.Clear();
            m_colBtm.Add(btm);
            m_colTop.Clear();
            m_colTop.Add(top);
        }

        /// <summary>
        /// Derivative layers should add all internal blobs to the 'col' provided.
        /// </summary>
        /// <param name="col">Specifies the collection of internal blobs.</param>
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;
        }

        /// <summary>
        /// Returns the exact number of required bottom (input) Blobs: input, hx, ts.
        /// </summary>
        public override int ExactNumBottomBlobs
        {
            get { return 3; }
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs: the new hidden state.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Re-initialize the parameters of the layer.
        /// </summary>
        /// <param name="target">Specifies the weights to target (e.g. weights, bias or both).</param>
        /// <returns>When handled, this method returns true, otherwise false.</returns>
        public override bool ReInitializeParameters(WEIGHT_TARGET target)
        {
            base.ReInitializeParameters(target);
            return true;
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: input, hx, ts.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
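            // Note: the learnable blobs of each sub-layer created in the constructor (the backbone
            // InnerProduct layers, ff1, ff2, time_a and time_b) are appended to this layer's 'blobs'
            // collection below, so they are exposed as this layer's learnable parameters.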
            addBtmTop(colBottom[0], m_rgLinearBtms[0]);
            m_colBtm.Add(colBottom[1]);
            m_cat.Setup(m_colBtm, m_colTop);

            for (int i = 0; i < m_nNumLayers; i++)
            {
                addBtmTop(m_rgLinearBtms[i], m_rgLinearTops[i]);
                m_rgLinearLayers[i].Setup(m_colBtm, m_colTop);
                blobs.Add(m_rgLinearLayers[i].blobs);

                addBtmTop(m_rgActivationBtms[i], m_rgActivationTops[i]);
                m_rgActivationLayers[i].Setup(m_colBtm, m_colTop);

                if (i > 0 && m_rgDropoutLayers != null)
                    m_rgDropoutLayers[i].Setup(m_colBtm, m_colTop);
            }

            Blob<T> blobX = m_rgActivationTops[m_nNumLayers - 1];
            m_blobX.ReshapeLike(blobX);

            // FF1 Layer
            addBtmTop(blobX, m_blobFF1);
            m_ff1.Setup(m_colBtm, m_colTop);
            blobs.Add(m_ff1.blobs);

            // Tanh Layer
            addBtmTop(m_blobFF1, m_blobFF1);
            m_tanh.Setup(m_colBtm, m_colTop);

            // FF2 Layer
            addBtmTop(blobX, m_blobFF2);
            m_ff2.Setup(m_colBtm, m_colTop);
            blobs.Add(m_ff2.blobs);

            addBtmTop(m_blobFF2, m_blobFF2);
            m_tanh.Setup(m_colBtm, m_colTop);

            // Time A Layer
            addBtmTop(blobX, m_blobTimeA);
            m_timeA.Setup(m_colBtm, m_colTop);
            blobs.Add(m_timeA.blobs);

            // Time B Layer
            addBtmTop(blobX, m_blobTimeB);
            m_timeB.Setup(m_colBtm, m_colTop);
            blobs.Add(m_timeB.blobs);

            // T-Interp
            m_blobTInterp.ReshapeLike(m_blobTimeA);
            m_blobTInterpInv.ReshapeLike(m_blobTimeA);
            m_blobTInterp1.ReshapeLike(m_blobTimeA);
            m_blobTInterpOnes.ReshapeLike(m_blobTimeA);

            addBtmTop(m_blobTInterp, colTop[0]);
            m_sigmoid.Setup(m_colBtm, m_colTop);

            m_blobTs.ReshapeLike(m_blobTimeA);
            m_blobTop1.ReshapeLike(colTop[0]);
            m_blobTop2.ReshapeLike(colTop[0]);
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: input, hx, ts.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            addBtmTop(colBottom[0], m_rgLinearBtms[0]);
            m_colBtm.Add(colBottom[1]);
            m_cat.Reshape(m_colBtm, m_colTop);

            for (int i = 0; i < m_rgLinearLayers.Length; i++)
            {
                addBtmTop(m_rgLinearBtms[i], m_rgLinearTops[i]);
                m_rgLinearLayers[i].Reshape(m_colBtm, m_colTop);

                addBtmTop(m_rgActivationBtms[i], m_rgActivationTops[i]);
                m_rgActivationLayers[i].Reshape(m_colBtm, m_colTop);

                if (m_rgDropoutLayers != null && m_rgDropoutLayers[i] != null)
                    m_rgDropoutLayers[i].Reshape(m_colBtm, m_colTop);
            }

            Blob<T> blobX = m_rgActivationTops[m_rgLinearLayers.Length - 1];
            m_blobX.ReshapeLike(blobX);

            // FF1 Layer
            addBtmTop(blobX, m_blobFF1);
            m_ff1.Reshape(m_colBtm, m_colTop);

            // Tanh Layer
            addBtmTop(m_blobFF1, m_blobFF1);
            m_tanh.Reshape(m_colBtm, m_colTop);

            // FF2 Layer
            addBtmTop(blobX, m_blobFF2);
            m_ff2.Reshape(m_colBtm, m_colTop);

            addBtmTop(m_blobFF2, m_blobFF2);
            m_tanh.Reshape(m_colBtm, m_colTop);

            // Time A Layer
            addBtmTop(blobX, m_blobTimeA);
            m_timeA.Reshape(m_colBtm, m_colTop);
            m_blobTs.ReshapeLike(m_blobTimeA);

            m_blobTInterp.ReshapeLike(m_blobTimeA);
            m_blobTInterpInv.ReshapeLike(m_blobTimeA);
            m_blobTInterp1.ReshapeLike(m_blobTimeA);
            m_blobTInterpOnes.ReshapeLike(m_blobTimeA);
            m_blobTInterpOnes.SetData(1.0);

            // Time B Layer
            addBtmTop(blobX, m_blobTimeB);
            m_timeB.Reshape(m_colBtm, m_colTop);

            // Sigmoid Layer
            addBtmTop(m_blobTInterp, colTop[0]);
            m_sigmoid.Reshape(m_colBtm, m_colTop);

            m_blobTop1.ReshapeLike(colTop[0]);
            m_blobTop2.ReshapeLike(colTop[0]);
        }

        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: input, hx, ts.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            addBtmTop(colBottom[0], m_rgLinearBtms[0]);
            m_colBtm.Add(colBottom[1]);
            m_cat.Forward(m_colBtm, m_colTop);

            for (int i = 0; i < m_rgLinearLayers.Length; i++)
            {
                addBtmTop(m_rgLinearBtms[i], m_rgLinearTops[i]);
                m_rgLinearLayers[i].Forward(m_colBtm, m_colTop);

                addBtmTop(m_rgActivationBtms[i], m_rgActivationTops[i]);
                m_rgActivationLayers[i].Forward(m_colBtm, m_colTop);

                if (m_rgDropoutLayers != null && m_rgDropoutLayers[i] != null)
                    m_rgDropoutLayers[i].Forward(m_colBtm, m_colTop);
            }

            Blob<T> blobX = m_rgActivationTops[m_rgLinearLayers.Length - 1];
            m_blobX.CopyFrom(blobX, false);

            // FF1 Layer
            addBtmTop(blobX, m_blobFF1);
            m_ff1.Forward(m_colBtm, m_colTop);

            // Tanh Layer
            addBtmTop(m_blobFF1, m_blobFF1);
            m_tanh.Forward(m_colBtm, m_colTop);

            // FF2 Layer
            addBtmTop(blobX, m_blobFF2);
            m_ff2.Forward(m_colBtm, m_colTop);

            addBtmTop(m_blobFF2, m_blobFF2);
            m_tanh.Forward(m_colBtm, m_colTop);

            // Time A Layer
            addBtmTop(blobX, m_blobTimeA);
            m_timeA.Forward(m_colBtm, m_colTop);

            // Time B Layer
            addBtmTop(blobX, m_blobTimeB);
            m_timeB.Forward(m_colBtm, m_colTop);

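            // The remaining steps implement the CfC closed-form update:
            //   t_interp = sigmoid(time_a * ts + time_b)
            //   gated:    top = ff1 * (1 - t_interp) + t_interp * ff2
            //   no_gate:  top = ff1 + t_interp * ff2
            // where ff1 and ff2 are the tanh-activated linear heads computed above from the
            // backbone output x, and ts is the per-sample timespan broadcast across channels.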
            // Calculate the t-interpolation factor.
            m_cuda.channel_fillfrom(m_blobTs.count(), m_blobTs.num, 1, m_blobTs.channels, colBottom[2].gpu_data, m_blobTs.mutable_gpu_data, DIR.FWD);
            // t_a * ts
            m_cuda.mul(m_blobTInterp.count(), m_blobTimeA.gpu_data, m_blobTs.gpu_data, m_blobTInterp.mutable_gpu_data);

            // t_interp = t_a * ts + t_b
            m_cuda.add(m_blobTInterp.count(), m_blobTimeB.gpu_data, m_blobTInterp.gpu_data, m_blobTInterp.mutable_gpu_data);

            // Sigmoid Layer
            addBtmTop(m_blobTInterp, m_blobTInterp);
            m_sigmoid.Forward(m_colBtm, m_colTop);

            if (m_param.cfc_unit_param.no_gate)
            {
                // t_interp * ff2
                m_cuda.mul(m_blobTop1.count(), m_blobTInterp.gpu_data, m_blobFF2.gpu_data, m_blobTop1.mutable_gpu_data);
                // ff1 + t_interp * ff2
                m_cuda.add(m_blobTop2.count(), m_blobFF1.gpu_data, m_blobTop1.gpu_data, m_blobTop2.mutable_gpu_data);
                colTop[0].CopyFrom(m_blobTop2);
            }
            else
            {
                // 1.0 - t_interp
                m_blobTInterpInv.SetData(1.0);
                m_cuda.sub(m_blobTInterpInv.count(), m_blobTInterpInv.gpu_data, m_blobTInterp.gpu_data, m_blobTInterpInv.mutable_gpu_data);
                // ff1 * (1.0 - t_interp)
                m_cuda.mul(m_blobTInterpInv.count(), m_blobTInterpInv.gpu_data, m_blobFF1.gpu_data, m_blobTop1.mutable_gpu_data);
                // t_interp * ff2
                m_cuda.mul(colTop[0].count(), m_blobTInterp.gpu_data, m_blobFF2.gpu_data, m_blobTop2.mutable_gpu_data);
                // ff1 * (1.0 - t_interp) + t_interp * ff2
                m_cuda.add(colTop[0].count(), m_blobTop1.gpu_data, m_blobTop2.gpu_data, colTop[0].mutable_gpu_data);
            }
        }

        /// <summary>
        /// Computes the error gradient w.r.t. the CfcUnit value inputs.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs, whose diff contains the error gradient.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate down to each bottom Blob.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: input, hx, ts.</param>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
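            // Gradients of the output mix w.r.t. its three operands, mirroring the forward pass:
            //   gated:   d(top)/d(ff1) = 1 - t_interp,  d(top)/d(ff2) = t_interp,  d(top)/d(t_interp) = ff2 - ff1
            //   no_gate: d(top)/d(ff1) = 1,             d(top)/d(ff2) = t_interp,  d(top)/d(t_interp) = ff2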
            if (m_param.cfc_unit_param.no_gate)
            {
                // ff1 grad = top.grad * 1.0
                m_blobFF1.CopyFrom(colTop[0], true);

                // ff2 grad = top.grad * t_interp
                m_cuda.mul(m_blobFF2.count(), colTop[0].gpu_diff, m_blobTInterp.gpu_data, m_blobFF2.mutable_gpu_diff);

                // t_interp grad = top.grad * ff2
                m_cuda.mul(m_blobTInterp.count(), colTop[0].gpu_diff, m_blobFF2.gpu_data, m_blobTInterp1.mutable_gpu_diff);
            }
            else
            {
                // ff1 grad = top.grad * (1.0 - t_interp)
                m_cuda.sub(m_blobFF1.count(), m_blobTInterpOnes.gpu_data, m_blobTInterp.gpu_data, m_blobFF1.mutable_gpu_diff);
                m_cuda.mul(m_blobFF1.count(), m_blobFF1.gpu_diff, colTop[0].gpu_diff, m_blobFF1.mutable_gpu_diff);

                // ff2 grad = top.grad * t_interp
                m_cuda.mul(m_blobFF2.count(), colTop[0].gpu_diff, m_blobTInterp.gpu_data, m_blobFF2.mutable_gpu_diff);

                // ti grad = top.grad * (ff2 - ff1)
                m_cuda.sub(m_blobTInterp1.count(), m_blobFF2.gpu_data, m_blobFF1.gpu_data, m_blobTInterp1.mutable_gpu_diff);
                m_cuda.mul(m_blobTInterp1.count(), m_blobTInterp1.gpu_diff, colTop[0].gpu_diff, m_blobTInterp1.mutable_gpu_diff);
            }

            // Sigmoid Grad
            m_blobTInterp1.CopyFrom(m_blobTInterp);
            addBtmTop(m_blobTInterp, m_blobTInterp1);
            m_sigmoid.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            // t_b grad = t-interp grad * 1.0
            m_blobTimeB.CopyFrom(m_blobTInterp, true);

            // t_a grad = t-interp grad * ts
            m_cuda.mul(m_blobTimeA.count(), m_blobTInterp.gpu_diff, m_blobTs.gpu_data, m_blobTimeA.mutable_gpu_diff);

            // ts grad = t-interp grad * t_a
            m_cuda.mul(m_blobTs.count(), m_blobTInterp.gpu_diff, m_blobTimeA.gpu_data, m_blobTs.mutable_gpu_diff);
            m_cuda.channel_sum(m_blobTs.count(), 1, m_blobTs.num, m_blobTs.channels, m_blobTs.gpu_diff, colBottom[2].mutable_gpu_diff, false);

            Blob<T> blobX = m_rgActivationTops[m_rgActivationTops.Count - 1];
            blobX.SetDiff(0);

            // time_b grad
            addBtmTop(m_blobX, m_blobTimeB);
            m_timeB.Backward(m_colTop, rgbPropagateDown, m_colBtm);
            m_cuda.add(m_blobX.count(), m_blobX.gpu_diff, blobX.gpu_diff, blobX.mutable_gpu_diff);

            // time_a grad
            addBtmTop(m_blobX, m_blobTimeA);
            m_timeA.Backward(m_colTop, rgbPropagateDown, m_colBtm);
            m_cuda.add(m_blobX.count(), m_blobX.gpu_diff, blobX.gpu_diff, blobX.mutable_gpu_diff);

            // ff2 grad
            addBtmTop(m_blobFF2, m_blobFF2);
            m_tanh.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            addBtmTop(m_blobX, m_blobFF2);
            m_ff2.Backward(m_colTop, rgbPropagateDown, m_colBtm);
            m_cuda.add(m_blobX.count(), m_blobX.gpu_diff, blobX.gpu_diff, blobX.mutable_gpu_diff);

            // ff1 grad
            addBtmTop(m_blobFF1, m_blobFF1);
            m_tanh.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            addBtmTop(m_blobX, m_blobFF1);
            m_ff1.Backward(m_colTop, rgbPropagateDown, m_colBtm);
            m_cuda.add(m_blobX.count(), m_blobX.gpu_diff, blobX.gpu_diff, blobX.mutable_gpu_diff);

            // Backbone grad
            for (int i = m_rgLinearLayers.Length - 1; i >= 0; i--)
            {
                addBtmTop(m_rgActivationBtms[i], m_rgActivationTops[i]);

                if (m_rgDropoutLayers != null && m_rgDropoutLayers[i] != null)
                    m_rgDropoutLayers[i].Backward(m_colTop, rgbPropagateDown, m_colBtm);

                m_rgActivationLayers[i].Backward(m_colTop, rgbPropagateDown, m_colBtm);

                addBtmTop(m_rgLinearBtms[i], m_rgLinearTops[i]);
                m_rgLinearLayers[i].Backward(m_colTop, rgbPropagateDown, m_colBtm);
            }

            addBtmTop(colBottom[0], m_rgLinearBtms[0]);
            m_colBtm.Add(colBottom[1]);
            m_cat.Backward(m_colTop, new List<bool>() { true, true }, m_colBtm);
        }
    }
}
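A minimal stand-alone usage sketch (not part of CfcUnitLayer.cs above): it shows how a CFC_UNIT LayerParameter might be filled in from cfc_unit_param and how the three bottom blobs (input, hx, ts) and the single top blob are wired through Setup/Forward. The device id, batch size, input size and hidden size are illustrative assumptions, as is the Blob constructor taking explicit (num, channels, height, width) dimensions; check CfcUnitParameter and Blob for the exact defaults before relying on this.

// Sketch only - assumes CUDA device 0, batch 16, input size 8, hidden size 64.
CudaDnn<float> cuda = new CudaDnn<float>(0);
Log log = new Log("cfc_unit_sample");

LayerParameter p = new LayerParameter(LayerParameter.LayerType.CFC_UNIT, "cfc_unit");
p.cfc_unit_param.hidden_size = 64;
p.cfc_unit_param.backbone_units = 64;
p.cfc_unit_param.backbone_layers = 2;
p.cfc_unit_param.backbone_dropout_ratio = 0.0f;
p.cfc_unit_param.backbone_activation = CfcUnitParameter.ACTIVATION.LECUN;
p.cfc_unit_param.no_gate = false;

Layer<float> layer = Layer<float>.Create(cuda, log, p, null);

// Bottom blobs: input (N x input_size), hx (N x hidden_size), ts (N x 1).
Blob<float> blobInput = new Blob<float>(cuda, log, 16, 8, 1, 1);
Blob<float> blobHx = new Blob<float>(cuda, log, 16, 64, 1, 1);
Blob<float> blobTs = new Blob<float>(cuda, log, 16, 1, 1, 1);
Blob<float> blobTop = new Blob<float>(cuda, log);

BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(blobInput);
colBottom.Add(blobHx);
colBottom.Add(blobTs);
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(blobTop);

layer.Setup(colBottom, colTop);
layer.Forward(colBottom, colTop);   // colTop[0] now holds the new hidden state (N x hidden_size).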