MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
RecurrentLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using MyCaffe.basecode;
7using MyCaffe.db.image;
8using MyCaffe.common;
9using MyCaffe.param;
10using MyCaffe.fillers;
11using System.Diagnostics;
12
13namespace MyCaffe.layers
14{
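/// <summary>
/// The RecurrentLayer is an abstract class for implementing recurrent behavior inside of an
/// unrolled net; derived classes (e.g. the RNN and LSTM layers) define the unrolled architecture.
/// </summary>
/// <typeparam name="T">Specifies the base type of float or double.</typeparam>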
23 public abstract class RecurrentLayer<T> : Layer<T>
24 {
25 Layer<T> m_transposeData = null;
26 Layer<T> m_transposeClip = null;
27 Blob<T> m_blobBtmData = null;
28 Blob<T> m_blobBtmClip = null;
29 Blob<T> m_blobTopData = null;
30 Blob<T> m_blobWork = null;
31 BlobCollection<T> m_colBtm = null;
32 BlobCollection<T> m_colTop = null;
36 Net<T> m_unrolledNet = null;
37
41 protected int m_nN;
42
47 protected int m_nT;
48
52 protected bool m_bStaticInput;
53
58 int m_nLastLayerIndex;
59
64 bool m_bExposeHiddenInput;
65
70 bool m_bExposeHiddenOutput;
71
72 BlobCollection<T> m_colRecurInputBlobs = new BlobCollection<T>();
73 BlobCollection<T> m_colRecurOutputBlobs = new BlobCollection<T>();
74 BlobCollection<T> m_colOutputBlobs = new BlobCollection<T>();
75 Blob<T> m_blobXInputBlob;
76 Blob<T> m_blobXStaticInputBlob;
77 Blob<T> m_blobContInputBlob;
78 CancelEvent m_evtCancel;
79
80 // cuDNN Specific Members
81 long m_hCuDnn;
82 int m_nInputSize = 1;
83 int m_nHiddenSize;
84 int m_nNumLayers;
85 Blob<T> m_blobX;
86 Blob<T> m_blobHx;
87 Blob<T> m_blobCx;
88 Blob<T> m_blobY;
89 Blob<T> m_blobHy;
90 Blob<T> m_blobCy;
91 Blob<T> m_blobWts;
92 long m_hXDesc;
93 long m_hYDesc;
94 long m_hHxDesc;
95 long m_hCxDesc;
96 long m_hHyDesc;
97 long m_hCyDesc;
98 long m_hDropoutDesc;
99 long m_hDropoutStates;
100 long m_hWeightDesc;
101 long m_hRnnDesc;
102 long m_hRnn8;
103 long m_hWorkspace;
104 ulong m_nWorkspaceSizeInBytes;
105 bool m_bWorkspaceOwned = true;
106 long m_hReserved;
107 ulong m_nReservedSizeInBytes;
108 bool m_bReservedOwned = true;
109 RNN_MODE m_rnnMode;
110 bool m_bUseTensors = false;
111 List<int> m_rgShape = new List<int>(4);
112 bool m_bWarningShown = false;
113 bool m_bCudnnRnn8Supported = false;
114 bool m_bUseCudnnRnn8 = false;
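// Note: two execution paths are supported -- the Caffe engine unrolls the recurrence into an
// internal Net<T> (m_unrolledNet), while the cuDNN engine drives the cudnn RNN/RNN8 API through
// the handles and blobs declared above.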
115
124 public RecurrentLayer(CudaDnn<T> cuda, Log log, LayerParameter p, CancelEvent evtCancel)
125 : base(cuda, log, p)
126 {
127 m_evtCancel = evtCancel;
128
129 if (p.type == LayerParameter.LayerType.LSTM)
130 m_rnnMode = RNN_MODE.LSTM;
131 else
132 m_rnnMode = RNN_MODE.RNN_RELU;
133 }
134
135 private void free_tensor(ref long h)
136 {
137 if (h != 0)
138 {
139 m_cuda.FreeTensorDesc(h);
140 h = 0;
141 }
142 }
143
145 protected override void dispose()
146 {
147 base.dispose();
148
149 if (m_unrolledNet != null)
150 {
151 m_unrolledNet.Dispose();
152 m_unrolledNet = null;
153 }
154
155 dispose(ref m_blobHx);
156 dispose(ref m_blobCx);
157 dispose(ref m_blobHy);
158 dispose(ref m_blobCy);
159 dispose(ref m_blobWts);
160 dispose(ref m_blobBtmData);
161 dispose(ref m_blobBtmClip);
162 dispose(ref m_blobTopData);
163 dispose(ref m_blobWork);
164
165 free_tensor(ref m_hHxDesc);
166 free_tensor(ref m_hCxDesc);
167 free_tensor(ref m_hHyDesc);
168 free_tensor(ref m_hCyDesc);
169
170 if (m_hWeightDesc != 0)
171 {
172 m_cuda.FreeFilterDesc(m_hWeightDesc);
173 m_hWeightDesc = 0;
174 }
175
176 if (m_hRnnDesc != 0)
177 {
178 m_cuda.FreeRnnDesc(m_hRnnDesc);
179 m_hRnnDesc = 0;
180 }
181
182 if (m_hDropoutDesc != 0)
183 {
184 m_cuda.FreeDropoutDesc(m_hDropoutDesc);
185 m_hDropoutDesc = 0;
186 }
187
188 if (m_hDropoutStates != 0)
189 {
190 m_cuda.FreeMemory(m_hDropoutStates);
191 m_hDropoutStates = 0;
192 }
193
194 if (m_hXDesc != 0)
195 {
196 m_cuda.FreeRnnDataDesc(m_hXDesc);
197 m_hXDesc = 0;
198 }
199
200 if (m_hYDesc != 0)
201 {
202 m_cuda.FreeRnnDataDesc(m_hYDesc);
203 m_hYDesc = 0;
204 }
205
206 if (m_hWorkspace != 0)
207 {
208 if (m_bWorkspaceOwned)
209 m_cuda.FreeMemory(m_hWorkspace);
210 m_hWorkspace = 0;
211 }
212
213 if (m_hReserved != 0)
214 {
215 if (m_bReservedOwned)
216 m_cuda.FreeMemory(m_hReserved);
217 m_hReserved = 0;
218 }
219
220 if (m_hCuDnn != 0)
221 {
222 m_cuda.FreeCuDNN(m_hCuDnn);
223 m_hCuDnn = 0;
224 }
225
226 if (m_transposeData != null)
227 {
228 m_transposeData.Dispose();
229 m_transposeData = null;
230 }
231
232 if (m_transposeClip != null)
233 {
234 m_transposeClip.Dispose();
235 m_transposeClip = null;
236 }
237 }
238
243 public override void SetOnDebug(EventHandler<GetWorkBlobArgs<T>> fn)
244 {
245 base.SetOnDebug(fn);
246
247 if (m_unrolledNet == null)
248 return;
249
250 foreach (Layer<T> layer in m_unrolledNet.layers)
251 {
252 layer.SetOnDebug(fn);
253 }
254 }
255
260 public override void ResetOnDebug(EventHandler<GetWorkBlobArgs<T>> fn)
261 {
262 base.ResetOnDebug(fn);
263
264 if (m_unrolledNet == null)
265 return;
266
267 foreach (Layer<T> layer in m_unrolledNet.layers)
268 {
269 layer.ResetOnDebug(fn);
270 }
271 }
272
273 private void addBtmTop(Blob<T> btm, Blob<T> top)
274 {
275 m_colBtm.Clear();
276 m_colBtm.Add(btm);
277 m_colTop.Clear();
278 m_colTop.Add(top);
279 }
280
286 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
287 {
288 Blob<T> blobBtm0 = colBottom[0];
289 Blob<T> blobBtm1 = colBottom[1];
290
291 m_bWarningShown = false;
292 m_bCudnnRnn8Supported = m_cuda.IsRnn8Supported();
293 if (m_bCudnnRnn8Supported && m_param.recurrent_param.use_cudnn_rnn8_if_supported)
294 m_bUseCudnnRnn8 = true;
295
296 m_blobBtmData = new Blob<T>(m_cuda, m_log);
297 m_blobTopData = new Blob<T>(m_cuda, m_log);
298
299 if (m_param.recurrent_param.batch_first)
300 {
301 m_colBtm = new BlobCollection<T>();
302 m_colTop = new BlobCollection<T>();
303
304 LayerParameter transpose = new LayerParameter(LayerParameter.LayerType.TRANSPOSE, m_param.name + ".trans");
305 transpose.transpose_param.dim[0] = 1;
306 transpose.transpose_param.dim[1] = 0;
307
308 while (transpose.transpose_param.dim.Count > colBottom[0].num_axes)
309 {
310 transpose.transpose_param.dim.RemoveAt(transpose.transpose_param.dim.Count - 1);
311 }
312
313 m_transposeData = Layer<T>.Create(m_cuda, m_log, convertLayerParam(transpose, m_param), null);
314
315 addBtmTop(colBottom[0], m_blobBtmData);
316 m_transposeData.Setup(m_colBtm, m_colTop);
317 blobBtm0 = m_blobBtmData;
318
319 while (transpose.transpose_param.dim.Count > colBottom[1].num_axes)
320 {
321 transpose.transpose_param.dim.RemoveAt(transpose.transpose_param.dim.Count - 1);
322 }
323
324 m_transposeClip = Layer<T>.Create(m_cuda, m_log, convertLayerParam(transpose, m_param), null);
325 m_blobBtmClip = new Blob<T>(m_cuda, m_log);
326
327 addBtmTop(colBottom[1], m_blobBtmClip);
328 m_transposeClip.Setup(m_colBtm, m_colTop);
329
330 m_rgShape.Clear();
331 m_rgShape.Add(m_blobBtmClip.num);
332 m_rgShape.Add(m_blobBtmClip.channels);
333 m_blobBtmClip.Reshape(m_rgShape);
334
335 blobBtm1 = m_blobBtmClip;
336 }
337
338 m_log.CHECK_GE(blobBtm0.num_axes, 2, "Bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");
339 m_nT = blobBtm0.shape(0);
340 m_nN = blobBtm0.shape(1);
341
342 if (blobBtm0.num_axes > 2)
343 m_nInputSize = colBottom[0].count(2);
344
345 m_log.WriteLine("Initializing recurrent layer: assuming input batch contains " + m_nT.ToString() + " timesteps of " + m_nN.ToString() + " independent streams.");
346
347 m_log.CHECK_EQ(blobBtm1.num_axes, 2, "Bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");
348 m_log.CHECK_EQ(m_nT, blobBtm1.shape(0), "The bottom[1].shape(0) must equal T = " + m_nT.ToString());
349 m_log.CHECK_EQ(m_nN, blobBtm1.shape(1), "The bottom[1].shape(1) must equal N = " + m_nN.ToString());
350
351 // If expose_hidden is set, we take as input and produce as output
352 // the hidden state blobs at the first and last timesteps.
353 m_bExposeHiddenInput = m_param.recurrent_param.expose_hidden_input;
354 m_bExposeHiddenOutput = m_param.recurrent_param.expose_hidden_output;
355
356 m_blobWork = new Blob<T>(m_cuda, m_log);
357
358 if (m_param.recurrent_param.useCudnn())
359 layerSetUpCuDnn(colBottom, colTop);
360 else
361 layerSetUpCaffe(colBottom, colTop);
362 }
363
364 private void layerSetUpCuDnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
365 {
366 if (m_bUseCudnnRnn8)
367 layerSetupCudnnRnn8(colBottom, colTop);
368 else
369 layerSetupCudnnRnn(colBottom, colTop);
370 }
371
372 private void setupSharedWorkspaceAndReserved(ulong ulWsInBytes, ulong ulResInBytes)
373 {
374 m_nWorkspaceSizeInBytes = ulWsInBytes;
375 m_bWorkspaceOwned = true;
376 m_nReservedSizeInBytes = ulResInBytes;
377 m_bReservedOwned = true;
378
379 if (ulWsInBytes > 0)
380 m_hWorkspace = m_cuda.AllocMemory((long)m_nWorkspaceSizeInBytes);
381 if (ulResInBytes > 0)
382 m_hReserved = m_cuda.AllocMemory((long)ulResInBytes);
383 }
384
385 private void layerSetupCudnnRnn8(BlobCollection<T> colBottom, BlobCollection<T> colTop)
386 {
387 try
388 {
390 m_log.WriteLine("WARNING: RNN8 currently does not support Tensor Cores, disabling Tensor Cores for RNN8.");
391
392 m_nHiddenSize = (int)m_param.recurrent_param.num_output;
393 m_nNumLayers = (int)m_param.recurrent_param.num_layers;
394
395 m_hCuDnn = m_cuda.CreateCuDNN();
396
397 m_blobX = new Blob<T>(m_cuda, m_log);
398 m_blobX.Name = m_param.name + " x";
399 m_blobY = new Blob<T>(m_cuda, m_log);
400 m_blobY.Name = m_param.name + " y";
401
402 m_blobHx = new Blob<T>(m_cuda, m_log);
403 m_blobHx.Name = m_param.name + " hx";
404 m_blobCx = new Blob<T>(m_cuda, m_log);
405 m_blobCx.Name = m_param.name + " cx";
406 m_blobHy = new Blob<T>(m_cuda, m_log);
407 m_blobHy.Name = m_param.name + " hy";
408 m_blobCy = new Blob<T>(m_cuda, m_log);
409 m_blobCy.Name = m_param.name + " cy";
410 m_blobWts = new Blob<T>(m_cuda, m_log);
411 m_blobWts.Name = m_param.name + " weights";
412
413 blobs.Clear();
414 blobs.Add(m_blobWts);
415
416 int nBidirectionalScale = (m_param.recurrent_param.bidirectional) ? 2 : 1;
417
418 m_hRnn8 = m_cuda.CreateRnn8();
419 m_cuda.SetRnn8(m_hCuDnn,
420 m_hRnn8,
421 (m_phase == Phase.TRAIN) ? true : false,
422 RNN_DATALAYOUT.RNN_SEQ_MAJOR_PACKED,
423 m_rnnMode,
424 RNN_BIAS_MODE.RNN_DOUBLE_BIAS,
425 m_nT,
426 m_nN,
427 m_nInputSize,
428 m_nHiddenSize,
429 m_nHiddenSize * nBidirectionalScale, // Outputs
430 m_nHiddenSize, // Projection
431 m_nNumLayers,
432 (float)m_param.recurrent_param.dropout_ratio,
433 m_param.recurrent_param.dropout_seed,
434 m_param.recurrent_param.bidirectional);
435
436 Blob<T> blobBtm0 = colBottom[0];
437 if (m_param.recurrent_param.batch_first)
438 blobBtm0 = m_blobBtmData;
439
440 m_blobX.ReshapeLike(blobBtm0);
441 m_blobX.ShareData(blobBtm0);
442 m_blobX.ShareDiff(blobBtm0);
443 m_log.CHECK_EQ(m_blobX.count(), m_nT * m_nN * m_nInputSize, "The input should be Sequence * Batch * InputSize in length.");
444
445 int nDir = (m_param.recurrent_param.bidirectional) ? 2 : 1;
446 m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
447 m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
448
449 m_blobY.Reshape(m_nT, m_nN, m_nHiddenSize, nDir);
450 m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
451 m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
452
453 m_blobHx.SetData(0);
454 m_blobCx.SetData(0);
455 m_blobHy.SetData(0);
456 m_blobCy.SetData(0);
457
458 // Setup parameters - do this after the rnn descriptor is set
459 // otherwise we will not know how many parameters we have to allocate.
460 ulong szWtCount;
461 ulong ulWorkspaceSizeInBytes;
462 ulong ulReservedSizeInBytes;
463 m_cuda.GetRnn8MemorySizes(m_hCuDnn, m_hRnn8, out szWtCount, out ulWorkspaceSizeInBytes, out ulReservedSizeInBytes);
464
465 List<int> rgWtShape = new List<int>() { (int)szWtCount, 1, 1 };
466 m_blobWts.Reshape(rgWtShape);
467
468 // Setup the workspace and reserved memory.
469 setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);
470
471 // Fill the weights.
472 if (!shareParameter(m_blobWts, rgWtShape))
473 {
474 double dfWtVal = 0;
475 double dfWtVal2 = 0;
476 RNN_FILLER_TYPE ftWt = RNN_FILLER_TYPE.RNN_CONSTANT_FILLER;
477 if (m_param.recurrent_param.weight_filler.type == "xavier")
478 ftWt = RNN_FILLER_TYPE.RNN_XAVIER_FILLER;
479 else if (m_param.recurrent_param.weight_filler.type == "gaussian")
480 {
481 dfWtVal = m_param.recurrent_param.weight_filler.mean;
482 dfWtVal2 = m_param.recurrent_param.weight_filler.std;
483 ftWt = RNN_FILLER_TYPE.RNN_GAUSSIAN_FILLER;
484 }
485 else if (m_param.recurrent_param.weight_filler.type == "constant")
486 dfWtVal = m_param.recurrent_param.weight_filler.value;
487 else
488 throw new Exception("Currently the RNN2 weights only support 'constant' and 'xavier' fillers.");
489
490 double dfBiasVal = 0;
491 double dfBiasVal2 = 0;
492 RNN_FILLER_TYPE ftBias = RNN_FILLER_TYPE.RNN_CONSTANT_FILLER;
493 if (m_param.recurrent_param.bias_filler.type == "xavier")
494 ftBias = RNN_FILLER_TYPE.RNN_XAVIER_FILLER;
495 else if (m_param.recurrent_param.bias_filler.type == "gaussian")
496 {
497 dfBiasVal = m_param.recurrent_param.bias_filler.mean;
498 dfBiasVal2 = m_param.recurrent_param.bias_filler.std;
499 ftBias = RNN_FILLER_TYPE.RNN_GAUSSIAN_FILLER;
500 }
501 else if (m_param.recurrent_param.bias_filler.type == "constant")
502 dfBiasVal = m_param.recurrent_param.bias_filler.value;
503 else
504 throw new Exception("Currently the RNN2 bias' only support 'constant' and 'xavier' fillers.");
505
506 m_cuda.InitializeRnn8Weights(m_hCuDnn, m_hRnn8, m_blobWts.mutable_gpu_data, ftWt, dfWtVal, dfWtVal2, ftBias, dfBiasVal, dfBiasVal2);
507 }
508
509 m_blobWts.SetDiff(0);
510 }
511 catch (Exception excpt)
512 {
513 throw excpt;
514 }
515 finally
516 {
517 }
518 }
519
520 private void layerSetupCudnnRnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
521 {
522 try
523 {
524 m_bUseTensors = m_param.recurrent_param.cudnn_enable_tensor_cores;
525 m_nHiddenSize = (int)m_param.recurrent_param.num_output;
526 m_nNumLayers = (int)m_param.recurrent_param.num_layers;
527
528 m_hCuDnn = m_cuda.CreateCuDNN();
529
530 m_blobX = new Blob<T>(m_cuda, m_log);
531 m_blobX.Name = m_param.name + " x";
532 m_blobY = new Blob<T>(m_cuda, m_log);
533 m_blobY.Name = m_param.name + " y";
534
535 m_blobHx = new Blob<T>(m_cuda, m_log);
536 m_blobHx.Name = m_param.name + " hx";
537 m_blobCx = new Blob<T>(m_cuda, m_log);
538 m_blobCx.Name = m_param.name + " cx";
539 m_blobHy = new Blob<T>(m_cuda, m_log);
540 m_blobHy.Name = m_param.name + " hy";
541 m_blobCy = new Blob<T>(m_cuda, m_log);
542 m_blobCy.Name = m_param.name + " cy";
543 m_blobWts = new Blob<T>(m_cuda, m_log);
544 m_blobWts.Name = m_param.name + " weights";
545
546 blobs.Clear();
547 blobs.Add(m_blobWts);
548
549 m_hXDesc = m_cuda.CreateRnnDataDesc();
550 m_hYDesc = m_cuda.CreateRnnDataDesc();
551
552 m_hHxDesc = m_cuda.CreateTensorDesc();
553 m_hCxDesc = m_cuda.CreateTensorDesc();
554 m_hHyDesc = m_cuda.CreateTensorDesc();
555 m_hCyDesc = m_cuda.CreateTensorDesc();
556
557 // Setup Rnn Descriptor
558 m_hRnnDesc = m_cuda.CreateRnnDesc();
559 m_hWeightDesc = m_cuda.CreateFilterDesc();
560 m_hDropoutDesc = m_cuda.CreateDropoutDesc();
561
562
563 //------------------------------------
564 // Start reshape here.
565 //------------------------------------
566
567 Blob<T> blobBtm0 = colBottom[0];
568 if (m_param.recurrent_param.batch_first)
569 blobBtm0 = m_blobBtmData;
570
571 m_blobX.ReshapeLike(blobBtm0);
572 m_blobX.ShareData(blobBtm0);
573 m_blobX.ShareDiff(blobBtm0);
574 m_log.CHECK_EQ(m_blobX.count(), m_nT * m_nN * m_nInputSize, "The input should be Sequence * Batch * InputSize in length.");
575
576 int nDir = (m_param.recurrent_param.bidirectional) ? 2 : 1;
577 m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
578 m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
579
580 m_blobY.Reshape(m_nT, m_nN, m_nHiddenSize, nDir);
581 m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
582 m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
583
584 m_blobHx.SetData(0);
585 m_blobCx.SetData(0);
586 m_blobHy.SetData(0);
587 m_blobCy.SetData(0);
588
589 // Set the input/output data descriptors
590 m_cuda.SetRnnDataDesc(m_hXDesc, RNN_DATALAYOUT.RNN_SEQ_MAJOR_UNPACKED, m_nT, m_nN, m_nInputSize, false);
591 m_cuda.SetRnnDataDesc(m_hYDesc, RNN_DATALAYOUT.RNN_SEQ_MAJOR_UNPACKED, m_nT, m_nN, m_nHiddenSize, m_param.recurrent_param.bidirectional);
592
593 int[] rgDimA = new int[3];
594 int[] rgStrideA = new int[3];
595
596 rgDimA[0] = m_nNumLayers * ((m_param.recurrent_param.bidirectional) ? 2 : 1);
597 rgDimA[1] = m_nN; // mini batch.
598 rgDimA[2] = m_nHiddenSize;
599
600 rgStrideA[0] = rgDimA[2] * rgDimA[1];
601 rgStrideA[1] = rgDimA[2];
602 rgStrideA[2] = 1;
603
604 m_cuda.SetTensorNdDesc(m_hHxDesc, rgDimA, rgStrideA);
605 m_cuda.SetTensorNdDesc(m_hCxDesc, rgDimA, rgStrideA);
606 m_cuda.SetTensorNdDesc(m_hHyDesc, rgDimA, rgStrideA);
607 m_cuda.SetTensorNdDesc(m_hCyDesc, rgDimA, rgStrideA);
608
609 // Setup the dropout descriptor.
610 ulong ulStateCount;
611 ulong ulReservedCount;
612 m_cuda.GetDropoutInfo(m_hCuDnn, 0, out ulStateCount, out ulReservedCount);
613 m_hDropoutStates = m_cuda.AllocMemory((long)ulStateCount);
614 m_cuda.SetDropoutDesc(m_hCuDnn, m_hDropoutDesc, m_param.recurrent_param.dropout_ratio, m_hDropoutStates, m_param.recurrent_param.dropout_seed);
615
616 // Setup the RNN descriptor.
617 RNN_DIRECTION dir = (m_param.recurrent_param.bidirectional) ? RNN_DIRECTION.RNN_BIDIRECTIONAL : RNN_DIRECTION.RNN_UNIDIRECTIONAL;
618 m_cuda.SetRnnDesc(m_hCuDnn, m_hRnnDesc, m_nHiddenSize, m_nNumLayers, m_hDropoutDesc, m_rnnMode, m_bUseTensors, dir);
619
620 // Setup parameters - do this after the rnn descriptor is set
621 // otherwise we will not know how many parameters we have to allocate.
622 int nCount = m_cuda.GetRnnParamCount(m_hCuDnn, m_hRnnDesc, m_hXDesc);
623 List<int> rgWtShape = new List<int>() { nCount, 1, 1 };
624 m_blobWts.Reshape(rgWtShape);
625
626 int[] rgDimW = new int[3];
627 rgDimW[0] = nCount;
628 rgDimW[1] = 1;
629 rgDimW[2] = 1;
630
631 m_cuda.SetFilterNdDesc(m_hWeightDesc, rgDimW);
632
633 // Setup the workspace and reserved memory.
634 ulong ulReservedSizeInBytes;
635 ulong ulWorkspaceSizeInBytes = m_cuda.GetRnnWorkspaceCount(m_hCuDnn, m_hRnnDesc, m_hXDesc, out ulReservedSizeInBytes);
636
637 // Setup the workspace and reserved memory.
638 setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);
639
640 // Fill the weights.
641 if (!shareParameter(m_blobWts, rgWtShape))
642 {
643 int nNumLinearLayers = (m_rnnMode == RNN_MODE.LSTM) ? 8 : 2;
644 Filler<T> fillerWt = Filler<T>.Create(m_cuda, m_log, m_param.recurrent_param.weight_filler);
645 Filler<T> fillerBias = Filler<T>.Create(m_cuda, m_log, m_param.recurrent_param.bias_filler);
646 int nWtCount;
647 long hWt;
648 int nBiasCount;
649 long hBias;
650 int nBidir = (m_param.recurrent_param.bidirectional) ? 2 : 1;
651
652 for (int i = 0; i < m_nNumLayers * nBidir; i++)
653 {
654 for (int j = 0; j < nNumLinearLayers; j++)
655 {
656 m_cuda.GetRnnLinLayerParams(m_hCuDnn, m_hRnnDesc, i, m_hXDesc, m_hWeightDesc, m_blobWts.gpu_data, j, out nWtCount, out hWt, out nBiasCount, out hBias);
657
658 if (nWtCount % 2 != 0)
659 {
660 // Since some fillers (gaussian) require an even number of items,
661 // we can temporarily use the full weight diff area and then copy
662 // the non-even number of items into the layer weights.
663 fillerWt.Fill(nWtCount + 1, m_blobWts.mutable_gpu_diff);
664 m_cuda.copy(nWtCount, m_blobWts.mutable_gpu_diff, hWt);
665 }
666 else
667 {
668 fillerWt.Fill(nWtCount, hWt);
669 }
670
671 if (nBiasCount % 2 != 0)
672 {
673 // Since some fillers (gaussian) require an even number of items,
674 // we can temporarily use the full weight diff area and then copy
675 // the non-even number of items into the layer bias.
676 fillerBias.Fill(nBiasCount + 1, m_blobWts.mutable_gpu_diff);
677 m_cuda.copy(nBiasCount, m_blobWts.mutable_gpu_diff, hBias);
678 }
679 else
680 {
681 fillerBias.Fill(nBiasCount, hBias);
682 }
683
684 m_cuda.FreeMemoryPointer(hWt);
685 m_cuda.FreeMemoryPointer(hBias);
686 }
687 }
688 }
689
690 m_blobWts.SetDiff(0);
691 }
692 catch (Exception excpt)
693 {
694 throw excpt;
695 }
696 finally
697 {
698 }
699 }
700
701 private void layerSetUpCaffe(BlobCollection<T> colBottom, BlobCollection<T> colTop)
702 {
703 if (m_param.recurrent_param.auto_repeat_hidden_states_across_layers)
704 m_log.FAIL("The 'auto_repeat_hidden_states_across_layers' setting is not supported in the Caffe implementation, use the cuDNN implementation instead.");
705
706 Blob<T> blobBtm0 = colBottom[0];
707 Blob<T> blobBtm1 = colBottom[1];
708 if (m_param.recurrent_param.batch_first)
709 {
710 blobBtm0 = m_blobBtmData;
711 blobBtm1 = m_blobBtmClip;
712 }
713
714 // Get (recurrent) input/output names.
715 List<string> rgOutputNames = new List<string>();
716 OutputBlobNames(rgOutputNames);
717
718 List<string> rgRecurInputNames = new List<string>();
719 RecurrentInputBlobNames(rgRecurInputNames);
720
721 List<string> rgRecurOutputNames = new List<string>();
722 RecurrentOutputBlobNames(rgRecurOutputNames);
723
724 int nNumRecurBlobs = rgRecurInputNames.Count;
725 m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");
726
727 // If provided, bottom[2] is a static input to the recurrent net.
728 int nNumHiddenExposed = (m_bExposeHiddenOutput) ? nNumRecurBlobs : 0;
729 int nBottomCount = (m_bExposeHiddenInput) ? 4 : 2;
730 m_bStaticInput = (colBottom.Count > nBottomCount + nNumHiddenExposed) ? true : false;
731
732 if (m_bStaticInput)
733 {
734 m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
735 m_log.CHECK_EQ(m_nN, colBottom[2].shape(1), "When static input is present, the bottom[2].shape(1) must = N which is " + m_nN.ToString());
736
737 // The original check below (commented out) appears to be a bug, since the ordering is T,N,x,x
738 //m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
739 }
740
741 // Create a NetParameter; setup the inputs that aren't unique to particular
742 // recurrent architectures.
743 NetParameter net_param = new NetParameter();
744
745 LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);
746 input_layer.top.Add("x");
747 BlobShape input_shape1 = new param.BlobShape();
748 for (int i = 0; i < blobBtm0.num_axes; i++)
749 {
750 input_shape1.dim.Add(blobBtm0.shape(i));
751 }
752 input_layer.input_param.shape.Add(input_shape1);
753
754 input_layer.top.Add("cont");
755 BlobShape input_shape2 = new param.BlobShape();
756 for (int i = 0; i < blobBtm1.num_axes; i++)
757 {
758 input_shape2.dim.Add(blobBtm1.shape(i));
759 }
760 input_layer.input_param.shape.Add(input_shape2);
761
762 if (m_bStaticInput)
763 {
764 input_layer.top.Add("x_static");
765 BlobShape input_shape3 = new BlobShape();
766 for (int i = 0; i < colBottom[2].num_axes; i++)
767 {
768 input_shape3.dim.Add(colBottom[2].shape(i));
769 }
770 input_layer.input_param.shape.Add(input_shape3);
771 }
772
773 net_param.layer.Add(input_layer);
774
775 // Call the child's FillUnrolledNet implementation to specify the unrolled
776 // recurrent architecture.
777 FillUnrolledNet(net_param);
778
779 // Prepend this layer's name to the names of each layer in the unrolled net.
780 string strLayerName = m_param.name;
781 if (strLayerName.Length > 0)
782 {
783 for (int i = 0; i < net_param.layer.Count; i++)
784 {
785 LayerParameter layer = net_param.layer[i];
786 layer.name = strLayerName + "_" + layer.name;
787 }
788 }
789
790 // Add 'pseudo-losses' to all outputs to force backpropagation.
791 // (Setting force_backward is too aggressive as we may not need to backprop to
792 // all inputs, e.g., the sequence continuation indicators.)
793 List<string> rgPseudoLosses = new List<string>();
794 for (int i = 0; i < rgOutputNames.Count; i++)
795 {
796 rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
797 LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
798 layer.bottom.Add(rgOutputNames[i]);
799 layer.top.Add(rgPseudoLosses[i]);
800 layer.loss_weight.Add(1.0);
801 net_param.layer.Add(layer);
802 }
803
804 // Create the unrolled net.
805 Net<T> sharedNet = null;
806 if (m_param is LayerParameterEx<T>)
807 {
808 RecurrentLayer<T> sharedLayer = ((LayerParameterEx<T>)m_param).SharedLayer as RecurrentLayer<T>;
809 if (sharedLayer != null)
810 sharedNet = sharedLayer.m_unrolledNet;
811 }
812
813 m_unrolledNet = new Net<T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
815
816 // Setup pointers to the inputs.
817 m_blobXInputBlob = m_unrolledNet.blob_by_name("x");
818 m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");
819
820 if (m_bStaticInput)
821 m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");
822
823 // Setup pointers to paired recurrent inputs/outputs.
824 m_colRecurInputBlobs = new common.BlobCollection<T>();
825 m_colRecurOutputBlobs = new common.BlobCollection<T>();
826
827 for (int i = 0; i < nNumRecurBlobs; i++)
828 {
829 m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
830 m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
831 }
832
833 // Setup pointers to outputs.
834 m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
835 m_colOutputBlobs = new common.BlobCollection<T>();
836 for (int i = 0; i < rgOutputNames.Count; i++)
837 {
838 m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
839 }
840
841 // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
842 // plus maybe a static input.
843 int nStaticInput = (m_bStaticInput) ? 1 : 0;
844 m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");
845
846 // This layer's parameters are any parameters in the layers of the unrolled
847 // net. We only want one copy of each parameter, so check that the parameter
848 // is 'owned' by the layer, rather than shared with another.
849 blobs.Clear();
850 for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
851 {
852 if (m_unrolledNet.param_owners[i] == -1)
853 {
854 m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
855 blobs.Add(m_unrolledNet.parameters[i]);
856 }
857 }
858
859 // Check that param_propagate_down is set for all of the parameters in the
860 // unrolled net; set param_propagate_down to true in this layer.
861 for (int i = 0; i < m_unrolledNet.layers.Count; i++)
862 {
863 for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
864 {
865 m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
866 }
867 }
868 m_rgbParamPropagateDown = new DictionaryMap<bool>(blobs.Count, true);
869
870 // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
871 // batches.
872 for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
873 {
874 m_colRecurOutputBlobs[i].SetDiff(0);
875 }
876
877 // Check that the last output_names.count layers are the pseudo-losses;
878 // set last_layer_index so that we don't actually run these layers.
879 List<string> rgLayerNames = m_unrolledNet.layer_names;
880 m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;
881 for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
882 {
883 m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
884 }
885
886 // Setup shared Hx, Cx, Hy, Cy for transfers between tops and bottoms in
887 // forward and backward when specified - Sharing is used so that code
888 // is similar between Caffe and CuDnn.
889 Blob<T> blob;
890 m_blobHx = new Blob<T>(m_cuda, m_log);
891 m_blobHx.Name = m_param.name + " hx";
892 m_blobHx.reshape_when_sharing = false;
893 blob = m_colRecurInputBlobs[0];
894 m_blobHx.ReshapeLike(blob);
895 m_blobHx.ShareData(blob);
896 m_blobHx.ShareDiff(blob);
897
898 if (m_colRecurInputBlobs.Count > 1)
899 {
900 m_blobCx = new Blob<T>(m_cuda, m_log);
901 m_blobCx.Name = m_param.name + " cx";
902 m_blobCx.reshape_when_sharing = false;
903 blob = m_colRecurInputBlobs[1];
904 m_blobCx.ReshapeLike(blob);
905 m_blobCx.ShareData(blob);
906 m_blobCx.ShareDiff(blob);
907 }
908
909 m_blobHy = new Blob<T>(m_cuda, m_log);
910 m_blobHy.Name = m_param.name + " hy";
911 m_blobHy.reshape_when_sharing = false;
912 blob = m_colRecurOutputBlobs[0];
913 m_blobHy.ReshapeLike(blob);
914 m_blobHy.ShareData(blob);
915 m_blobHy.ShareDiff(blob);
916
917 if (m_colRecurOutputBlobs.Count > 1)
918 {
919 m_blobCy = new Blob<T>(m_cuda, m_log);
920 m_blobCy.Name = m_param.name + " cy";
921 m_blobCy.reshape_when_sharing = false;
922 blob = m_colRecurOutputBlobs[1];
923 m_blobCy.ReshapeLike(blob);
924 m_blobCy.ShareData(blob);
925 m_blobCy.ShareDiff(blob);
926 }
927 }
928
934 public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
935 {
936 Blob<T> blobBtm0 = colBottom[0];
937 Blob<T> blobBtm1 = colBottom[1];
938
939 if (m_param.recurrent_param.batch_first)
940 {
941 addBtmTop(colBottom[0], m_blobBtmData);
942 m_transposeData.Reshape(m_colBtm, m_colTop);
943 blobBtm0 = m_blobBtmData;
944
945 addBtmTop(colBottom[1], m_blobBtmClip);
946 m_transposeClip.Reshape(m_colBtm, m_colTop);
947
948 m_rgShape.Clear();
949 m_rgShape.Add(m_blobBtmClip.num);
950 m_rgShape.Add(m_blobBtmClip.channels);
951 m_blobBtmClip.Reshape(m_rgShape);
952
953 blobBtm1 = m_blobBtmClip;
954 }
955
956 m_log.CHECK_GE(blobBtm0.num_axes, 2, "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");
957 m_log.CHECK_EQ(m_nT, blobBtm0.shape(0), "input number of timesteps changed.");
958 m_nN = blobBtm0.shape(1);
959 m_log.CHECK_EQ(blobBtm1.num_axes, 2, "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");
960 m_log.CHECK_EQ(m_nT, blobBtm1.shape(0), "bottom[1].shape(0) should equal the timesteps T (" + m_nT.ToString() + ")");
961 m_log.CHECK_EQ(m_nN, blobBtm1.shape(1), "bottom[1].shape(1) should equal the streams N (" + m_nN + ")");
962
963 if (m_param.recurrent_param.useCudnn())
964 reshapeCuDnn(colBottom, colTop);
965 else
966 reshapeCaffe(colBottom, colTop);
967
967
968 if (m_param.recurrent_param.batch_first)
969 {
970 addBtmTop(m_blobTopData, colTop[0]);
971 m_transposeData.Reshape(m_colBtm, m_colTop);
972 }
973 }
974
975 private void reshapeCuDnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
976 {
977 if (m_bUseCudnnRnn8)
978 reshapeCudnnRnn8(colBottom, colTop);
979 else
980 reshapeCudnnRnn(colBottom, colTop);
981 }
982
983 private void reshapeCudnnRnn8(BlobCollection<T> colBottom, BlobCollection<T> colTop)
984 {
985 Blob<T> blobBtm0 = colBottom[0];
986 Blob<T> blobTop0 = colTop[0];
987
988 if (m_param.recurrent_param.batch_first)
989 {
990 blobBtm0 = m_blobBtmData;
991 blobTop0 = m_blobTopData;
992 }
993
994 m_blobX.ShareData(blobBtm0);
995 m_blobX.ShareDiff(blobBtm0);
996 m_log.CHECK_EQ(m_blobX.count(), m_nT * m_nN * m_nInputSize, "The input should be Sequence * Batch * InputSize in length.");
997
998 m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
999 m_blobHx.SetData(0);
1000 m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1001 m_blobCx.SetData(0);
1002
1003 m_blobY.Reshape(m_nT, m_nN, m_nHiddenSize, 1);
1004 m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1005 m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1006
1007 blobTop0.ReshapeLike(m_blobY);
1008 blobTop0.ShareData(m_blobY);
1009 blobTop0.ShareDiff(m_blobY);
1010
1011 if (m_param.recurrent_param.expose_hidden_output)
1012 {
1013 colTop[1].ReshapeLike(m_blobHy);
1014 colTop[1].ShareData(m_blobHy);
1015 colTop[1].ShareDiff(m_blobHy);
1016
1017 colTop[2].ReshapeLike(m_blobCy);
1018 colTop[2].ShareData(m_blobCy);
1019 colTop[2].ShareDiff(m_blobCy);
1020 }
1021 }
1022
1023 private void reshapeCudnnRnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1024 {
1025 Blob<T> blobBtm0 = colBottom[0];
1026 Blob<T> blobTop0 = colTop[0];
1027
1028 if (m_param.recurrent_param.batch_first)
1029 {
1030 blobBtm0 = m_blobBtmData;
1031 blobTop0 = m_blobTopData;
1032 }
1033
1034 m_blobX.ReshapeLike(blobBtm0);
1035 m_blobX.ShareData(blobBtm0);
1036 m_blobX.ShareDiff(blobBtm0);
1037 m_log.CHECK_EQ(m_blobX.count(), m_nT * m_nN * m_nInputSize, "The input should be Sequence * Batch * InputSize in length.");
1038
1039 m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1040 m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1041
1042 m_blobY.Reshape(m_nT, m_nN, m_nHiddenSize, 1);
1043 m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1044 m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
1045
1046 blobTop0.ReshapeLike(m_blobY);
1047 blobTop0.ShareData(m_blobY);
1048 blobTop0.ShareDiff(m_blobY);
1049
1050 if (m_param.recurrent_param.expose_hidden_output)
1051 {
1052 colTop[1].ReshapeLike(m_blobHy);
1053 colTop[1].ShareData(m_blobHy);
1054 colTop[1].ShareDiff(m_blobHy);
1055
1056 colTop[2].ReshapeLike(m_blobCy);
1057 colTop[2].ShareData(m_blobCy);
1058 colTop[2].ShareDiff(m_blobCy);
1059 }
1060 }
1061
1062 private void reshapeCaffe(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1063 {
1064 Blob<T> blobBtm0 = colBottom[0];
1065 Blob<T> blobBtm1 = colBottom[1];
1066 Blob<T> blobTop0 = colTop[0];
1067
1068 if (m_param.recurrent_param.batch_first)
1069 {
1070 blobBtm0 = m_blobBtmData;
1071 blobBtm1 = m_blobBtmClip;
1072 blobTop0 = m_blobTopData;
1073 }
1074
1075 m_blobXInputBlob.ReshapeLike(blobBtm0);
1076 List<int> rgContShape = blobBtm1.shape();
1077 m_blobContInputBlob.Reshape(rgContShape);
1078
1079 if (m_bStaticInput)
1080 m_blobXStaticInputBlob.ReshapeLike(colBottom[2]);
1081
1082 List<BlobShape> rgRecurInputShapes = new List<BlobShape>();
1083 RecurrentInputShapes(rgRecurInputShapes);
1084 m_log.CHECK_EQ(rgRecurInputShapes.Count, m_colRecurInputBlobs.Count, "The number of recurrent input shapes must equal the number of recurrent input blobs!");
1085
1086 for (int i = 0; i < rgRecurInputShapes.Count; i++)
1087 {
1088 m_colRecurInputBlobs[i].Reshape(rgRecurInputShapes[i]);
1089 }
1090
1091 m_unrolledNet.Reshape();
1092
1093 m_blobXInputBlob.ShareData(blobBtm0);
1094 m_blobXInputBlob.ShareDiff(blobBtm0);
1095 m_blobContInputBlob.ShareData(blobBtm1);
1096
1097 int nStaticInput = 0;
1098
1099 if (m_bStaticInput)
1100 {
1101 nStaticInput = 1;
1102 m_blobXStaticInputBlob.ShareData(colBottom[2]);
1103 m_blobXStaticInputBlob.ShareDiff(colBottom[2]);
1104 }
1105
1106 if (m_bExposeHiddenInput)
1107 {
1108 int nBottomOffset = 2 + nStaticInput;
1109 for (int i = nBottomOffset, j = 0; i < colBottom.Count; i++, j++)
1110 {
1111 m_log.CHECK(Utility.Compare<int>(m_colRecurInputBlobs[j].shape(), colBottom[i].shape()), "Shape mismatch - recur_input_blobs_[" + j.ToString() + "]: '" + m_colRecurInputBlobs[j].shape_string + "' vs. bottom[" + i.ToString() + "]: '" + colBottom[i].shape_string + "'");
1112 m_colRecurInputBlobs[j].ShareData(colBottom[i]);
1113 }
1114 }
1115
1116 for (int i = 0; i < m_colOutputBlobs.Count; i++)
1117 {
1118 if (i == 0)
1119 {
1120 blobTop0.ReshapeLike(m_colOutputBlobs[i]);
1121 blobTop0.ShareData(m_colOutputBlobs[i]);
1122 blobTop0.ShareDiff(m_colOutputBlobs[i]);
1123 }
1124 else
1125 {
1126 colTop[i].ReshapeLike(m_colOutputBlobs[i]);
1127 colTop[i].ShareData(m_colOutputBlobs[i]);
1128 colTop[i].ShareDiff(m_colOutputBlobs[i]);
1129 }
1130 }
1131
1132 if (m_bExposeHiddenOutput)
1133 {
1134 int nTopOffset = m_colOutputBlobs.Count;
1135 for (int i = nTopOffset, j = 0; i < colTop.Count; i++, j++)
1136 {
1137 colTop[i].ReshapeLike(m_colRecurOutputBlobs[j]);
1138 colTop[i].ShareData(m_colRecurOutputBlobs[j]);
1139 colTop[i].ShareDiff(m_colRecurOutputBlobs[j]);
1140 }
1141 }
1142 }
1143
1147 public virtual void Reset()
1148 {
1149 for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
1150 {
1151 m_colRecurOutputBlobs[i].SetData(0);
1152 }
1153 }
1154
1158 public override int MinBottomBlobs
1159 {
1160 get
1161 {
1162 int nMinBottoms = 2;
1163
1164 if (m_param.recurrent_param.expose_hidden_input)
1165 {
1166 List<string> rgInputs = new List<string>();
1167 RecurrentInputBlobNames(rgInputs);
1168 nMinBottoms += rgInputs.Count;
1169 nMinBottoms -= 1;
1170 }
1171
1172 return nMinBottoms;
1173 }
1174 }
1175
1179 public override int MaxBottomBlobs
1180 {
1181 get { return MinBottomBlobs + 1; }
1182 }
1183
1187 //public override int MinTopBlobs
1188 //{
1189 // get { return 1; }
1190 //}
1191
1195 public override int ExactNumTopBlobs
1196 {
1197 get
1198 {
1199 int nNumTops = 1; // MinTopBlobs;
1200
1201 if (m_param.recurrent_param.expose_hidden_output)
1202 {
1203 List<string> rgOutputs = new List<string>();
1204 RecurrentOutputBlobNames(rgOutputs);
1205 nNumTops += rgOutputs.Count;
1206 }
1207
1208 return nNumTops;
1209 }
1210 }
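// Illustrative counts: with only the 'x' and 'cont' inputs a recurrent layer uses 2 bottoms and 1 top;
// expose_hidden_input adds the initial hidden (and, for LSTM, cell) state blobs as extra bottoms, and
// expose_hidden_output adds the final hidden/cell state blobs as extra tops.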
1211
1217 public override bool AllowForceBackward(int nBottomIdx)
1218 {
1219 // Can't propagate to sequence continuation indicators.
1220 return (nBottomIdx != 1) ? true : false;
1221 }
1222
1228 protected abstract void FillUnrolledNet(NetParameter net_param);
1229
1236 protected abstract void RecurrentInputBlobNames(List<string> rgNames);
1237
1244 protected abstract void RecurrentInputShapes(List<BlobShape> rgShapes);
1245
1252 protected abstract void RecurrentOutputBlobNames(List<string> rgNames);
1253
1261 protected abstract void OutputBlobNames(List<string> rgNames);
1262
1264 protected override void setup_internal_blobs(BlobCollection<T> col)
1265 {
1266 if (col.Count > 0)
1267 return;
1268
1269 if (m_blobCx != null)
1270 col.Add(m_blobCx);
1271
1272 if (m_blobHx != null)
1273 col.Add(m_blobHx);
1274
1275 if (m_blobCy != null)
1276 col.Add(m_blobCy);
1277
1278 if (m_blobHy != null)
1279 col.Add(m_blobHy);
1280 }
1281
1329 protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1330 {
1331 if (m_param.recurrent_param.batch_first)
1332 {
1333 addBtmTop(colBottom[0], m_blobBtmData);
1334 m_transposeData.Forward(m_colBtm, m_colTop);
1335 addBtmTop(colBottom[1], m_blobBtmClip);
1336 m_transposeClip.Forward(m_colBtm, m_colTop);
1337 }
1338
1339 if (m_param.recurrent_param.useCudnn())
1340 forward_cudnn(colBottom, colTop);
1341 else
1342 forward_cuda(colBottom, colTop);
1343
1344 if (m_param.recurrent_param.batch_first)
1345 {
1346 addBtmTop(m_blobTopData, colTop[0]);
1347 m_transposeData.Forward(m_colBtm, m_colTop);
1348 }
1349 }
1350
1351 private void copy_or_repeat_fwd(Blob<T> bBtm, Blob<T> bTop)
1352 {
1353 if (!m_param.recurrent_param.auto_repeat_hidden_states_across_layers)
1354 {
1355 if (bBtm.count() == bTop.count())
1356 m_cuda.copy(bBtm.count(), bBtm.gpu_data, bTop.mutable_gpu_data);
1357 }
1358 else
1359 {
1360 // Repeat the hidden for each layer
1361 m_log.CHECK_EQ(bBtm.count(bBtm.num_axes - 2), bTop.count(1), "The '" + bBtm.Name.ToString() + "' should have the same shape as '" + bTop.Name.ToString() + "' which has a shape after the first axis = " + bTop.shape_string);
1362 m_cuda.channel_copy(bBtm.count(), 1, 1, bTop.num, bBtm.count(), 0, bTop.mutable_gpu_data, bBtm.gpu_data, DIR.BWD);
1363 for (int i = 1; i < bTop.num; i++)
1364 {
1365 m_cuda.channel_copy(bBtm.count(), 1, 1, bTop.num, bBtm.count(), i, bTop.mutable_gpu_data, bBtm.gpu_data, DIR.BWD);
1366 }
1367 }
1368 }
1369
1370 private void copy_or_repeat_bwd(Blob<T> bBtm, Blob<T> bTop)
1371 {
1372 if (!m_param.recurrent_param.auto_repeat_hidden_states_across_layers)
1373 {
1374 m_log.CHECK_EQ(bBtm.count(), bTop.count(), "The '" + bBtm.Name.ToString() + "' should have the same shape as '" + bTop.Name.ToString() + "' which has a shape = " + bTop.shape_string);
1375 m_cuda.copy(bBtm.count(), bTop.gpu_diff, bBtm.mutable_gpu_diff);
1376 }
1377 else
1378 {
1379 // Repeat the hidden for each layer
1380 m_log.CHECK_EQ(bBtm.count(), bTop.count(1), "The '" + bBtm.Name.ToString() + "' should have the same shape as '" + bTop.Name.ToString() + "' which has a shape after the first axis = " + bTop.shape_string);
1381 m_cuda.channel_copy(bBtm.count(), 1, 1, bTop.num, bBtm.count(), 0, bTop.gpu_diff, bBtm.mutable_gpu_diff, DIR.FWD);
1382
1383 for (int i = 1; i < bTop.num; i++)
1384 {
1385 m_cuda.channel_add(bBtm.count(), 1, 1, bTop.num, bBtm.count(), i, bTop.gpu_diff, bBtm.mutable_gpu_diff, DIR.FWD);
1386 }
1387 }
1388 }
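// The two helpers above mirror one another: copy_or_repeat_fwd copies (or repeats across layers)
// an external state blob into the hx/cx inputs on the forward pass, while copy_or_repeat_bwd copies
// the first layer's diff back and accumulates (channel_add) the remaining layers' diffs into the
// external state blob's diff on the backward pass.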
1389
1390 private void forward_cudnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1391 {
1392 if (m_bUseCudnnRnn8)
1393 forward_cudnnRnn8(colBottom, colTop);
1394 else
1395 forward_cudnnRnn(colBottom, colTop);
1396 }
1397
1398 private void forward_cudnnRnn8(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1399 {
1400 if (colBottom.Count > 2)
1401 {
1402 // Allow for setting initial state used with cuDnn LSTM
1403 if (colBottom.Count > 2)
1404 copy_or_repeat_fwd(colBottom[2], m_blobHx);
1405
1406 if (colBottom.Count > 3)
1407 copy_or_repeat_fwd(colBottom[3], m_blobCx);
1408
1409 m_blobHy.CopyFrom(m_blobHx); // initialized with previous state in LayerSetup when colBottom.Count > 2
1410 m_blobCy.CopyFrom(m_blobCx); // initialized with previous state in LayerSetup when colBottom.Count > 3
1411 }
1412
1413 m_cuda.Rnn8Forward(m_hCuDnn,
1414 m_hRnn8,
1415 m_blobX.gpu_data,
1416 m_blobY.mutable_gpu_data,
1417 m_blobHx.gpu_data,
1418 m_blobHy.mutable_gpu_data,
1419 m_blobCx.gpu_data,
1420 m_blobCy.mutable_gpu_data,
1421 m_blobWts.gpu_data,
1422 m_hWorkspace,
1423 m_hReserved);
1424 }
1425
1426 private void forward_cudnnRnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1427 {
1428 Blob<T> blobBtm1 = colBottom[1];
1429 if (m_param.recurrent_param.batch_first)
1430 blobBtm1 = m_blobBtmClip;
1431
1432 double dfClip = Utility.ConvertVal<T>(blobBtm1.GetData(0));
1433
1434 if (dfClip > 0 || colBottom.Count > 2)
1435 {
1436 // Allow for setting initial state used with cuDnn LSTM
1437 if (colBottom.Count > 2)
1438 copy_or_repeat_fwd(colBottom[2], m_blobHy);
1439
1440 if (colBottom.Count > 3)
1441 copy_or_repeat_fwd(colBottom[3], m_blobCy);
1442
1443 m_blobCx.CopyFrom(m_blobCy); // initialized with previous state in LayerSetup when colBottom.Count > 3
1444 m_blobHx.CopyFrom(m_blobHy); // initialized with previous state in LayerSetup when colBottom.Count > 2
1445 }
1446
1447 m_cuda.RnnForward(m_hCuDnn,
1448 m_hRnnDesc,
1449 m_hXDesc,
1450 m_blobX.gpu_data,
1451 m_hHxDesc,
1452 m_blobHx.gpu_data,
1453 m_hCxDesc,
1454 m_blobCx.gpu_data,
1455 m_hWeightDesc,
1456 m_blobWts.gpu_data,
1457 m_hYDesc,
1458 m_blobY.mutable_gpu_data,
1459 m_hHyDesc,
1460 m_blobHy.mutable_gpu_data,
1461 m_hCyDesc,
1462 m_blobCy.mutable_gpu_data,
1463 m_hWorkspace,
1464 m_nWorkspaceSizeInBytes,
1465 m_hReserved,
1466 m_nReservedSizeInBytes,
1467 (m_phase == Phase.TRAIN) ? true : false);
1468
1469 // Tops are shared with cy and hy in Reshape
1470 }
1471
1472 private void forward_cuda(BlobCollection<T> colBottom, BlobCollection<T> colTop)
1473 {
1474 // Hacky fix for test time... reshare all the shared blobs.
1475 // TODO: somehow make this work non-hackily.
1476 //if (m_phase == Phase.TEST || m_phase == Phase.RUN)
1477 // m_unrolledNet.ShareWeights();
1478
1479 m_log.CHECK_EQ(m_colRecurInputBlobs.Count, m_colRecurOutputBlobs.Count, "The recurrent input and output blobs must have the same count.");
1480
1481 if (!m_bExposeHiddenInput)
1482 {
1483 // Copy timestep T to timestep 0
1484 for (int i = 0; i < m_colRecurInputBlobs.Count; i++)
1485 {
1486 int nCount = m_colRecurInputBlobs[i].count();
1487 m_log.CHECK_EQ(nCount, m_colRecurOutputBlobs[i].count(), "The input and output blob at " + i.ToString() + " must have the same count.");
1488 long hTimestep_T_Data = m_colRecurOutputBlobs[i].gpu_data;
1489 long hTimestep_0_Data = m_colRecurInputBlobs[i].mutable_gpu_data;
1490 m_cuda.copy(nCount, hTimestep_T_Data, hTimestep_0_Data);
1491 }
1492 }
1493
1494 m_unrolledNet.ForwardFromTo(0, m_nLastLayerIndex);
1495
1496 // Tops are shared with cy and hy in Reshape
1497 }
1498
1505 protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
1506 {
1507 if (m_param.recurrent_param.batch_first)
1508 {
1509 addBtmTop(m_blobTopData, colTop[0]);
1510 m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);
1511 }
1512
1513 if (m_param.recurrent_param.useCudnn())
1514 backward_cudnn(colTop, rgbPropagateDown, colBottom);
1515 else
1516 backward_cuda(colTop, rgbPropagateDown, colBottom);
1517
1517
1518 if (m_param.recurrent_param.batch_first)
1519 {
1520 addBtmTop(colBottom[0], m_blobBtmData);
1521 m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);
1522 }
1523 }
1524
1525 private void backward_cudnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
1526 {
1527 if (m_bUseCudnnRnn8)
1528 backward_cudnnRnn8(colTop, rgbPropagateDown, colBottom);
1529 else
1530 backward_cudnnRnn(colTop, rgbPropagateDown, colBottom);
1531 }
1532
1533 private void backward_cudnnRnn8(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
1534 {
1535 // Copy top diffs to timestep T diffs
1536 long hhYDiff = 0;
1537 long hcYDiff = 0;
1538
1539 if (colTop.Count > 2)
1540 {
1541 // Copy state diffs back to previous LSTM
1542 if (colTop.Count > 1)
1543 {
1544 m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
1545 m_blobHy.CopyFrom(colTop[1], true);
1546 hhYDiff = m_blobHy.gpu_diff;
1547 }
1548 if (colTop.Count > 2)
1549 {
1550 m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
1551 m_blobCy.CopyFrom(colTop[2], true);
1552 hcYDiff = m_blobCy.gpu_diff;
1553 }
1554 }
1555
1556 m_cuda.Rnn8Backward(m_hCuDnn,
1557 m_hRnn8,
1558 m_blobY.gpu_data,
1559 m_blobY.gpu_diff,
1560 m_blobX.gpu_data,
1561 m_blobX.mutable_gpu_diff,
1562 m_blobHx.gpu_data,
1563 hhYDiff,
1564 m_blobHx.mutable_gpu_diff,
1565 m_blobCx.gpu_data,
1566 hcYDiff,
1567 m_blobCx.mutable_gpu_diff,
1568 m_blobWts.gpu_data,
1569 m_blobWts.mutable_gpu_diff,
1570 m_hWorkspace,
1571 m_hReserved);
1572
1573 // Copy timestep 0 diff to bottom diffs
1574 if (colBottom.Count > 2)
1575 {
1576 // Copy state diffs back to previous LSTM
1577 if (colBottom.Count > 2)
1578 copy_or_repeat_bwd(colBottom[2], m_blobHx);
1579
1580 if (colBottom.Count > 3)
1581 copy_or_repeat_bwd(colBottom[3], m_blobCx);
1582 }
1583 }
1584
1585 private void backward_cudnnRnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
1586 {
1587 if (rgbPropagateDown[1] && !m_bWarningShown)
1588 {
1589 m_log.WriteLine("WARNING: Cannot backpropagate to sequence indicators, sequence backprop will be ignored.");
1590 m_bWarningShown = true;
1591 }
1592
1593 // Copy top diffs to timestep T diffs
1594 if (colTop.Count > 2)
1595 {
1596 // Copy state diffs back to previous LSTM
1597 if (colTop.Count > 1)
1598 {
1599 m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
1600 m_blobHy.CopyFrom(colTop[1], true);
1601 }
1602 if (colTop.Count > 2)
1603 {
1604 m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
1605 m_blobCy.CopyFrom(colTop[2], true);
1606 }
1607 }
1608
1609 m_cuda.RnnBackwardData(m_hCuDnn,
1610 m_hRnnDesc,
1611 m_hYDesc,
1612 m_blobY.gpu_data,
1613 m_blobY.gpu_diff,
1614 m_hHyDesc,
1615 m_blobHy.gpu_diff,
1616 m_hCyDesc,
1617 m_blobCy.gpu_diff,
1618 m_hWeightDesc,
1619 m_blobWts.gpu_data,
1620 m_hHxDesc,
1621 m_blobHx.gpu_data,
1622 m_hCxDesc,
1623 m_blobCx.gpu_data,
1624 m_hXDesc,
1625 m_blobX.mutable_gpu_diff,
1626 m_hHxDesc,
1627 m_blobHx.mutable_gpu_diff,
1628 m_hCxDesc,
1629 m_blobCx.mutable_gpu_diff,
1630 m_hWorkspace,
1631 m_nWorkspaceSizeInBytes,
1632 m_hReserved,
1633 m_nReservedSizeInBytes);
1634 // cudnnBackwardWeights adds to the data in weight diff.
1635 m_blobWts.SetDiff(0);
1636
1637 m_cuda.RnnBackwardWeights(m_hCuDnn,
1638 m_hRnnDesc,
1639 m_hXDesc,
1640 m_blobX.gpu_data,
1641 m_hHxDesc,
1642 m_blobHx.gpu_data,
1643 m_hYDesc,
1644 m_blobY.gpu_data,
1645 m_hWorkspace,
1646 m_nWorkspaceSizeInBytes,
1647 m_hWeightDesc,
1648 m_blobWts.mutable_gpu_diff,
1649 m_hReserved,
1650 m_nReservedSizeInBytes);
1651
1652 // Copy timestep 0 diff to bottom diffs
1653 if (colBottom.Count > 2)
1654 {
1655 // Copy state diffs back to previous LSTM
1656 if (colBottom.Count > 2)
1657 copy_or_repeat_bwd(colBottom[2], m_blobHx);
1658
1659 if (colBottom.Count > 3)
1660 copy_or_repeat_bwd(colBottom[3], m_blobCx);
1661 }
1662 }
1663
1664 private void backward_cuda(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
1665 {
1666 m_log.CHECK(!rgbPropagateDown[1], "Cannot backpropagate to sequence indicators.");
1667
1668 // Copy top diffs to timestep T diffs (done automatically with tops sharing diffs)
1669
1670 // TODO: skip backpropagation to inputs and parameters inside the unrolled
1671 // net according to propagate_down[0] and propagate_down[2]. For now just
1672 // backprop to inputs and parameters unconditionally, as either the inputs or
1673 // the parameters do need backward (or Net would have set
1674 // layer_needs_backward[i] = false for this layer).
1675 m_unrolledNet.Backward(m_nLastLayerIndex);
1676
1677 // Copy timestep 0 diff to bottom diffs
1678 int nCount = (m_bStaticInput) ? 3 : 2;
1679 if (colBottom.Count > nCount)
1680 {
1681 // Copy state diffs back to previous LSTM
1682 if (colBottom.Count > nCount)
1683 {
1684 m_log.CHECK_EQ(colBottom[nCount].count(), m_blobHx.count(), "The bottom(" + nCount.ToString() + ") should have the same shape as 'hx' which has a shape = " + m_blobHx.shape_string);
1685 colBottom[nCount].CopyFrom(m_blobHx, true);
1686 }
1687 if (colBottom.Count > nCount+1)
1688 {
1689 m_log.CHECK_EQ(colBottom[nCount + 1].count(), m_blobCx.count(), "The bottom(" + (nCount + 1).ToString() + ") should have the same shape as 'cx' which has a shape = " + m_blobCx.shape_string);
1690 colBottom[nCount + 1].CopyFrom(m_blobCx, true);
1691 }
1692 }
1693 }
1694 }
1695}
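Usage sketch (not part of RecurrentLayer.cs): the snippet below shows how a concrete recurrent layer built on this class, such as an LSTM layer, might be configured and run. The LayerParameter constructor, the recurrent_param fields, and the Layer<T>.Create / Setup / Forward calls all appear in the listing above; the engine field, the variable names, and the parameter values are illustrative assumptions.

// A minimal sketch, assuming cuda (CudaDnn<float>), log (Log), evtCancel (CancelEvent),
// and the colBottom/colTop blob collections already exist.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.LSTM, "lstm1");
p.recurrent_param.engine = EngineParameter.Engine.CUDNN; // assumed engine selector; useCudnn() picks the cuDNN path
p.recurrent_param.num_output = 128;                      // hidden/output size
p.recurrent_param.num_layers = 2;                        // stacked layers (cuDNN engine)
p.recurrent_param.dropout_ratio = 0.1;                   // cuDNN dropout between layers
p.recurrent_param.batch_first = true;                    // inputs are (batch, time, ...); transposed internally
p.recurrent_param.use_cudnn_rnn8_if_supported = true;    // prefer the RNN8 API when available

Layer<float> lstm = Layer<float>.Create(cuda, log, p, evtCancel);
lstm.Setup(colBottom, colTop);   // colBottom: { x, cont } (+ optional x_static, h0, c0)
double dfLoss = lstm.Forward(colBottom, colTop);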