MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
Net.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading;
6using System.IO;
7using System.Diagnostics;
8using MyCaffe.basecode;
9using MyCaffe.db.image;
10using MyCaffe.param;
11using MyCaffe.data;
12using MyCaffe.layers;
13using MyCaffe.fillers;
14
15namespace MyCaffe.common
16{
22 public class Net<T> : IDisposable
23 {
24 NetParameter m_param;
25 CudaDnn<T> m_cuda;
26 Log m_log;
27
28 // The network name.
29 string m_strName;
30
31 // The phase: TRAIN or TEST
32 Phase m_phase = Phase.NONE;
33 Phase m_phaseOriginal = Phase.NONE;
34
35 // Individual layers in the net.
36 List<Layer<T>> m_rgLayers = new List<Layer<T>>();
37 List<string> m_rgstrLayerNames = new List<string>();
38 DictionaryEx<string, int> m_rgLayerNamesIndex = new DictionaryEx<string, int>(0);
39 List<bool> m_rgbLayerNeedBackward = new List<bool>();
40
41 // The blobs storing intermediate results between the layers.
42 BlobCollection<T> m_colBlobs = new BlobCollection<T>();
43 List<string> m_rgstrBlobNames = new List<string>();
44 DictionaryEx<string, int> m_rgBlobNamesIndex = new DictionaryEx<string, int>(0);
45 List<bool> m_rgbBlobNeedBackward = new List<bool>();
46
47 // The bottom vecs store the vectors containing the inputs for each layer.
48 // They don't actually host the blobs (m_colBlobs does), so we simply store
49 // the references.
50 List<BlobCollection<T>> m_rgcolBottomVecs = new List<BlobCollection<T>>();
51 List<List<int>> m_rgrgnBottomIdVecs;
52 List<List<bool>> m_rgrgbBottomNeedBackward = new List<List<bool>>();
53
54 // The top vecs store the vectors containing the outputs of each layer.
55 List<BlobCollection<T>> m_rgcolTopVecs = new List<BlobCollection<T>>();
56 List<List<int>> m_rgrgnTopIdVecs = new List<List<int>>();
57
58 // Vector of weights in the loss (or objective) function of each net blob,
59 // indexed by blob_id.
60 List<double> m_rgdfBlobLossWeights = new List<double>();
61 List<List<int>> m_rgrgnParamIdVecs = new List<List<int>>();
62 List<int> m_rgnParamOwners = new List<int>();
63 List<string> m_rgstrParamDisplayNames = new List<string>();
64 List<KeyValuePair<int, int>> m_rgParamLayerIndices = new List<KeyValuePair<int, int>>();
65 DictionaryEx<string, int> m_rgParamNamesIndex = new DictionaryEx<string, int>(0);
66
67 // blob indices for the input and the output of the net.
68 List<int> m_rgnNetInputBlobIndices = new List<int>();
69 List<int> m_rgnNetOutputBlobIndices = new List<int>();
70 BlobCollection<T> m_colNetInputBlobs = new BlobCollection<T>();
71 BlobCollection<T> m_colNetOutputBlobs = new BlobCollection<T>();
72
73 // The parameters in the network.
74 BlobCollection<T> m_colParams = new BlobCollection<T>();
75 BlobCollection<T> m_colLearnableParams = new BlobCollection<T>();
76
77 // The mapping from params -> learnable_params : we have
78 // learnable_param_ids.Count == params.Count,
79 // and learnable_params[learnable_param_ids[i]] == params[i]
80 // if and only if params[i] is an 'owner'; otherwise params[i] is a sharer
81 // and learnable_params[learnable_param_ids[i]] gives its owner.
82 List<int> m_rgnLearnableParamIds = new List<int>();
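// Illustrative sketch (not part of the original source): if layer A owns a param
// blob named "shared_w" and layer B re-uses it via the same param name, then
// params = { A.shared_w, B.shared_w } while learnable_params = { A.shared_w },
// and learnable_param_ids = { 0, 0 }, so both entries resolve to the single owner.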
83
84 // The learning rate multipliers for learnable params.
85 List<double?> m_rgdfParamsLr = new List<double?>();
86
87 // The weight decay multipliers for learnable params.
88 List<double?> m_rgdfParamsWeightDecay = new List<double?>();
89
90 // The bytes of memory used by this net
91 long m_lMemoryUsed = 0;
92
93 // Whether to compute and display debug info for the net.
94 bool m_bDebugInfo = false;
95
96 // The in-memory database passed through to the data layer(s).
97 IXDatabaseBase m_db = null;
98
99 // Cancel event used to cancel training and testing.
100 CancelEvent m_evtCancel;
101
102 // Store the last forward input to the data layer, if any (currently only supported by BatchDataInput).
103 BatchInput m_lastBatchInput = null;
104
105 // When enabled, the best result mask zeros out all data items except for those that have the greatest values.
106 string m_strBestResultTargetNodeToMask = null;
107 int m_nBestResultCount = 5;
108 BEST_RESULT_TYPE m_nBestResultType = BEST_RESULT_TYPE.BY_CHANNEL;
109
110 long m_hWorkspaceData = 0; // shared among the layers, only grows in size.
111 ulong m_lWorkspaceSizeInBytes = 0;
112
113 Net<T> m_sharedNet = null;
114 bool m_bBreakOnFirstNan = false;
115 bool m_bDetectDetailedNans = false;
116 bool m_bEnableLayerDebugging = false;
117 Blob<T> m_blobWork = null;
118 List<Layer<T>> m_rgConnectedLayers = new List<Layer<T>>();
119 Blob<T> m_debugBlob = null;
120 int m_nLastNonFrozenLayerIdx = 0;
121 string m_strDataSource = null;
122 Layer<T> m_labelMappingLayer = null;
123 bool m_bFirstForwardInputWarning = true;
124
128 public event EventHandler<WorkspaceArgs> OnGetWorkspace;
132 public event EventHandler<WorkspaceArgs> OnSetWorkspace;
136 public event EventHandler<GetIterationArgs> OnGetIteration;
137
138#pragma warning disable 1591
139
140 public enum BEST_RESULT_TYPE
141 {
142 BY_CHANNEL,
143 BY_WEIGHT
144 }
145
146#pragma warning restore 1591
147
161 public Net(CudaDnn<T> cuda, Log log, NetParameter p, CancelEvent evtCancel, IXDatabaseBase db, Phase phaseOverride = Phase.NONE, AutoResetEvent evtTrainingCompleted = null, Net<T> sharedNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
162 {
163 m_sharedNet = sharedNet;
164 m_db = db;
165 m_cuda = cuda;
166 m_log = log;
167 m_blobWork = new Blob<T>(cuda, log);
168
169 m_evtCancel = evtCancel;
170
171 if (getws != null)
172 OnGetWorkspace += new EventHandler<WorkspaceArgs>(getws);
173
174 if (setws != null)
175 OnSetWorkspace += new EventHandler<WorkspaceArgs>(setws);
176
177 Init(p, phaseOverride, evtTrainingCompleted);
178 }
179
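// Minimal usage sketch (an assumption, not part of the original source): 'cuda',
// 'log', 'netParam', 'evtCancel' and 'db' are presumed to be created and configured
// elsewhere by the caller.
//
//   Net<float> net = new Net<float>(cuda, log, netParam, evtCancel, db, Phase.TEST);
//   double dfLoss;
//   BlobCollection<float> colOut = net.Forward(out dfLoss);
//   net.Dispose();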
184 protected virtual void Dispose(bool bDisposing)
185 {
186 foreach (Layer<T> layer in m_rgConnectedLayers)
187 {
188 if (layer is DataLayer<T>)
189 ((DataLayer<T>)layer).Disconnect();
190 }
191
192 m_rgConnectedLayers.Clear();
193
194 foreach (Layer<T> layer in m_rgLayers)
195 {
196 layer.Dispose();
197 }
198
199 m_rgLayers.Clear();
200
201 if (m_colBlobs != null)
202 {
203 m_colBlobs.Dispose();
204 m_colBlobs = null;
205 }
206
207 foreach (BlobCollection<T> b in m_rgcolBottomVecs)
208 {
209 b.Dispose();
210 }
211
212 m_rgcolBottomVecs.Clear();
213
214 foreach (BlobCollection<T> b in m_rgcolTopVecs)
215 {
216 b.Dispose();
217 }
218
219 m_rgcolTopVecs.Clear();
220
221 if (m_colNetOutputBlobs != null)
222 {
223 m_colNetOutputBlobs.Dispose();
224 m_colNetOutputBlobs = null;
225 }
226
227 if (m_colParams != null)
228 {
229 m_colParams.Dispose();
230 m_colParams = null;
231 }
232
233 if (m_colLearnableParams != null)
234 {
235 m_colLearnableParams.Dispose();
236 m_colLearnableParams = null;
237 }
238
239 if (m_hWorkspaceData != 0)
240 {
241 m_cuda.DisableGhostMemory();
242 m_cuda.FreeMemory(m_hWorkspaceData);
243 m_cuda.ResetGhostMemory();
244 m_hWorkspaceData = 0;
245 m_lWorkspaceSizeInBytes = 0;
246 }
247
248 if (m_blobWork != null)
249 {
250 m_blobWork.Dispose();
251 m_blobWork = null;
252 }
253 }
254
258 public void Dispose()
259 {
260 if (m_debugBlob != null)
261 {
262 m_debugBlob.Dispose();
263 m_debugBlob = null;
264 }
265
266 Dispose(true);
267 }
268
269 private void scanForRecommendations(NetParameter p)
270 {
271 for (int i = 0; i < p.layer.Count; i++)
272 {
273 if (p.layer[i].type == LayerParameter.LayerType.LSTM_SIMPLE)
274 m_log.WriteLine("WARNING: Layer '" + p.layer[i].name + "' uses the LSTM_SIMPLE type; we recommend using the LSTM type with the CUDNN engine instead.");
275
276 if (p.layer[i].type == LayerParameter.LayerType.LSTM && p.layer[i].recurrent_param.engine == EngineParameter.Engine.CAFFE)
277 m_log.WriteLine("WARNING: Layer '" + p.layer[i].name + "' uses the LSTM type with the CAFFE engine (which is quite slow); we recommend using the LSTM type with the CUDNN engine instead.");
278 }
279 }
280
287 public void Init(NetParameter p, Phase phaseOverride = Phase.NONE, AutoResetEvent evtTrainingCompleted = null)
288 {
289 try
290 {
291 m_bFirstForwardInputWarning = true;
292 m_param = p;
293
294 // Set phase from the state.
295 if (phaseOverride != Phase.NONE)
296 {
297 m_phase = phaseOverride;
298 p.state.phase = m_phase;
299 }
300 else
301 {
302 m_phase = p.state.phase;
303 }
304
305 m_phaseOriginal = m_phase;
306
307 scanForRecommendations(p);
308
309 // Filter layers based on their include/exclude rules and
310 // the current NetState.
311 NetParameter filtered_param = FilterNet(p);
312 m_log.WriteLine("Initializing net from parameters: " + filtered_param.DebugString());
313
314 // Create a copy of filtered_param with splits added where necessary.
315 NetParameter param = InsertSplits(filtered_param);
316
317 // Basically, build all the layers and set up their connections.
318 m_strName = param.name;
319
320 DictionaryEx<string, int> blob_name_to_idx = new DictionaryEx<string, int>(0);
321 List<string> available_blobs = new List<string>();
322
323 m_log.CHECK(param.input_dim.Count == 0 || param.input_shape.Count == 0, "Must specify either input_shape OR deprecated input_dim, not both.");
324
325 if (param.input_dim.Count > 0)
326 {
327 // Deprecated 4D dimensions.
328 m_log.CHECK_EQ(param.input.Count * 4, param.input_dim.Count, "Incorrect input blob dimension specification.");
329 }
330 else
331 {
332 m_log.CHECK_EQ(param.input.Count, param.input_shape.Count, "Exactly one input_shape must be specified per input.");
333 }
334
335 m_lMemoryUsed = 0;
336
337 // Set the input blobs
338 for (int input_id = 0; input_id < param.input.Count; input_id++)
339 {
340 int layer_id = -1; // inputs have fake layer ID = -1
341 AppendTop(param, layer_id, input_id, available_blobs, blob_name_to_idx);
342 }
343
344 // For each layer, set up its input and output
345 m_rgcolBottomVecs = new List<BlobCollection<T>>();
346 m_rgcolTopVecs = new List<BlobCollection<T>>();
347 m_rgrgnBottomIdVecs = new List<List<int>>();
348 m_rgrgnParamIdVecs = new List<List<int>>();
349 m_rgrgnTopIdVecs = new List<List<int>>();
350 m_rgrgbBottomNeedBackward = new List<List<bool>>();
351
352 Dictionary<string, Layer<T>> rgSyncLayers = new Dictionary<string, Layer<T>>();
353
354 for (int layer_id = 0; layer_id < param.layer.Count; layer_id++)
355 {
356 m_rgcolBottomVecs.Add(new BlobCollection<T>());
357 m_rgcolTopVecs.Add(new BlobCollection<T>());
358 m_rgrgnBottomIdVecs.Add(new List<int>());
359 m_rgrgnTopIdVecs.Add(new List<int>());
360 m_rgrgnParamIdVecs.Add(new List<int>());
361 m_rgrgbBottomNeedBackward.Add(new List<bool>());
362
363 // Inherit phase from net if unset.
364 if (param.layer[layer_id].phase == Phase.NONE)
365 param.layer[layer_id].phase = m_phase;
366
367 // Setup layer.
368 LayerParameter layer_param = param.layer[layer_id];
369 if (layer_param.propagate_down.Count > 0)
370 m_log.CHECK_EQ(layer_param.propagate_down.Count, layer_param.bottom.Count, "propagate_down param must be specified either 0 or bottom.Count times.");
371
372 //-------------------------------------------
373 // When sharing the blobs of another net
374 // (e.g. The run net does this when also
375 // training, to save memory)
376 // pass the parameters and internal blobs
377 // into the layer_parameter thus allowing
378 // each layer to share blobs as appropriate.
379 //-------------------------------------------
380 LayerParameter layer_paramEx = layer_param;
381 if (m_sharedNet != null)
382 {
383 Layer<T> sharedLayer = m_sharedNet.FindLayer(layer_param.type, layer_param.name);
384 layer_paramEx = new LayerParameterEx<T>(layer_param, m_sharedNet.parameters, m_sharedNet.layer_blobs(layer_param.name), sharedLayer);
385 }
386
387 layer_paramEx.solver_count = m_param.solver_count;
388 layer_paramEx.solver_rank = m_param.solver_rank;
389
390 // Setup layer continued.
391 Layer<T> layer1 = Layer<T>.Create(m_cuda, m_log, layer_paramEx, m_evtCancel, m_db, new TransferInput(getInput, setInput));
392 layer1.OnGetWorkspace += layer_OnGetWorkspace;
393 layer1.OnSetWorkspace += layer_OnSetWorkspace;
394 layer1.OnGetIteration += layer_OnGetIteration;
395
396 if (layer1.type == LayerParameter.LayerType.DATA)
397 m_strDataSource = layer1.layer_param.data_param.source;
398 else if (layer1.type == LayerParameter.LayerType.LABELMAPPING)
399 m_labelMappingLayer = layer1;
400
401 m_rgLayers.Add(layer1);
402
403 m_rgstrLayerNames.Add(layer_param.name);
404 m_log.WriteLine("Creating layer " + layer_param.name);
405
406 bool need_backward = false;
407
408 // Figure out this layer's input and output
409 for (int bottom_id = 0; bottom_id < layer_param.bottom.Count; bottom_id++)
410 {
411 int blob_id = AppendBottom(param, layer_id, bottom_id, available_blobs, blob_name_to_idx);
412
413 // If a blob needs backward, this layer should provide it.
414 need_backward |= m_rgbBlobNeedBackward[blob_id];
415 }
416
417 int num_top = layer_param.top.Count;
418 for (int top_id = 0; top_id < num_top; top_id++)
419 {
420 // Ignore tops named 'null'.
421 if (param.layer[layer_id] != null && param.layer[layer_id].top[top_id] == "null")
422 continue;
423
424 AppendTop(param, layer_id, top_id, available_blobs, blob_name_to_idx);
425
426 // Collect Input layer tops as Net inputs.
427 if (layer_param.type == LayerParameter.LayerType.INPUT)
428 {
429 int nBlobID = blobs.Count - 1;
430 m_rgnNetInputBlobIndices.Add(nBlobID);
431 m_colNetInputBlobs.Add(blobs[nBlobID]);
432 }
433 }
434
435 // If the layer specifies that AutoTopBlobs() == true and the LayerParameter
436 // specified fewer than the required number (as specified by
437 // ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
438 Layer<T> layer = m_rgLayers[layer_id];
439 if (layer.AutoTopBlobs)
440 {
441 int needed_num_top = Math.Max(layer.MinTopBlobs, layer.ExactNumTopBlobs);
442
443 while (num_top < needed_num_top)
444 {
445 // Add 'anonymous' top blobs -- do not modify available_blobs or
446 // blob_name_to_idx as we don't want these blobs to be usable as input
447 // to other layers.
448 AppendTop(param, layer_id, num_top, null, null);
449 num_top++;
450 }
451 }
452
453 // After this layer is connected, set it up.
454 m_rgLayers[layer_id].SetNetParameterUsed(param); // used for label mapping
455 m_rgLayers[layer_id].Setup(m_rgcolBottomVecs[layer_id], m_rgcolTopVecs[layer_id]);
456
457 // Setup the layer.
458 m_log.WriteLine("Setting up " + m_rgstrLayerNames[layer_id]);
459
460 for (int top_id = 0; top_id < m_rgcolTopVecs[layer_id].Count; top_id++)
461 {
462 int nIdx = m_rgrgnTopIdVecs[layer_id][top_id];
463
464 if (m_rgdfBlobLossWeights.Count <= nIdx)
465 Utility.Resize<double>(ref m_rgdfBlobLossWeights, nIdx + 1, 0.0);
466
467 double dfLoss = layer.loss(top_id);
468
469 m_rgdfBlobLossWeights[nIdx] = dfLoss;
470
471 if (m_log.IsEnabled)
472 {
473 string strOut = "Top shape: " + m_rgcolTopVecs[layer_id][top_id].shape_string;
474
475 if (dfLoss != 0)
476 strOut += " with loss weight " + dfLoss.ToString();
477
478 m_log.WriteLine(strOut);
479 }
480
481 m_lMemoryUsed += m_rgcolTopVecs[layer_id][top_id].count();
482 }
483
484 m_log.WriteLine("Memory required for data: " + (m_lMemoryUsed * Utility.BaseTypeSize<T>()).ToString());
485
486 int param_size = layer_param.GetParameterCount();
487 int num_param_blobs = m_rgLayers[layer_id].blobs.Count();
488 m_log.CHECK_LE(param_size, num_param_blobs, "Too many params specified for layer " + layer_param.name);
489
490 ParamSpec default_param_spec = new ParamSpec();
491
492 for (int param_id = 0; param_id < num_param_blobs; param_id++)
493 {
494 ParamSpec param_spec = (param_id < param_size) ? layer_param.parameters[param_id] : default_param_spec;
495 bool param_need_backward = (param_spec.lr_mult != 0.0) ? true : false;
496
497 need_backward |= param_need_backward;
498 m_rgLayers[layer_id].set_param_propagate_down(param_id, param_need_backward);
499 }
500
501 for (int param_id = 0; param_id < num_param_blobs; param_id++)
502 {
503 AppendParam(param, layer_id, param_id);
504 }
505
506 // Connect up the synced layers, if any.
507 if (layer_param.type == LayerParameter.LayerType.DATA)
508 {
509 if (layer_param.data_param.synchronize_with != null)
510 {
511 rgSyncLayers.Add(layer_param.data_param.synchronize_with, m_rgLayers[layer_id]);
512 }
513 else
514 {
515 List<KeyValuePair<string, Layer<T>>> rgSyncLayers1 = rgSyncLayers.ToList();
516 for (int i = 0; i < rgSyncLayers1.Count; i++)
517 {
518 if (rgSyncLayers1[i].Key == layer_param.name)
519 {
520 ((DataLayer<T>)layer).Connect((DataLayer<T>)rgSyncLayers1[i].Value);
521 m_rgConnectedLayers.Add(layer);
522 rgSyncLayers.Remove(layer_param.name);
523 }
524 }
525 }
526 }
527
528 // Finally, set the backward flag.
529 m_rgbLayerNeedBackward.Add(need_backward);
530
531 if (need_backward)
532 {
533 for (int top_id = 0; top_id < m_rgrgnTopIdVecs[layer_id].Count; top_id++)
534 {
535 int nIdx = m_rgrgnTopIdVecs[layer_id][top_id];
536 m_rgbBlobNeedBackward[nIdx] = true;
537 }
538 }
539 }
540
541 if (rgSyncLayers.Count > 0)
542 {
543 string strLayer = "";
544
545 foreach (KeyValuePair<string, Layer<T>> kv in rgSyncLayers)
546 {
547 strLayer += kv.Key + ", ";
548 }
549
550 strLayer = strLayer.TrimEnd(',', ' ');
551 m_log.FAIL("The following layers are expected to be marked with 'synchronize_target = True': '" + strLayer + "'.");
552 }
553
554 // Go through the net backwards to determine which blobs contribute to the
555 // loss. We can skip backward computation for blobs that don't contribute
556 // to the loss.
557 // Also checks if all bottom blobs don't need backward computation (possible
558 // because of the skip_propagate_down param) and so we can skip backward
559 // computation for the entire layer.
560 List<string> blobs_under_loss = new List<string>();
561 List<string> blobs_skip_backp = new List<string>();
562
563 for (int layer_id = m_rgLayers.Count - 1; layer_id >= 0; layer_id--)
564 {
565 bool layer_contributes_loss = false;
566 bool layer_skip_propagate_down = true;
567
568 for (int top_id = 0; top_id < m_rgcolTopVecs[layer_id].Count; top_id++)
569 {
570 int nIdx = m_rgrgnTopIdVecs[layer_id][top_id];
571 string blob_name = m_rgstrBlobNames[nIdx];
572
573 if (m_rgLayers[layer_id].loss(top_id) != 0 || blobs_under_loss.Contains(blob_name))
574 layer_contributes_loss = true;
575
576 if (!blobs_skip_backp.Contains(blob_name))
577 layer_skip_propagate_down = false;
578
579 if (layer_contributes_loss && !layer_skip_propagate_down)
580 break;
581 }
582
583 // If this layer can skip backward computation, none of its bottom blobs
584 // need backpropagation either.
585 if (m_rgbLayerNeedBackward[layer_id] && layer_skip_propagate_down)
586 {
587 m_rgbLayerNeedBackward[layer_id] = false;
588
589 for (int bottom_id = 0; bottom_id < m_rgcolBottomVecs[layer_id].Count; bottom_id++)
590 {
591 m_rgrgbBottomNeedBackward[layer_id][bottom_id] = false;
592 }
593 }
594
595 if (!layer_contributes_loss)
596 m_rgbLayerNeedBackward[layer_id] = false;
597
598 if (m_log.IsEnabled)
599 {
600 if (m_rgbLayerNeedBackward[layer_id])
601 m_log.WriteLine(m_rgstrLayerNames[layer_id] + " needs backward computation.");
602 else
603 m_log.WriteLine(m_rgstrLayerNames[layer_id] + " does not need backward computation.");
604 }
605
606 for (int bottom_id = 0; bottom_id < m_rgcolBottomVecs[layer_id].Count; bottom_id++)
607 {
608 if (layer_contributes_loss)
609 {
610 int nIdx = m_rgrgnBottomIdVecs[layer_id][bottom_id];
611 string blob_name = m_rgstrBlobNames[nIdx];
612
613 blobs_under_loss.Add(blob_name);
614 }
615 else
616 {
617 m_rgrgbBottomNeedBackward[layer_id][bottom_id] = false;
618 }
619
620 if (!m_rgrgbBottomNeedBackward[layer_id][bottom_id])
621 {
622 int nIdx = m_rgrgnBottomIdVecs[layer_id][bottom_id];
623 string blob_name = m_rgstrBlobNames[nIdx];
624
625 blobs_skip_backp.Add(blob_name);
626 }
627 }
628 }
629
630 // Handle force_backward if needed.
631 if (param.force_backward)
632 {
633 for (int layer_id = 0; layer_id < m_rgLayers.Count; layer_id++)
634 {
635 m_rgbLayerNeedBackward[layer_id] = true;
636
637 for (int bottom_id = 0; bottom_id < m_rgrgbBottomNeedBackward[layer_id].Count; bottom_id++)
638 {
639 m_rgrgbBottomNeedBackward[layer_id][bottom_id] = m_rgrgbBottomNeedBackward[layer_id][bottom_id] || m_rgLayers[layer_id].AllowForceBackward(bottom_id);
640
641 int nIdx = m_rgrgnBottomIdVecs[layer_id][bottom_id];
642 m_rgbBlobNeedBackward[nIdx] = m_rgbBlobNeedBackward[nIdx] || m_rgrgbBottomNeedBackward[layer_id][bottom_id];
643 }
644
645 for (int param_id = 0; param_id < m_rgLayers[layer_id].blobs.Count; param_id++)
646 {
647 m_rgLayers[layer_id].set_param_propagate_down(param_id, true);
648 }
649 }
650 }
651
652 // In the end, all remaining blobs are considered output blobs.
653 foreach (string blob_name in available_blobs)
654 {
655 m_log.WriteLine("This network produces output " + blob_name);
656 int nIdx = blob_name_to_idx[blob_name];
657 Blob<T> blob = m_colBlobs[nIdx];
658
659 m_colNetOutputBlobs.Add(blob);
660 m_rgnNetOutputBlobIndices.Add(nIdx);
661 }
662
663 for (int blob_id = 0; blob_id < m_rgstrBlobNames.Count; blob_id++)
664 {
665 string blob_name = m_rgstrBlobNames[blob_id];
666 m_rgBlobNamesIndex[blob_name] = blob_id;
667 }
668
669 for (int layer_id = 0; layer_id < m_rgstrLayerNames.Count; layer_id++)
670 {
671 string layer_name = m_rgstrLayerNames[layer_id];
672 m_rgLayerNamesIndex[layer_name] = layer_id;
673 }
674
675 if (m_sharedNet == null)
676 ShareWeights();
677
678 // Set the last non-frozen layer to optimize back propagation
679 // by only performing back-propagation up to the last
680 // non-frozen layer.
681 for (int i = 0; i < m_rgLayers.Count; i++)
682 {
683 m_nLastNonFrozenLayerIdx = i;
684
685 if (!m_rgLayers[i].layer_param.freeze_learning && m_rgLayers[i].layer_param.type != LayerParameter.LayerType.SPLIT)
686 break;
687 }
688
689 LossLayer<T> lossLayer = null;
690 for (int i = m_rgLayers.Count - 1; i >= 0; i--)
691 {
692 if (m_rgLayers[i] is LossLayer<T>)
693 {
694 lossLayer = m_rgLayers[i] as LossLayer<T>;
695 break;
696 }
697 }
698
699 // Connect any loss events
700 if (lossLayer != null)
701 {
702 for (int i = 0; i < m_rgLayers.Count; i++)
703 {
704 if (m_rgLayers[i].layer_param.connect_loss_event)
705 m_rgLayers[i].ConnectLoss(lossLayer);
706 }
707 }
708
709 m_bDebugInfo = param.debug_info;
710 m_log.WriteLine("Network initialization done.");
711 }
712 catch (Exception excpt)
713 {
714 foreach (Layer<T> layer in m_rgConnectedLayers)
715 {
716 ((DataLayer<T>)layer).Disconnect();
717 }
718
719 m_rgConnectedLayers.Clear();
720
721 foreach (Layer<T> layer in m_rgLayers)
722 {
723 layer.Dispose();
724 }
725
726 m_rgLayers.Clear();
727 m_rgstrLayerNames.Clear();
728 m_rgLayerNamesIndex.Clear();
729 m_rgbBlobNeedBackward.Clear();
730 m_colBlobs.Dispose();
731 m_rgBlobNamesIndex.Clear();
732 m_rgbBlobNeedBackward.Clear();
733 m_rgcolBottomVecs.Clear();
734 m_rgrgbBottomNeedBackward.Clear();
735 m_rgcolTopVecs.Clear();
736 m_rgrgnTopIdVecs.Clear();
737 m_rgdfBlobLossWeights.Clear();
738 m_rgrgnParamIdVecs.Clear();
739 m_rgnParamOwners.Clear();
740 m_rgstrParamDisplayNames.Clear();
741 m_rgParamLayerIndices.Clear();
742 m_rgParamNamesIndex.Clear();
743 m_rgnNetInputBlobIndices.Clear();
744 m_rgnNetOutputBlobIndices.Clear();
745 m_colNetInputBlobs.Clear();
746 m_colNetOutputBlobs.Clear();
747 m_colParams.Clear();
748 m_colLearnableParams.Clear();
749 m_rgnLearnableParamIds.Clear();
750 m_rgdfParamsLr.Clear();
751 m_rgdfParamsWeightDecay.Clear();
752 m_lMemoryUsed = 0;
753 m_bDebugInfo = false;
754 m_db = null;
755 throw excpt;
756 }
757 }
758
759 private void layer_OnDebug(object sender, GetWorkBlobArgs<T> e)
760 {
761 e.Blob = m_blobWork;
762 }
763
764 private void layer_OnGetIteration(object sender, GetIterationArgs e)
765 {
766 if (OnGetIteration != null)
767 OnGetIteration(sender, e);
768 }
769
770 private void layer_OnSetWorkspace(object sender, WorkspaceArgs e)
771 {
772 if (e.WorkspaceSizeInBytes == 0)
773 return;
774
775 if (OnSetWorkspace != null)
776 {
777 OnSetWorkspace(sender, e);
778 return;
779 }
780
781 m_cuda.DisableGhostMemory();
782
783 if (e.WorkspaceSizeInBytes > m_lWorkspaceSizeInBytes)
784 {
785 m_lWorkspaceSizeInBytes = e.WorkspaceSizeInBytes;
786
787 if (m_hWorkspaceData != 0)
788 m_cuda.FreeMemory(m_hWorkspaceData);
789
790 ulong lCount = CudaDnn<T>.ConvertByteSizeToCount(m_lWorkspaceSizeInBytes);
791 m_hWorkspaceData = m_cuda.AllocMemory((long)lCount);
792 }
793
794 m_cuda.ResetGhostMemory();
795 }
796
797 private void layer_OnGetWorkspace(object sender, WorkspaceArgs e)
798 {
799 if (OnGetWorkspace != null)
800 {
801 OnGetWorkspace(sender, e);
802 return;
803 }
804
805 e.WorkspaceData = m_hWorkspaceData;
806 e.WorkspaceSizeInBytes = m_lWorkspaceSizeInBytes;
807 }
808
814 public Phase SetPhase(Phase phase)
815 {
816 Phase phaseOriginal = m_phase;
817
818 m_phase = phase;
819
820 for (int i = 0; i < m_rgLayers.Count; i++)
821 {
822 m_rgLayers[i].SetPhase(phase);
823 }
824
825 return phaseOriginal;
826 }
827
831 public void RestorePhase()
832 {
833 m_phase = m_phaseOriginal;
834
835 for (int i = 0; i < m_rgLayers.Count; i++)
836 {
837 m_rgLayers[i].SetPhase(m_phase);
838 }
839 }
840
845 {
846 get { return m_bBreakOnFirstNan; }
847 set { m_bBreakOnFirstNan = value; }
848 }
849
855 {
856 get { return m_bDetectDetailedNans; }
857 set { m_bDetectDetailedNans = value; }
858 }
859
867 {
868 get { return m_bEnableLayerDebugging; }
869 set
870 {
871 if (m_bEnableLayerDebugging == value)
872 return;
873
874 foreach (Layer<T> layer in m_rgLayers)
875 {
876 // Enable layer debugging which checks for NAN/INF on each fwd/bwd pass, but is much
877 // slower and therefore only for debugging.
878 if (value)
879 layer.SetOnDebug(layer_OnDebug);
880 else
881 layer.ResetOnDebug(layer_OnDebug);
882 }
883
884 m_bEnableLayerDebugging = value;
885
886 if (m_bEnableLayerDebugging)
887 m_log.WriteLine("WARNING: Layer debugging enabled, training will be slow.");
888 else
889 m_log.WriteLine("Layer debugging disabled.");
890 }
891 }
892
893#pragma warning disable 1591
894
895 public void EnableBestResultMask(string strTargetNode, int nBestResultCount = 5, BEST_RESULT_TYPE resultType = BEST_RESULT_TYPE.BY_CHANNEL)
896 {
897 m_strBestResultTargetNodeToMask = strTargetNode;
898 m_nBestResultCount = nBestResultCount;
899 m_nBestResultType = resultType;
900 }
901
902 public void DisableBestResultMask()
903 {
904 m_strBestResultTargetNodeToMask = null;
905 m_nBestResultCount = 50;
906 }
907
908#pragma warning restore 1591
909
913 public string ActiveLabelCounts
914 {
915 get
916 {
917 string strSrc = m_strDataSource; // Set during Init
918
919 if (m_labelMappingLayer != null)
920 return ((LabelMappingLayer<T>)m_labelMappingLayer).GetActualLabelCounts(strSrc);
921
922 if (string.IsNullOrEmpty(strSrc))
923 return "n/a";
924
925 if (m_db.GetVersion() == DB_VERSION.TEMPORAL)
926 return "n/a";
927
928 return ((IXImageDatabaseBase)m_db).GetLabelCountsAsTextFromSourceName(strSrc);
929 }
930 }
931
935 public string LabelQueryHitPercents
936 {
937 get
938 {
939 string strSrc = m_strDataSource; // Set during Init
940
941 if (string.IsNullOrEmpty(strSrc))
942 return "n/a";
943
944 if (m_db.GetVersion() == DB_VERSION.TEMPORAL)
945 return "n/a";
946
947 return ((IXImageDatabaseBase)m_db).GetLabelQueryHitPercentsAsTextFromSourceName(strSrc);
948 }
949 }
950
954 public string LabelQueryEpochs
955 {
956 get
957 {
958 string strSrc = m_strDataSource; // Set during Init
959
960 if (string.IsNullOrEmpty(strSrc))
961 return "n/a";
962
963 if (m_db.GetVersion() == DB_VERSION.TEMPORAL)
964 return "n/a";
965
966 return ((IXImageDatabaseBase)m_db).GetLabelQueryEpocsAsTextFromSourceName(strSrc);
967 }
968 }
969
973 public string BoostQueryHitPercents
974 {
975 get
976 {
977 string strSrc = m_strDataSource; // Set during Init
978
979 if (string.IsNullOrEmpty(strSrc))
980 return "n/a";
981
982 if (m_db.GetVersion() == DB_VERSION.TEMPORAL)
983 return "n/a";
984
985 return ((IXImageDatabaseBase)m_db).GetBoostQueryHitPercentsAsTextFromSourceName(strSrc);
986 }
987 }
988
996 public void SetEnablePassthrough(bool bEnable)
997 {
998 foreach (Layer<T> layer in m_rgLayers)
999 {
1000 layer.SetEnablePassthrough(bEnable);
1001 }
1002 }
1003
1004 private BatchInput getInput()
1005 {
1006 return m_lastBatchInput;
1007 }
1008
1009 private void setInput(BatchInput biInput)
1010 {
1011 m_lastBatchInput = biInput;
1012 }
1013
1020 public NetParameter FilterNet(NetParameter param)
1021 {
1022 NetState net_state = param.state;
1023 NetParameter param_filtered = param.Clone(false);
1024
1025 for (int i = 0; i < param.layer.Count; i++)
1026 {
1027 LayerParameter layer_param = param.layer[i];
1028 string layer_name = layer_param.name;
1029
1030 m_log.CHECK(layer_param.include.Count == 0 || layer_param.exclude.Count == 0, "Specify either include rules or exclude rules; not both.");
1031
1032 // If no include rules are specified, the layer is included by default and
1033 // only excluded if it meets one of the exclude rules.
1034 bool layer_included = (layer_param.include.Count == 0) ? true : false;
1035
1036 for (int j = 0; layer_included && j < layer_param.exclude.Count; j++)
1037 {
1038 if (StateMeetsRule(net_state, layer_param.exclude[j], layer_name))
1039 layer_included = false;
1040 }
1041
1042 for (int j = 0; !layer_included && j < layer_param.include.Count; j++)
1043 {
1044 if (StateMeetsRule(net_state, layer_param.include[j], layer_name))
1045 layer_included = true;
1046 }
1047
1048 if (layer_included)
1049 param_filtered.layer.Add(layer_param.Clone(true));
1050 }
1051
1052 return param_filtered;
1053 }
1054
1062 public bool StateMeetsRule(NetState state, NetStateRule rule, string strLayer)
1063 {
1064 if (rule.phase == Phase.ALL)
1065 return true;
1066
1067 // Check whether the rule is broken due to phase.
1068 if (rule.phase != Phase.NONE)
1069 {
1070 if (rule.phase != state.phase)
1071 {
1072 m_log.WriteLine("The NetState phase (" + state.phase.ToString() + ") differed from the phase (" + rule.phase.ToString() + ") specified by a rule in layer " + strLayer);
1073 return false;
1074 }
1075 }
1076
1077 // Check whether the rule is broken due to min level.
1078 if (rule.min_level.HasValue)
1079 {
1080 if (state.level < rule.min_level.Value)
1081 {
1082 m_log.WriteLine("The NetState level (" + state.level.ToString() + ") is below the min_level ( " + rule.min_level.Value.ToString() + ") specified by a rule in layer " + strLayer);
1083 return false;
1084 }
1085 }
1086
1087 // Check whether the rule is broken due to max level.
1088 if (rule.max_level.HasValue)
1089 {
1090 if (state.level > rule.max_level.Value)
1091 {
1092 m_log.WriteLine("The NetState level (" + state.level.ToString() + ") is above the max_level ( " + rule.max_level.Value.ToString() + ") specified by a rule in layer " + strLayer);
1093 return false;
1094 }
1095 }
1096
1097 // Check whether the rule is broken due to stage. The NetState must
1098 // contain ALL of the rule's stages to meet it.
1099 for (int i = 0; i < rule.stage.Count; i++)
1100 {
1101 // Check that the NetState contains the rule's ith stage.
1102 bool has_stage = false;
1103
1104 for (int j = 0; !has_stage && j < state.stage.Count; j++)
1105 {
1106 if (rule.stage[i] == state.stage[j])
1107 {
1108 has_stage = true;
1109 break;
1110 }
1111 }
1112
1113 if (!has_stage)
1114 {
1115 m_log.WriteLine("The NetState did not contain stage '" + rule.stage[i] + "' specified by a rule in layer " + strLayer);
1116 return false;
1117 }
1118 }
1119
1120 // Check whether the rule is broken due to not_stage. The NetState must
1121 // contain NONE of the rule's not_stages to meet it.
1122 for (int i = 0; i < rule.not_stage.Count; i++)
1123 {
1124 // Check that the NetState contains the rule's ith not_stage.
1125 bool has_stage = false;
1126
1127 for (int j = 0; !has_stage && j < state.stage.Count; j++)
1128 {
1129 if (rule.not_stage[i] == state.stage[j])
1130 {
1131 has_stage = true;
1132 break;
1133 }
1134 }
1135
1136 if (has_stage)
1137 {
1138 m_log.WriteLine("The NetState contained a not_stage '" + rule.not_stage[i] + "' specified by a rule in layer " + strLayer);
1139 return false;
1140 }
1141 }
1142
1143 return true;
1144 }
1145
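// Illustrative sketch (an assumption, not part of the original source): a rule with
// rule.phase = Phase.TRAIN and rule.stage = { "finetune" } is met only by a NetState
// whose phase is TRAIN and whose stage list contains "finetune"; adding
// rule.not_stage = { "frozen" } would additionally reject any state that lists the
// "frozen" stage.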
1154 protected void AppendTop(NetParameter param, int layer_id, int top_id, List<string> available_blobs, DictionaryEx<string, int> blob_name_to_idx)
1155 {
1156 LayerParameter layer_param = null;
1157 string blob_name;
1158
1159 if (layer_id >= 0)
1160 layer_param = param.layer[layer_id].Clone(false);
1161
1162 if (layer_param != null)
1163 {
1164 if (layer_param.top.Count > top_id)
1165 blob_name = layer_param.top[top_id];
1166 else
1167 blob_name = "(automatic)";
1168 }
1169 else
1170 {
1171 blob_name = param.input[top_id];
1172 }
1173
1174 // Check if we are doing in-place computation
1175 if (blob_name_to_idx != null && layer_param != null && layer_param.bottom.Count > top_id && blob_name == layer_param.bottom[top_id])
1176 {
1177 // In-place computation
1178 m_log.WriteLine(layer_param.name + " -> " + blob_name + " (in-place)");
1179 int nIdx = blob_name_to_idx[blob_name];
1180 m_rgcolTopVecs[layer_id].Add(m_colBlobs[nIdx]);
1181 m_rgrgnTopIdVecs[layer_id].Add(nIdx);
1182 }
1183 else if (blob_name_to_idx != null && blob_name_to_idx.ContainsKey(blob_name))
1184 {
1185 // If we are not doing in-place computation but have duplicated blobs,
1186 // raise an error.
1187 m_log.FAIL("Top blob '" + blob_name + "' produced by multiple sources.");
1188 }
1189 else
1190 {
1191 // Normal output.
1192 if (m_log.IsEnabled)
1193 {
1194 if (layer_param != null)
1195 m_log.WriteLine(layer_param.name + " -> " + blob_name);
1196 else
1197 m_log.WriteLine("Input " + top_id.ToString() + " -> " + blob_name);
1198 }
1199
1200 Blob<T> blob_pointer = new Blob<T>(m_cuda, m_log);
1201 blob_pointer.Name = blob_name;
1202
1203
1204 //---------------------------------------------------
1205 // When sharing this net with another (e.g. the run
1206 // net shares the blobs of the training net to
1207 // help conserve GPU memory), do not share the input
1208 // blob or the output blob, for sharing the input
1209 // blob would change the batch size, and sharing the
1210 // output blob would cause the training net's loss
1211 // layer to be overwritten, which we do not want.
1212 //
1213 // NOTE: Blob sharing only works when the network
1214 // using the shared nodes uses sizes that are less
1215 // than or equal to those of the shared node. In
1216 // the case of the run network this is not a problem,
1217 // for its batch size is 1 whereas the training net
1218 // has a batch size of 1 or greater.
1219 //
1220 // When sharing the training net with the testing
1221 // net, blobs are only shared when the training
1222 // net batch size is >= that of the testing net.
1223 //----------------------------------------------------
1224 if (m_sharedNet != null && layer_id >= 0 && layer_id < param.layer.Count() - 1)
1225 m_sharedNet.blobs.Share(blob_pointer, null, false);
1226
1227 int blob_id = m_colBlobs.Count;
1228 m_colBlobs.Add(blob_pointer);
1229 m_rgstrBlobNames.Add(blob_name);
1230 m_rgbBlobNeedBackward.Add(false);
1231
1232 if (blob_name_to_idx != null)
1233 blob_name_to_idx[blob_name] = blob_id;
1234
1235 if (layer_id == -1)
1236 {
1237 // Set the (explicitly specified) dimensions of the input blob.
1238 if (param.input_dim.Count > 0)
1239 {
1240 blob_pointer.Reshape(param.input_dim[top_id * 4 + 0],
1241 param.input_dim[top_id * 4 + 1],
1242 param.input_dim[top_id * 4 + 2],
1243 param.input_dim[top_id * 4 + 3]);
1244 }
1245 else
1246 {
1247 blob_pointer.Reshape(param.input_shape[top_id]);
1248 }
1249
1250 m_rgnNetInputBlobIndices.Add(blob_id);
1251 m_colNetInputBlobs.Add(blob_pointer);
1252 }
1253 else
1254 {
1255 m_rgrgnTopIdVecs[layer_id].Add(blob_id);
1256 m_rgcolTopVecs[layer_id].Add(blob_pointer);
1257 }
1258 }
1259
1260 if (available_blobs != null)
1261 available_blobs.Add(blob_name);
1262 }
1263
1272 protected int AppendBottom(NetParameter param, int layer_id, int bottom_id, List<string> available_blobs, DictionaryEx<string, int> blob_name_to_idx)
1273 {
1274 LayerParameter layer_param = param.layer[layer_id];
1275 string blob_name = layer_param.bottom[bottom_id];
1276
1277 if (!available_blobs.Contains(blob_name))
1278 m_log.FAIL("Unknown bottom blob '" + blob_name + "' (layer '" + layer_param.name + "', bottom index " + bottom_id.ToString() + ")");
1279
1280 int blob_id = blob_name_to_idx[blob_name];
1281 m_log.WriteLine(m_rgstrLayerNames[layer_id] + " <- " + blob_name);
1282
1283 m_rgcolBottomVecs[layer_id].Add(m_colBlobs[blob_id]);
1284 m_rgrgnBottomIdVecs[layer_id].Add(blob_id);
1285 available_blobs.Remove(blob_name);
1286
1287 bool need_backward = m_rgbBlobNeedBackward[blob_id];
1288 // Check if the backpropagation on bottom_id should be skipped
1289 if (layer_param.propagate_down.Count > 0)
1290 need_backward = layer_param.propagate_down[bottom_id];
1291
1292 m_rgrgbBottomNeedBackward[layer_id].Add(need_backward);
1293
1294 return blob_id;
1295 }
1296
1303 protected void AppendParam(NetParameter param, int layer_id, int param_id)
1304 {
1305 LayerParameter layer_param = m_rgLayers[layer_id].layer_param;
1306 int param_size = layer_param.parameters.Count;
1307 string param_name = (param_size > param_id) ? layer_param.parameters[param_id].name : "";
1308
1309 if (param_name.Length > 0)
1310 m_rgstrParamDisplayNames.Add(param_name);
1311 else
1312 m_rgstrParamDisplayNames.Add(param_id.ToString());
1313
1314 int net_param_id = m_colParams.Count;
1315
1316 // Apply freeze learning if set.
1317 Blob<T> learning_param = m_rgLayers[layer_id].blobs[param_id];
1318 learning_param.freeze_learning = m_rgLayers[layer_id].layer_param.freeze_learning;
1319
1320 m_colParams.Add(learning_param);
1321 m_rgrgnParamIdVecs[layer_id].Add(net_param_id);
1322 m_rgParamLayerIndices.Add(new KeyValuePair<int, int>(layer_id, param_id));
1323
1324 ParamSpec default_param_spec = new ParamSpec();
1325 ParamSpec param_spec = (layer_param.parameters.Count > param_id) ? layer_param.parameters[param_id] : default_param_spec;
1326
1327 if (param_size == 0 || param_name.Length == 0 || (param_name.Length > 0 && !m_rgParamNamesIndex.ContainsKey(param_name)))
1328 {
1329 // This layer 'owns' this parameter blob -- it is either anonymous
1330 // (i.e., not given a param_name) or explicitly given a name that we
1331 // haven't already seen.
1332 m_rgnParamOwners.Add(-1);
1333
1334 if (param_name.Length > 0)
1335 m_rgParamNamesIndex[param_name] = net_param_id;
1336
1337 int learnable_param_id = m_rgnLearnableParamIds.Count;
1338 m_colLearnableParams.Add(m_colParams[net_param_id]);
1339 m_rgnLearnableParamIds.Add(learnable_param_id);
1340 m_rgdfParamsLr.Add(param_spec.lr_mult);
1341 m_rgdfParamsWeightDecay.Add(param_spec.decay_mult);
1342 }
1343 else
1344 {
1345 // Named param blob with name we've seen before: share params
1346 int owner_net_param_id = m_rgParamNamesIndex[param_name];
1347 m_rgnParamOwners.Add(owner_net_param_id);
1348
1349 KeyValuePair<int,int> owner_index = m_rgParamLayerIndices[owner_net_param_id];
1350 int owner_layer_id = owner_index.Key;
1351 int owner_param_id = owner_index.Value;
1352
1353 m_log.WriteLine("Sharing parameters '" + param_name + "' owned by layer '" + m_rgstrLayerNames[owner_layer_id] + "', param index " + owner_param_id.ToString());
1354
1355 Blob<T> this_blob = m_rgLayers[layer_id].blobs[param_id];
1356 Blob<T> owner_blob = m_rgLayers[owner_layer_id].blobs[owner_param_id];
1357 int param_size2 = layer_param.parameters.Count;
1358
1359 if (param_size2 > param_id && (layer_param.parameters[param_id].share_mode == ParamSpec.DimCheckMode.PERMISSIVE))
1360 {
1361 // Permissive dimension checking -- only check that the counts are the same.
1362 m_log.CHECK_EQ(this_blob.count(), owner_blob.count(), "Cannot share param '" + param_name + "' owned by layer '" + m_rgstrLayerNames[owner_layer_id] + "' with layer '" + m_rgstrLayerNames[layer_id] + "'; count mismatch. Owner layer param shape is " + owner_blob.shape_string + "; sharing layer shape is " + this_blob.shape_string);
1363 }
1364 else
1365 {
1366 // Strict dimension checking -- all dims must be the same.
1367 m_log.CHECK(Utility.Compare<int>(this_blob.shape(), owner_blob.shape()), "Cannot share param '" + param_name + "' owned by layer '" + m_rgstrLayerNames[owner_layer_id] + "' with layer '" + m_rgstrLayerNames[layer_id] + "'; shape mismatch. Owner layer param shape is " + owner_blob.shape_string + "; sharing layer expects shape " + this_blob.shape_string);
1368 }
1369
1370 int learnable_param_id = m_rgnLearnableParamIds[owner_net_param_id];
1371 m_rgnLearnableParamIds.Add(learnable_param_id);
1372
1373 if (param_spec.lr_mult != 1.0)
1374 {
1375 if (m_rgdfParamsLr[learnable_param_id].HasValue)
1376 m_log.CHECK_EQ(param_spec.lr_mult, m_rgdfParamsLr[learnable_param_id].Value, "Shared param '" + param_name + "' has mismatched lr_mult.");
1377 else
1378 m_rgdfParamsLr[learnable_param_id] = param_spec.lr_mult;
1379 }
1380
1381 if (param_spec.decay_mult != 1.0)
1382 {
1383 if (m_rgdfParamsWeightDecay[learnable_param_id].HasValue)
1384 m_log.CHECK_EQ(param_spec.decay_mult, m_rgdfParamsWeightDecay[learnable_param_id].Value, "Shared param '" + param_name + "' has mismatched decay_mult.");
1385 else
1386 m_rgdfParamsWeightDecay[learnable_param_id] = param_spec.decay_mult;
1387 }
1388 }
1389 }
1390
1402 public double ForwardFromTo(int nStart = 0, int nEnd = int.MaxValue)
1403 {
1404 if (nEnd == int.MaxValue)
1405 nEnd = m_rgLayers.Count - 1;
1406
1407 m_log.CHECK_GE(nStart, 0, "Start must be >= 0.");
1408 m_log.CHECK_LT(nEnd, m_rgLayers.Count, "End must be < the layer count of " + m_rgLayers.Count.ToString());
1409 double dfLoss = 0;
1410
1411 for (int i = nStart; i <= nEnd; i++)
1412 {
1413 double dfLayerLoss = m_rgLayers[i].Forward(m_rgcolBottomVecs[i], m_rgcolTopVecs[i]);
1414 dfLoss += dfLayerLoss;
1415
1416 if (m_bDebugInfo)
1417 ForwardDebugInfo(i);
1418
1419 //-----------------------------------------------
1420 // Used when debugging.
1421 //-----------------------------------------------
1422 if (m_strBestResultTargetNodeToMask != null && m_rgLayers[i].layer_param.name == m_strBestResultTargetNodeToMask)
1423 {
1424 Blob<T> blob = blob_by_name(m_strBestResultTargetNodeToMask);
1425 if (blob == null)
1426 m_log.FAIL("Could not find the Best Result Target Node '" + m_strBestResultTargetNodeToMask + "'!");
1427
1428 if (m_nBestResultType == BEST_RESULT_TYPE.BY_CHANNEL)
1429 blob.KeepBestResultsByChannel(m_nBestResultCount);
1430 else
1431 blob.KeepBestResultsByWeight(m_nBestResultCount);
1432 }
1433 }
1434
1435 return dfLoss;
1436 }
1437
1445 public BlobCollection<T> Forward()
1446 {
1447 double dfLoss;
1448 return Forward(out dfLoss);
1449 }
1450
1459 public BlobCollection<T> Forward(out double dfLoss)
1460 {
1461 dfLoss = ForwardFromTo();
1462 return m_colNetOutputBlobs;
1463 }
1464
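// Usage sketch (an assumption, not part of the original source): the returned loss
// is the weighted sum of all loss-producing tops over the layers that were run;
// the partial call below assumes the net has at least four layers.
//
//   double dfTotalLoss = net.ForwardFromTo();       // full pass over all layers
//   double dfPartialLoss = net.ForwardFromTo(0, 3); // only the first four layers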
1472 public BlobCollection<T> Forward(BlobCollection<T> colBottom, out double dfLoss, bool bReshape = false)
1473 {
1474 if (m_colNetInputBlobs.Count == 0)
1475 {
1476 if (m_bFirstForwardInputWarning)
1477 {
1478 m_log.WriteLine("WARNING: bottom inputs are ignored, since this net does not take input.");
1479 m_bFirstForwardInputWarning = false;
1480 }
1481 }
1482
1483 // Copy bottom to internal bottom
1484 for (int i = 0; i < colBottom.Count && i < m_colNetInputBlobs.Count; i++)
1485 {
1486 m_colNetInputBlobs[i].CopyFrom(colBottom[i], false, bReshape);
1487 }
1488
1489 return Forward(out dfLoss);
1490 }
1491
1499 public void Backward(int nStart = int.MaxValue, int nEnd = 0)
1500 {
1501 if (nStart == int.MaxValue)
1502 nStart = m_rgLayers.Count - 1;
1503
1504 m_log.CHECK_GE(nEnd, 0, "End must be >= 0.");
1505 m_log.CHECK_LT(nStart, m_rgLayers.Count, "Start must be less than the number of layers (" + m_rgLayers.Count.ToString() + ")");
1506
1507 for (int i = nStart; i >= nEnd; i--)
1508 {
1509 if (m_rgbLayerNeedBackward[i])
1510 {
1511 m_rgLayers[i].Backward(m_rgcolTopVecs[i], m_rgrgbBottomNeedBackward[i], m_rgcolBottomVecs[i]);
1512
1513 if (m_bDebugInfo)
1514 BackwardDebugInfo(i);
1515 }
1516 }
1517
1518 if (m_bDebugInfo)
1519 {
1520 double dfAsumData = 0;
1521 double dfAsumDiff = 0;
1522 double dfSumsqData = 0;
1523 double dfSumsqDiff = 0;
1524
1525 for (int i = 0; i < m_colLearnableParams.Count; i++)
1526 {
1527 dfAsumData += Utility.ConvertVal<T>(m_colLearnableParams[i].asum_data());
1528 dfAsumDiff += Utility.ConvertVal<T>(m_colLearnableParams[i].asum_diff());
1529 dfSumsqData += Utility.ConvertVal<T>(m_colLearnableParams[i].sumsq_data());
1530 dfSumsqDiff += Utility.ConvertVal<T>(m_colLearnableParams[i].sumsq_diff());
1531 }
1532
1533 double dfL2NormData = Math.Sqrt(dfSumsqData);
1534 double dfL2NormDiff = Math.Sqrt(dfSumsqDiff);
1535
1536 m_log.WriteLine(" [Backward] All net params (data, diff): L1 norm = (" + dfAsumData.ToString() + ", " + dfAsumDiff.ToString() + "; L2 norm = (" + dfL2NormData.ToString() + ", " + dfL2NormDiff.ToString() + ")");
1537 }
1538 }
1539
1544 protected void InputDebugInfo(int input_id)
1545 {
1546 Blob<T> blob = m_colNetInputBlobs[input_id];
1547 int nIdx = m_rgnNetInputBlobIndices[input_id];
1548 string blob_name = m_rgstrBlobNames[nIdx];
1549 double data_abs_val_mean = Utility.ConvertVal<T>(blob.asum_data()) / blob.count();
1550
1551 m_log.WriteLine(" [Forward] Input " + blob_name + " data: " + data_abs_val_mean.ToString());
1552 }
1553
1558 protected void ForwardDebugInfo(int layer_id)
1559 {
1560 for (int top_id = 0; top_id < m_rgcolTopVecs[layer_id].Count; top_id++)
1561 {
1562 Blob<T> blob = m_rgcolTopVecs[layer_id][top_id];
1563 int nIdx = m_rgrgnTopIdVecs[layer_id][top_id];
1564 string blob_name = m_rgstrBlobNames[nIdx];
1565 double data_asum = Utility.ConvertVal<T>(blob.asum_data());
1566 double data_abs_val_mean = data_asum / blob.count();
1567
1568 m_log.WriteLine(" [Forward] Layer " + m_rgstrLayerNames[layer_id] + ", top blob " + blob_name + " data: " + data_abs_val_mean.ToString() + " asum: " + data_asum.ToString());
1569 }
1570
1571 for (int param_id = 0; param_id < m_rgLayers[layer_id].blobs.Count; param_id++)
1572 {
1573 Blob<T> blob = m_rgLayers[layer_id].blobs[param_id];
1574 int net_param_id = m_rgrgnParamIdVecs[layer_id][param_id];
1575 string blob_name = m_rgstrParamDisplayNames[net_param_id];
1576 double data_asum = Utility.ConvertVal<T>(blob.asum_data());
1577 double data_abs_val_mean = data_asum / blob.count();
1578
1579 m_log.WriteLine(" [Forward] Layer " + m_rgstrLayerNames[layer_id] + ", param blob " + blob_name + " data: " + data_abs_val_mean.ToString() + " asum: " + data_asum.ToString());
1580 }
1581 }
1582
1587 protected void BackwardDebugInfo(int layer_id)
1588 {
1589 BlobCollection<T> bottom_vec = m_rgcolBottomVecs[layer_id];
1590
1591 for (int bottom_id = 0; bottom_id < bottom_vec.Count; bottom_id++)
1592 {
1593 if (!m_rgrgbBottomNeedBackward[layer_id][bottom_id])
1594 continue;
1595
1596 Blob<T> blob = bottom_vec[bottom_id];
1597 int nIdx = m_rgrgnBottomIdVecs[layer_id][bottom_id];
1598 string blob_name = m_rgstrBlobNames[nIdx];
1599 double diff_asum = Utility.ConvertVal<T>(blob.asum_diff());
1600 double diff_abs_val_mean = diff_asum / blob.count();
1601
1602 m_log.WriteLine(" [Backward] Layer " + m_rgstrLayerNames[layer_id] + ", bottom blob " + blob_name + " diff: " + diff_abs_val_mean.ToString() + " asum: " + diff_asum.ToString());
1603 }
1604
1605 for (int param_id = 0; param_id < m_rgLayers[layer_id].blobs.Count; param_id++)
1606 {
1607 if (!m_rgLayers[layer_id].param_propagate_down(param_id))
1608 continue;
1609
1610 Blob<T> blob = m_rgLayers[layer_id].blobs[param_id];
1611 double diff_asum = Utility.ConvertVal<T>(blob.asum_diff());
1612 double diff_abs_val_mean = diff_asum / blob.count();
1613
1614 m_log.WriteLine(" [Backward] Layer " + m_rgstrLayerNames[layer_id] + ", param blob " + param_id.ToString() + " diff: " + diff_abs_val_mean.ToString() + " asum: " + diff_asum.ToString());
1615 }
1616 }
1617
1622 protected void UpdateDebugInfo(int param_id)
1623 {
1624 Blob<T> blob = m_colBlobs[param_id];
1625 int param_owner = m_rgnParamOwners[param_id];
1626 int nIdx = m_rgParamLayerIndices[param_id].Key;
1627 string layer_name = m_rgstrLayerNames[nIdx];
1628 string param_display_name = m_rgstrParamDisplayNames[param_id];
1629 double diff_asum = Utility.ConvertVal<T>(blob.asum_diff());
1630 double diff_abs_val_mean = diff_asum / blob.count();
1631
1632 if (param_owner < 0)
1633 {
1634 double data_abs_val_mean = Utility.ConvertVal<T>(blob.asum_data()) / blob.count();
1635 m_log.WriteLine(" [Update] Layer " + layer_name + ", param " + param_display_name + " data: " + data_abs_val_mean.ToString() + "; diff: " + diff_abs_val_mean.ToString() + " asum: " + diff_asum.ToString());
1636 }
1637 else
1638 {
1639 int nIdx2 = m_rgParamLayerIndices[param_owner].Key;
1640 string owner_layer_name = m_rgstrLayerNames[nIdx2];
1641 int nIdx3 = m_rgnParamOwners[param_id];
1642 string param_display_name_owner = m_rgstrParamDisplayNames[nIdx3];
1643 m_log.WriteLine(" [Update] Layer " + layer_name + ", param blob " + param_display_name + " (owned by layer " + owner_layer_name + ", param " + param_display_name_owner + ") diff: " + diff_abs_val_mean.ToString() + " asum: " + diff_asum.ToString());
1644 }
1645 }
1646
1653 public void ShareTrainedLayersWith(Net<T> srcNet, bool bEnableLog = false)
1654 {
1655 if (srcNet == this)
1656 return;
1657
1658 int num_source_layers = srcNet.layers.Count();
1659
1660 for (int i = 0; i < num_source_layers; i++)
1661 {
1662 Layer<T> source_layer = srcNet.layers[i];
1663 string source_layer_name = srcNet.layer_names[i];
1664 int target_layer_id = 0;
1665
1666 while (target_layer_id != m_rgstrLayerNames.Count && m_rgstrLayerNames[target_layer_id] != source_layer_name)
1667 {
1668 target_layer_id++;
1669 }
1670
1671 if (target_layer_id == m_rgstrLayerNames.Count)
1672 {
1673 if (bEnableLog)
1674 m_log.WriteLine("Ignoring source layer " + source_layer_name, true);
1675 continue;
1676 }
1677
1678 if (bEnableLog)
1679 m_log.WriteLine("Copying source layer " + source_layer_name);
1680
1681 BlobCollection<T> target_blobs = m_rgLayers[target_layer_id].blobs;
1682 m_log.CHECK_EQ(target_blobs.Count, source_layer.blobs.Count, "Incompatible number of blobs for layer " + source_layer_name);
1683
1684 for (int j = 0; j < target_blobs.Count; j++)
1685 {
1686 Blob<T> source_blob = source_layer.blobs[j];
1687 if (!target_blobs[j].reshape_when_sharing)
1688 m_log.CHECK(Utility.Compare<int>(target_blobs[j].shape(), source_blob.shape()), "Cannot share param " + j.ToString() + " weights from layer '" + source_layer_name + "'; shape mismatch. Source param shape is " + source_blob.shape_string + "; target param shape is " + target_blobs[j].shape_string);
1689
1690 target_blobs[j].ShareData(source_blob);
1691 }
1692 }
1693 }
1694
1699 public void CopyInternalBlobsTo(Net<T> dstNet)
1700 {
1701 m_log.CHECK_EQ(m_rgLayers.Count, dstNet.m_rgLayers.Count, "Both networks must have the same number of layers!");
1702
1703 for (int i = 0; i < m_rgLayers.Count; i++)
1704 {
1705 m_log.CHECK_EQ(m_rgLayers[i].internal_blobs.Count, dstNet.m_rgLayers[i].internal_blobs.Count, "Both networks must have the same number of internal blobs at layer " + i.ToString());
1706 dstNet.m_rgLayers[i].internal_blobs.CopyFrom(m_rgLayers[i].internal_blobs);
1707 }
1708 }
1709
1714 public void CopyTrainedLayersTo(Net<T> dstNet)
1715 {
1716 int num_source_layers = layers.Count();
1717
1718 for (int i = 0; i < num_source_layers; i++)
1719 {
1720 Layer<T> source_layer = layers[i];
1721 string source_layer_name = layer_names[i];
1722 int target_layer_id = 0;
1723
1724 while (target_layer_id != dstNet.m_rgstrLayerNames.Count && dstNet.m_rgstrLayerNames[target_layer_id] != source_layer_name)
1725 {
1726 target_layer_id++;
1727 }
1728
1729 if (target_layer_id == dstNet.m_rgstrLayerNames.Count)
1730 {
1731 m_log.WriteLine("Ignoring source layer " + source_layer_name, true);
1732 continue;
1733 }
1734
1735 m_log.WriteLine("Copying source layer " + source_layer_name);
1736 BlobCollection<T> target_blobs = dstNet.m_rgLayers[target_layer_id].blobs;
1737 m_log.CHECK_EQ(target_blobs.Count, source_layer.blobs.Count, "Incompatible number of blobs for layer " + source_layer_name);
1738
1739 for (int j = 0; j < target_blobs.Count; j++)
1740 {
1741 Blob<T> source_blob = source_layer.blobs[j];
1742 m_log.CHECK(Utility.Compare<int>(target_blobs[j].shape(), source_blob.shape()), "Cannot copy param " + j.ToString() + " weights from layer '" + source_layer_name + "'; shape mismatch. Source param shape is " + source_blob.shape_string + "; target param shape is " + target_blobs[j].shape_string);
1743 target_blobs[j].CopyFrom(source_blob);
1744 }
1745 }
1746 }
1747
1754 public void CopyTrainedLayersTo(Net<T> dstNet, DictionaryEx<string, string> rgLayerNames, bool bTranspose)
1755 {
1756 foreach (Layer<T> sourceLayer in m_rgLayers)
1757 {
1758 string source_layer_name = sourceLayer.layer_param.name;
1759
1760 if (rgLayerNames.ContainsKey(source_layer_name))
1761 {
1762 string strTargetLayer = rgLayerNames[source_layer_name];
1763
1764 if (strTargetLayer != null && strTargetLayer.Length > 0)
1765 {
1766 foreach (Layer<T> targetLayer in dstNet.m_rgLayers)
1767 {
1768 if (targetLayer.layer_param.name == strTargetLayer)
1769 {
1770 m_log.WriteLine("Copying source layer " + source_layer_name);
1771 BlobCollection<T> target_blobs = targetLayer.blobs;
1772 m_log.CHECK_EQ(target_blobs.Count, sourceLayer.blobs.Count, "Incompatible number of blobs for layer " + source_layer_name);
1773 int nCount = 1; // currently the bias is ignored.
1774
1775 for (int i = 0; i < nCount; i++)
1776 {
1777 Blob<T> source_blob = sourceLayer.blobs[i];
1778 m_log.CHECK(Utility.Compare<int>(target_blobs[i].shape(), source_blob.shape()), "Cannot copy param " + i.ToString() + " weights from layer '" + source_layer_name + "'; shape mismatch. Source param shape is " + source_blob.shape_string + "; target param shape is " + target_blobs[i].shape_string);
1779
1780 if (bTranspose)
1781 target_blobs[i].CopyFromAndTransposeHeightWidth(source_blob, false);
1782 else
1783 target_blobs[i].CopyFrom(source_blob, false, false);
1784 }
1785 }
1786 }
1787 }
1788 }
1789 }
1790 }
1791
1792
1800 public void Reshape()
1801 {
1802 for (int i = 0; i < m_rgLayers.Count; i++)
1803 {
1804 m_rgLayers[i].SetNetReshapeRequest();
1805 m_rgLayers[i].Reshape(m_rgcolBottomVecs[i], m_rgcolTopVecs[i]);
1806 }
1807 }
1808
1814 public void CopyTrainedLayersFrom(NetParameter param)
1815 {
1816 int num_source_layers = param.layer.Count();
1817
1818 for (int i = 0; i < num_source_layers; i++)
1819 {
1820 LayerParameter source_layer = param.layer[i];
1821 string source_layer_name = source_layer.name;
1822 int target_layer_id = 0;
1823
1824 while (target_layer_id != m_rgstrLayerNames.Count && m_rgstrLayerNames[target_layer_id] != source_layer_name)
1825 {
1826 target_layer_id++;
1827 }
1828
1829 if (target_layer_id == m_rgstrLayerNames.Count)
1830 {
1831 m_log.WriteLine("Ignoring source layer " + source_layer_name, true);
1832 continue;
1833 }
1834
1835 m_log.WriteLine("Copying source layer " + source_layer_name);
1836 BlobCollection<T> target_blobs = m_rgLayers[target_layer_id].blobs;
1837 m_log.CHECK_EQ(target_blobs.Count, source_layer.blobs.Count, "Incompatible number of blobs for layer " + source_layer_name);
1838
1839 for (int j = 0; j < target_blobs.Count; j++)
1840 {
1841 if (!target_blobs[j].ShapeEquals(source_layer.blobs[j]))
1842 {
1843 Blob<T> source_blob = new Blob<T>(m_cuda, m_log);
1844 source_blob.FromProto(source_layer.blobs[j], true);
1845 m_log.FAIL("Cannot copy param " + j.ToString() + " weights from layer " + source_layer_name + "; shape mismatch. Source param shape is " + source_blob.shape_string + "; target param shape is " + target_blobs[j].shape_string + ". To learn this layer's parameters from scratch rather than copying from the saved net, rename the layer.");
1846 }
1847
1848 target_blobs[j].FromProto(source_layer.blobs[j], false);
1849 }
1850 }
1851 }
1852
1856 public NetParameter net_param
1857 {
1858 get { return m_param; }
1859 }
1860
1865 public NetParameter ToProto(bool bIncludeBlobs)
1866 {
1867 NetParameter p = m_param.Clone(true);
1868
1869 if (bIncludeBlobs)
1870 {
1871 foreach (Layer<T> layer in m_rgLayers)
1872 {
1873 if (layer.blobs.Count > 0)
1874 {
1875 foreach (LayerParameter lp in p.layer)
1876 {
1877 if (lp.type == layer.layer_param.type &&
1878 lp.name == layer.layer_param.name)
1879 {
1880 foreach (Blob<T> blob in layer.blobs)
1881 {
1882 lp.blobs.Add(blob.ToProto());
1883 }
1884 }
1885 }
1886 }
1887 }
1888 }
1889
1890 return p;
1891 }
1892
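// Usage sketch (an assumption, not part of the original source): snapshot the
// network definition together with its current weights.
//
//   NetParameter snapshot = net.ToProto(true);   // true = include blob data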
1896 public void Update()
1897 {
1898 for (int i = 0; i < m_colLearnableParams.Count; i++)
1899 {
1900 m_colLearnableParams[i].Update();
1901 }
1902 }
1903
1907 public void ClearParamDiffs()
1908 {
1909 for (int i = 0; i < m_colLearnableParams.Count; i++)
1910 {
1911 Blob<T> blob = m_colLearnableParams[i];
1912 blob.SetDiff(0.0);
1913 }
1914 }
1915
1923 public void ShareWeights()
1924 {
1925 for (int i = 0; i < m_colParams.Count; i++)
1926 {
1927 if (m_rgnParamOwners[i] < 0)
1928 continue;
1929
1930 int nIdx = m_rgnParamOwners[i];
1931 m_colParams[i].ShareData(m_colParams[nIdx]);
1932 m_colParams[i].ShareDiff(m_colParams[nIdx]);
1933 }
1934 }
1935
1943 public bool ForwardBackward(BlobCollection<T> colBottom, out double dfLocalLoss, TRAIN_STEP step = TRAIN_STEP.NONE)
1944 {
1945 dfLocalLoss = 0;
1946
1947 if (step != TRAIN_STEP.BACKWARD)
1948 Forward(colBottom, out dfLocalLoss);
1949
1950 if (m_bBreakOnFirstNan)
1951 {
1952 DebugInformation<T> dbgInfo = GetDebugInformation(m_bDetectDetailedNans);
1953 string strType;
1954 string strFirstNan = dbgInfo.DetectFirstNaN(out strType);
1955 if (strFirstNan != null)
1956 return false;
1957 }
1958
1959 if (step != TRAIN_STEP.FORWARD)
1960 {
1961 Backward(int.MaxValue, m_nLastNonFrozenLayerIdx);
1962 }
1963
1964 return true;
1965 }
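// A typical training iteration, sketched under the assumption that 'colBottom'
// holds any externally supplied input blobs (it may come from a data layer feed):
//
//   double dfLoss;
//   net.ClearParamDiffs();                           // zero gradients from the previous step
//   if (net.ForwardBackward(colBottom, out dfLoss))  // forward + backward pass
//       net.Update();                                // apply the accumulated diffs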
1966
1970 public string name
1971 {
1972 get { return m_strName; }
1973 }
1974
1978 public List<string> layer_names
1979 {
1980 get { return m_rgstrLayerNames; }
1981 }
1982
1986 public List<string> blob_names
1987 {
1988 get { return m_rgstrBlobNames; }
1989 }
1990
1994 public BlobCollection<T> blobs
1995 {
1996 get { return m_colBlobs; }
1997 }
1998
2002 public List<Layer<T>> layers
2003 {
2004 get { return m_rgLayers; }
2005 }
2006
2010 public Phase phase
2011 {
2012 get { return m_phase; }
2013 }
2014
2019 public List<BlobCollection<T>> bottom_vecs
2020 {
2021 get { return m_rgcolBottomVecs; }
2022 }
2023
2028 public List<BlobCollection<T>> top_vecs
2029 {
2030 get { return m_rgcolTopVecs; }
2031 }
2032
2038 public List<int> top_ids(int layer_id)
2039 {
2040 m_log.CHECK_GE(layer_id, 0, "Invalid layer id.");
2041 m_log.CHECK_LT(layer_id, m_rgrgnTopIdVecs.Count, "Invalid layer id.");
2042 return m_rgrgnTopIdVecs[layer_id];
2043 }
2044
2050 public List<int> bottom_ids(int layer_id)
2051 {
2052 m_log.CHECK_GE(layer_id, 0, "Invalid layer id.");
2053 m_log.CHECK_LT(layer_id, m_rgrgnBottomIdVecs.Count, "Invalid layer id.");
2054 return m_rgrgnBottomIdVecs[layer_id];
2055 }
2056
2060 public List<List<bool>> bottom_need_backward
2061 {
2062 get { return m_rgrgbBottomNeedBackward; }
2063 }
2064
2068 public List<double> blob_loss_weights
2069 {
2070 get { return m_rgdfBlobLossWeights; }
2071 }
2072
2076 public List<bool> layer_need_backward
2077 {
2078 get { return m_rgbLayerNeedBackward; }
2079 }
2080
2084 public BlobCollection<T> parameters
2085 {
2086 get { return m_colParams; }
2087 }
2088
2094 public BlobCollection<T> layer_blobs(string strLayerName)
2095 {
2096 if (!has_layer(strLayerName))
2097 return null;
2098
2099 Layer<T> layer = layer_by_name(strLayerName);
2100
2101 return layer.internal_blobs;
2102 }
2103
2108 public void SetLearnedParameters(BlobCollection<T> col)
2109 {
2110 m_colLearnableParams = col;
2111 }
2112
2116 public BlobCollection<T> learnable_parameters
2117 {
2118 get { return m_colLearnableParams; }
2119 }
2120
2124 public List<double?> params_lr
2125 {
2126 get { return m_rgdfParamsLr; }
2127 }
2128
2132 public List<double?> params_weight_decay
2133 {
2134 get { return m_rgdfParamsWeightDecay; }
2135 }
2136
2140 public DictionaryEx<string, int> param_names_index
2141 {
2142 get { return m_rgParamNamesIndex; }
2143 }
2144
2148 public List<int> param_owners
2149 {
2150 get { return m_rgnParamOwners; }
2151 }
2152
2156 public List<string> param_display_names
2157 {
2158 get { return m_rgstrParamDisplayNames; }
2159 }
2160
2167 public Blob<T> param_by_name(string strName, bool bThrowExceptionOnError = true)
2168 {
2169 foreach (Blob<T> blob in m_colParams)
2170 {
2171 if (blob.Name == strName)
2172 return blob;
2173 }
2174
2175 if (bThrowExceptionOnError)
2176 m_log.FAIL("Unknown parameter blob name '" + strName + "'");
2177
2178 return null;
2179 }
2180
2184 public int num_inputs
2185 {
2186 get { return m_colNetInputBlobs.Count; }
2187 }
2188
2192 public int num_outputs
2193 {
2194 get { return m_colNetOutputBlobs.Count; }
2195 }
2196
2200 public BlobCollection<T> input_blobs
2201 {
2202 get { return m_colNetInputBlobs; }
2203 }
2204
2208 public BlobCollection<T> output_blobs
2209 {
2210 get { return m_colNetOutputBlobs; }
2211 }
2212
2216 public List<int> output_blob_indices
2217 {
2218 get { return m_rgnNetOutputBlobIndices; }
2219 }
2220
2224 public List<int> input_blob_indices
2225 {
2226 get { return m_rgnNetInputBlobIndices; }
2227 }
2228
2234 public bool has_blob(string strBlobName)
2235 {
2236 return m_rgBlobNamesIndex.ContainsKey(strBlobName);
2237 }
2238
2245 public Blob<T> blob_by_name(string strName, bool bThrowExceptionOnError = true)
2246 {
2247 Blob<T> blob_ptr = null;
2248
2249 if (has_blob(strName))
2250 {
2251 int nIdx = m_rgBlobNamesIndex[strName];
2252 blob_ptr = m_colBlobs[nIdx];
2253 }
2254 else
2255 {
2256 if (bThrowExceptionOnError)
2257 m_log.FAIL("Unknown blob name " + strName);
2258 }
2259
2260 return blob_ptr;
2261 }
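// Lookup sketch: pass bThrowExceptionOnError = false to receive null instead of a
// FAIL when the blob does not exist, or guard with has_blob() first.
//
//   Blob<T> prob = net.blob_by_name("prob", false);   // "prob" is illustrative
//   if (prob != null)
//       Console.WriteLine(prob.shape_string);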
2262
2268 public int blob_index_by_name(string strName)
2269 {
2270 if (!has_blob(strName))
2271 return -1;
2272
2273 return m_rgBlobNamesIndex[strName];
2274 }
2275
2281 public bool has_layer(string strLayer)
2282 {
2283 return m_rgLayerNamesIndex.ContainsKey(strLayer);
2284 }
2285
2292 public Layer<T> layer_by_name(string strLayer, bool bThrowExceptionOnError = true)
2293 {
2294 Layer<T> layer_ptr = null;
2295
2296 if (has_layer(strLayer))
2297 {
2298 int nIdx = m_rgLayerNamesIndex[strLayer];
2299 layer_ptr = m_rgLayers[nIdx];
2300 }
2301 else
2302 {
2303 if (bThrowExceptionOnError)
2304 m_log.FAIL("Unknown layer name " + strLayer);
2305 }
2306
2307 return layer_ptr;
2308 }
2309
2315 public int layer_index_by_name(string strLayer)
2316 {
2317 if (!has_layer(strLayer))
2318 return -1;
2319
2320 return m_rgLayerNamesIndex[strLayer];
2321 }
2322
2330 public void set_debug_info(bool bVal)
2331 {
2332 m_bDebugInfo = bVal;
2333 }
2334
2335
2336
2347 public NetParameter InsertSplits(NetParameter param)
2348 {
2349 // Initialize by copying from the input NetParameter.
2350 NetParameter param_split = param.Clone(false);
2351
2352 DictionaryEx<string, KeyValuePair<int, int>> blob_name_to_last_top_idx = new DictionaryEx<string,KeyValuePair<int,int>>(new KeyValuePair<int,int>(-1, -1));
2353 DictionaryEx<KeyValuePair<int, int>, KeyValuePair<int, int>> bottom_idx_to_source_top_idx = new DictionaryEx<KeyValuePair<int,int>,KeyValuePair<int,int>>(new KeyValuePair<int,int>(-1, -1));
2354 DictionaryEx<KeyValuePair<int, int>, int> top_idx_to_bottom_count = new DictionaryEx<KeyValuePair<int,int>,int>(0);
2355 DictionaryEx<KeyValuePair<int, int>, double> top_idx_to_loss_weight = new DictionaryEx<KeyValuePair<int,int>,double>(0);
2356 DictionaryEx<KeyValuePair<int, int>, int> top_idx_to_bottom_split_idx = new DictionaryEx<KeyValuePair<int,int>,int>(0);
2357 DictionaryEx<int, string> layer_idx_to_layer_name = new DictionaryEx<int,string>("");
2358
2359 layer_idx_to_layer_name[-1] = "input";
2360
2361 // Determine the number of times each blob is used as an input (bottom) blob.
2362
2363 for (int i = 0; i < param.input.Count; i++)
2364 {
2365 string blob_name = param.input[i];
2366 blob_name_to_last_top_idx[blob_name] = new KeyValuePair<int, int>(-1, i);
2367 }
2368
2369 for (int i = 0; i < param.layer.Count; i++)
2370 {
2371 LayerParameter layer_param = param.layer[i];
2372 layer_idx_to_layer_name[i] = layer_param.name;
2373
2374 for (int j = 0; j < layer_param.bottom.Count; j++)
2375 {
2376 string blob_name = layer_param.bottom[j];
2377
2378 if (!blob_name_to_last_top_idx.ContainsKey(blob_name))
2379 m_log.FAIL("Unknown bottom blob '" + blob_name + "' (layer '" + layer_param.name + "', bottom index " + j.ToString() + ")");
2380
2381 KeyValuePair<int, int> bottom_idx = new KeyValuePair<int, int>(i, j);
2382 KeyValuePair<int, int> top_idx = blob_name_to_last_top_idx[blob_name];
2383 bottom_idx_to_source_top_idx[bottom_idx] = top_idx;
2384 top_idx_to_bottom_count[top_idx]++;
2385 }
2386
2387 for (int j = 0; j < layer_param.top.Count; j++)
2388 {
2389 string blob_name = layer_param.top[j];
2390 blob_name_to_last_top_idx[blob_name] = new KeyValuePair<int, int>(i, j);
2391 }
2392
2393 // A use of a top blob as a loss should be handled similarly to the use of
2394 // a top blob as an input (bottom) blob to another layer.
2395 int last_loss = Math.Min(layer_param.loss_weight.Count, layer_param.top.Count);
2396
2397 for (int j = 0; j < last_loss; j++)
2398 {
2399 string blob_name = layer_param.top[j];
2400 KeyValuePair<int, int> top_idx = blob_name_to_last_top_idx[blob_name];
2401 top_idx_to_loss_weight[top_idx] = layer_param.loss_weight[j];
2402
2403 if (top_idx_to_loss_weight[top_idx] != 0)
2404 top_idx_to_bottom_count[top_idx]++;
2405 }
2406 }
2407
2408 // Create split layer for any input blobs used by other layer as bottom
2409 // blobs more than once.
2410 for (int i = 0; i < param.input.Count; i++)
2411 {
2412 int split_count = top_idx_to_bottom_count[new KeyValuePair<int, int>(-1, i)];
2413
2414 if (split_count > 1)
2415 {
2416 string layer_name = layer_idx_to_layer_name[-1];
2417 string blob_name = param.input[i];
2418 double kZeroLossWeight = 0;
2419 LayerParameter split_layer_param = CreateSplitLayer(layer_name, blob_name, i, split_count, kZeroLossWeight);
2420 param_split.layer.Add(split_layer_param);
2421 }
2422 }
2423
2424 for (int i = 0; i < param.layer.Count; i++)
2425 {
2426 LayerParameter layer_param = param.layer[i].Clone(true);
2427 param_split.layer.Add(layer_param);
2428
2429 // Replace any shared bottom blobs with split layer outputs.
2430 for (int j = 0; j < layer_param.bottom.Count; j++)
2431 {
2432 KeyValuePair<int, int> top_idx = bottom_idx_to_source_top_idx[new KeyValuePair<int, int>(i, j)];
2433 int split_count = top_idx_to_bottom_count[top_idx];
2434
2435 if (split_count > 1)
2436 {
2437 string layer_name = layer_idx_to_layer_name[top_idx.Key];
2438 string blob_name = layer_param.bottom[j];
2439
2440 layer_param.bottom[j] = SplitBlobName(layer_name, blob_name, top_idx.Value, top_idx_to_bottom_split_idx[top_idx]++);
2441 }
2442 }
2443
2444 // Create split layer for any top blobs used by other layer as bottom
2445 // blobs more than once.
2446 for (int j = 0; j < layer_param.top.Count; j++)
2447 {
2448 KeyValuePair<int, int> top_idx = new KeyValuePair<int, int>(i, j);
2449 int split_count = top_idx_to_bottom_count[top_idx];
2450
2451 if (split_count > 1)
2452 {
2453 string layer_name = layer_idx_to_layer_name[i];
2454 string blob_name = layer_param.top[j];
2455 double loss_weight = top_idx_to_loss_weight[top_idx];
2456 LayerParameter split_layer_param = CreateSplitLayer(layer_name, blob_name, j, split_count, loss_weight);
2457 param_split.layer.Add(split_layer_param);
2458
2459 if (loss_weight != 0)
2460 {
2461 layer_param.loss_weight.Clear();
2462 top_idx_to_bottom_split_idx[top_idx]++;
2463 }
2464 }
2465 }
2466 }
2467
2468 return param_split;
2469 }
2470
2471 private LayerParameter CreateSplitLayer(string layer_name, string blob_name, int blob_idx, int split_count, double loss_weight)
2472 {
2473 LayerParameter split_layer_param = new LayerParameter(LayerParameter.LayerType.SPLIT, SplitLayerName(layer_name, blob_name, blob_idx));
2474 split_layer_param.bottom.Add(blob_name);
2475
2476 for (int k = 0; k < split_count; k++)
2477 {
2478 split_layer_param.top.Add(SplitBlobName(layer_name, blob_name, blob_idx, k));
2479
2480 if (loss_weight != 0)
2481 {
2482 if (k == 0)
2483 split_layer_param.loss_weight.Add(loss_weight);
2484 else
2485 split_layer_param.loss_weight.Add(0);
2486 }
2487 }
2488
2489 return split_layer_param;
2490 }
2491
2492 private string SplitLayerName(string layer_name, string blob_name, int blob_idx)
2493 {
2494 return blob_name + "_" + layer_name + "_" + blob_idx.ToString() + "_split";
2495 }
2496
2497 private string SplitBlobName(string layer_name, string blob_name, int blob_idx, int split_idx)
2498 {
2499 return blob_name + "_" + layer_name + "_" + blob_idx.ToString() + "_split_" + split_idx.ToString();
2500 }
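// Worked example of the naming scheme above (illustrative layer/blob names): if the
// top blob "data", produced by layer "input" at top index 0, is consumed by two other
// layers, InsertSplits adds a SPLIT layer named "data_input_0_split" whose bottom is
// "data" and whose tops are "data_input_0_split_0" and "data_input_0_split_1"; each
// consumer's bottom is then rewired to one of those split outputs.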
2501
2510 public void LoadWeights(byte[] rgWeights, IXPersist<T> persist, List<string> inputWtInfo = null, List<string> targetWtInfo = null, string strSkipBlobType = null)
2511 {
2512 if (rgWeights == null)
2513 return;
2514
2515 List<string> rgExpectedShapes = new List<string>();
2516 bool bLoadedDiffs;
2517
2518 foreach (Blob<T> b in m_colLearnableParams)
2519 {
2520 rgExpectedShapes.Add(b.shape_string);
2521 }
2522
2523 if (inputWtInfo != null && inputWtInfo.Count == 0)
2524 inputWtInfo = null;
2525
2526 if (targetWtInfo != null && targetWtInfo.Count == 0)
2527 targetWtInfo = null;
2528
2529 bool bSizeToFit = (inputWtInfo != null && targetWtInfo != null);
2530
2531 persist.LoadWeights(rgWeights, rgExpectedShapes, m_colLearnableParams, bSizeToFit, out bLoadedDiffs, inputWtInfo, targetWtInfo, strSkipBlobType);
2532 m_cuda.SynchronizeDevice();
2533 }
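// Hedged usage sketch: 'persist' is assumed to implement IXPersist<T>, and the byte
// array to have been produced by SaveWeights (or another compatible exporter). The
// file name is illustrative only.
//
//   byte[] rgWeights = File.ReadAllBytes("mynet.weights.bin");
//   net.LoadWeights(rgWeights, persist);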
2534
2541 public byte[] SaveWeights(IXPersist<T> persist, bool bSaveDiff = false)
2542 {
2543 foreach (Blob<T> blob in m_colLearnableParams)
2544 {
2545 foreach (Layer<T> layer in m_rgLayers)
2546 {
2547 if (layer.blobs.Contains(blob))
2548 blob.Tag = layer.layer_param.name;
2549 }
2550 }
2551
2552 return persist.SaveWeights(m_colLearnableParams, bSaveDiff);
2553 }
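// Complementary sketch: serialize the learnable parameters and write them to disk
// (again, 'persist' and the file name are illustrative assumptions).
//
//   byte[] rgSaved = net.SaveWeights(persist);
//   File.WriteAllBytes("mynet.weights.bin", rgSaved);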
2554
2560 public Layer<T> FindLayerOwningBlob(Blob<T> b)
2561 {
2562 foreach (Layer<T> layer in layers)
2563 {
2564 if (layer.blobs.Contains(b))
2565 return layer;
2566 }
2567
2568 return null;
2569 }
2570
2576 public int FindLayerIndexOwningBlob(Blob<T> b)
2577 {
2578 for (int i = 0; i < layers.Count; i++)
2579 {
2580 if (layers[i].blobs.Contains(b))
2581 return i;
2582 }
2583
2584 return -1;
2585 }
2586
2592 public Blob<T> FindBlob(string strName)
2593 {
2594 foreach (Blob<T> blob in blobs)
2595 {
2596 if (blob.Name == strName)
2597 return blob;
2598 }
2599
2600 foreach (Layer<T> layer in m_rgLayers)
2601 {
2602 foreach (Blob<T> blob in layer.blobs)
2603 {
2604 if (blob.Name == strName)
2605 return blob;
2606 }
2607
2608 foreach (Blob<T> blob in layer.internal_blobs)
2609 {
2610 if (blob.Name == strName)
2611 return blob;
2612 }
2613 }
2614
2615 return null;
2616 }
2617
2623 public BlobCollection<T> FindBottomBlobsOfLayer(string strLayer)
2624 {
2625 int nLayerIdx = 0;
2626
2627 for (int i = 0; i < m_rgLayers.Count; i++)
2628 {
2629 if (m_rgLayers[i].layer_param.name == strLayer)
2630 {
2631 nLayerIdx = i;
2632 break;
2633 }
2634 }
2635
2636 return m_rgcolBottomVecs[nLayerIdx];
2637 }
2638
2644 public BlobCollection<T> FindTopBlobsOfLayer(string strLayer)
2645 {
2646 int nLayerIdx = 0;
2647
2648 for (int i = 0; i < m_rgLayers.Count; i++)
2649 {
2650 if (m_rgLayers[i].layer_param.name == strLayer)
2651 {
2652 nLayerIdx = i;
2653 break;
2654 }
2655 }
2656
2657 return m_rgcolTopVecs[nLayerIdx];
2658 }
2659
2664 public Blob<T> FindLossBottomBlob()
2665 {
2666 if (m_rgLayers.Count == 0)
2667 return null;
2668
2669 int nLayerIdx = m_rgLayers.Count - 1;
2670 Layer<T> layer = m_rgLayers[nLayerIdx];
2671
2672 while (nLayerIdx > 0 && (!layer.parent_layer_type.HasValue || layer.parent_layer_type.Value != LayerParameter.LayerType.LOSS))
2673 {
2674 nLayerIdx--;
2675 layer = m_rgLayers[nLayerIdx];
2676 }
2677
2678 if (nLayerIdx > 0)
2679 {
2680 string strBtm = layer.layer_param.bottom[0];
2681 return blob_by_name(strBtm);
2682 }
2683
2684 return null;
2685 }
2686
2692 public DebugInformation<T> GetDebugInformation(bool bDetectNans)
2693 {
2694 if (m_blobWork == null)
2695 m_blobWork = new common.Blob<T>(m_cuda, m_log);
2696
2697 DebugInformation<T> debugInfo = new DebugInformation<T>(name, m_blobWork, bDetectNans);
2698
2699 for (int i = 0; i < m_rgLayers.Count; i++)
2700 {
2701 debugInfo.Add(m_rgLayers[i], m_rgcolBottomVecs[i], m_rgcolTopVecs[i]);
2702 }
2703
2704 return debugInfo;
2705 }
2706
2711 public string GetDataSource()
2712 {
2713 foreach (LayerParameter lp in m_param.layer)
2714 {
2715 if (lp.type == LayerParameter.LayerType.DATA)
2716 return lp.data_param.source;
2717 }
2718
2719 return null;
2720 }
2721
2729 public bool ReInitializeParameters(WEIGHT_TARGET target, params string[] rgstrLayers)
2730 {
2731 foreach (Layer<T> layer in m_rgLayers)
2732 {
2733 if (rgstrLayers == null || rgstrLayers.Length == 0 || rgstrLayers.Contains(layer.layer_param.name))
2734 {
2735 layer.ReInitializeParameters(target);
2736 }
2737 }
2738
2739 return true;
2740 }
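// Sketch: re-run the weight fillers on selected layers only; passing no layer names
// re-initializes every layer. 'target' is a WEIGHT_TARGET value (defined in
// Interfaces.cs) selecting which blobs are affected; the layer name is illustrative.
//
//   net.ReInitializeParameters(target, "fc8");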
2741
2748 public Layer<T> FindLayer(LayerParameter.LayerType? type, string strName)
2749 {
2750 if (!type.HasValue && string.IsNullOrEmpty(strName))
2751 throw new Exception("You must specify either a layer type or name, or both.");
2752
2753 foreach (Layer<T> layer in m_rgLayers)
2754 {
2755 bool bTypeMatch = !type.HasValue;
2756 bool bNameMatch = string.IsNullOrEmpty(strName);
2757
2758 if (type.HasValue && layer.type == type.Value)
2759 bTypeMatch = true;
2760
2761 if (!string.IsNullOrEmpty(strName) && layer.layer_param.name == strName)
2762 bNameMatch = true;
2763
2764 if (bTypeMatch && bNameMatch)
2765 return layer;
2766 }
2767
2768 return null;
2769 }
2770
2776 public List<Layer<T>> FindLayers(LayerParameter.LayerType type)
2777 {
2778 List<Layer<T>> rgLayers = new List<Layer<T>>();
2779
2780 foreach (Layer<T> layer in m_rgLayers)
2781 {
2782 if (layer.type == type)
2783 rgLayers.Add(layer);
2784 }
2785
2786 return rgLayers;
2787 }
2788
2795 public Layer<T> FindLayer(string strType, string strName)
2796 {
2797 LayerParameter.LayerType? type = LayerParameter.GetType(strType);
2798 return FindLayer(type, strName);
2799 }
2800
2806 public Layer<T> FindLastLayer(LayerParameter.LayerType type)
2807 {
2808 for (int i = m_rgLayers.Count - 1; i >= 0; i--)
2809 {
2810 if (m_rgLayers[i].type == type)
2811 return m_rgLayers[i];
2812 }
2813
2814 return null;
2815 }
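// Lookup sketch: enumerate all layers of a given type, or take the last one in the
// net's ordering (LayerType.DATA is used here purely as an example).
//
//   List<Layer<T>> rgDataLayers = net.FindLayers(LayerParameter.LayerType.DATA);
//   Layer<T> lastData = net.FindLastLayer(LayerParameter.LayerType.DATA);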
2816
2820 public CudaDnn<T> Cuda
2821 {
2822 get { return m_cuda; }
2823 }
2824 }
2825}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
bool IsEnabled
Returns whether or not the Log is enabled.
Definition: Log.cs:50
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
Definition: Log.cs:263
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
void CHECK_LT(double df1, double df2, string str)
Test whether one number is less than another.
Definition: Log.cs:275
The Utility class provides general utility functions.
Definition: Utility.cs:35
The BatchInput class stores the mini-batch index and input data.
Definition: BatchInput.cs:13
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
Blob(CudaDnn< T > cuda, Log log, bool bIncludeDiff=true, bool bUseHalfSize=false)
The Blob constructor.
Definition: Blob.cs:64
object Tag
Returns a user defined object associated with the Blob.
Definition: Blob.cs:2770
string shape_string
Returns a string describing the Blob's shape.
Definition: Blob.cs:657
T asum_diff()
Compute the sum of absolute values (L1 norm) of the diff.
Definition: Blob.cs:1718
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECIATED; use
Definition: Blob.cs:442
void FromProto(BlobProto bp, bool bReshape=true)
Create a new Blob from a given BlobProto.
Definition: Blob.cs:1589
BlobProto ToProto(bool bWriteDiff=false)
Writes the Blob to a new BlobProto.
Definition: Blob.cs:1663
bool freeze_learning
Specifies whether or not the diff is applied to the data during Update. When freeze learning = true,...
Definition: Blob.cs:377
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
Definition: Blob.cs:684
T asum_data()
Compute the sum of absolute values (L1 norm) of the data.
Definition: Blob.cs:1706
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
Definition: Blob.cs:1981
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
void ResetGhostMemory()
Resets the ghost memory by enabling it if this instance was configured to use ghost memory.
Definition: CudaDnn.cs:1783
void FreeMemory(long hMem)
Free previously allocated GPU memory.
Definition: CudaDnn.cs:2517
void SynchronizeDevice()
Synchronize the operations on the current device.
Definition: CudaDnn.cs:2093
long AllocMemory(List< double > rg)
Allocate a block of GPU memory and copy a list of doubles to it.
Definition: CudaDnn.cs:2291
void DisableGhostMemory()
Disables the ghost memory, if enabled.
Definition: CudaDnn.cs:1775
The DebugInformation contains information used to help debug the Layers of a Net while it is training...
string DetectFirstNaN(out string strType)
Searches for the first NaN within any of the Layers.
The GetWorkBlobArgs are passed to the Layer::OnGetWorkBlob event which is supported for debugging onl...
Definition: EventArgs.cs:91
Blob< T > Blob
Specifies the blob.
Definition: EventArgs.cs:105
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter
Definition: Net.cs:23
bool has_layer(string strLayer)
Returns whether or not the Net has a given Layer by its name.
Definition: Net.cs:2281
int num_outputs
Returns the number of outputs.
Definition: Net.cs:2193
List< Layer< T > > layers
Returns the layers.
Definition: Net.cs:2003
int layer_index_by_name(string strLayer)
Returns a Layer's index given its name.
Definition: Net.cs:2315
void Reshape()
Reshape all layers from the bottom to the top.
Definition: Net.cs:1800
double ForwardFromTo(int nStart=0, int nEnd=int.MaxValue)
The FromTo variant of forward and backward operate on the (topological) ordering by which the net is ...
Definition: Net.cs:1402
void CopyInternalBlobsTo(Net< T > dstNet)
Copy the internal blobs from one net to another.
Definition: Net.cs:1699
BlobCollection< T > parameters
Returns the parameters.
Definition: Net.cs:2085
List< bool > layer_need_backward
Returns a collection of items that tell whether each layer needs a backward pass or not.
Definition: Net.cs:2077
BlobCollection< T > Forward()
Run forward with the input Blob's already fed separately.
Definition: Net.cs:1445
Layer< T > FindLayer(string strType, string strName)
Find the layer with the matching type, name and or both.
Definition: Net.cs:2795
List< string > layer_names
Returns the layer names.
Definition: Net.cs:1979
bool EnableBreakOnFirstNaN
Enable/disable the break-on-first-NaN functionality where training stops immediately upon detecting a Na...
Definition: Net.cs:845
List< int > input_blob_indices
Returns a list of the input Blob indexes.
Definition: Net.cs:2225
bool EnableDetailedNanDetection
Enable/disable whether or not detailed NaNs are detected - this will make debugging slower and is onl...
Definition: Net.cs:855
void BackwardDebugInfo(int layer_id)
Helper for displaying debug info in Backward.
Definition: Net.cs:1587
string LabelQueryEpochs
Return the label query epochs for the active datasource.
Definition: Net.cs:955
void SetLearnedParameters(BlobCollection< T > col)
Sets the learned parameters.
Definition: Net.cs:2108
BlobCollection< T > input_blobs
Returns the collection of input Blobs.
Definition: Net.cs:2201
int FindLayerIndexOwningBlob(Blob< T > b)
Finds the index of the Layer that owns a given Blob.
Definition: Net.cs:2576
void SetEnablePassthrough(bool bEnable)
Enables/disables passthrough on each layer of the net.
Definition: Net.cs:996
string ActiveLabelCounts
Returns the active label counts observed during training.
Definition: Net.cs:914
bool has_blob(string strBlobName)
Returns whether or not the Net contains a given Blob.
Definition: Net.cs:2234
void CopyTrainedLayersTo(Net< T > dstNet)
Copies the trained layer of this Net to another Net.
Definition: Net.cs:1714
Layer< T > FindLastLayer(LayerParameter.LayerType type)
Find the last layer with the matching type.
Definition: Net.cs:2806
void set_debug_info(bool bVal)
Sets the debug information flag.
Definition: Net.cs:2330
bool ForwardBackward(BlobCollection< T > colBottom, out double dfLocalLoss, TRAIN_STEP step=TRAIN_STEP.NONE)
Runs a Forward pass followed by a Backward pass.
Definition: Net.cs:1943
string BoostQueryHitPercents
Return the boost query hit percentages for the active datasource.
Definition: Net.cs:974
void CopyTrainedLayersFrom(NetParameter param)
For an already initialized net, CopyTrainedLayersFrom copies the already trained layers from another ...
Definition: Net.cs:1814
void ShareWeights()
Shares weight data of owner blobs with shared blobs.
Definition: Net.cs:1923
NetParameter InsertSplits(NetParameter param)
Create a new NetParameter and insert splits into it based on a given NetParameter.
Definition: Net.cs:2347
void CopyTrainedLayersTo(Net< T > dstNet, DictionaryEx< string, string > rgLayerNames, bool bTranspose)
Copies the trained layers of this Net to another Net.
Definition: Net.cs:1754
Layer< T > layer_by_name(string strLayer, bool bThrowExceptionOnError=true)
Returns a Layer given its name.
Definition: Net.cs:2292
void Backward(int nStart=int.MaxValue, int nEnd=0)
The network backward should take no input and output, since it solely computes the gradient w....
Definition: Net.cs:1499
int AppendBottom(NetParameter param, int layer_id, int bottom_id, List< string > available_blobs, DictionaryEx< string, int > blob_name_to_idx)
Append a new bottom blob to the net.
Definition: Net.cs:1272
Phase phase
Returns the network phase: TRAIN or TEST
Definition: Net.cs:2011
string GetDataSource()
Returns the data source used by the network.
Definition: Net.cs:2711
BlobCollection< T > Forward(out double dfLoss)
Run forward with the input Blob's already fed separately.
Definition: Net.cs:1459
bool EnableLayerDebugging
Enable/disable layer debugging which causes each layer to check for NAN/INF on each forward/backward ...
Definition: Net.cs:867
void RestorePhase()
Restore the network phase to its original state.
Definition: Net.cs:831
DebugInformation< T > GetDebugInformation(bool bDetectNans)
Returns the DebugInformation for the Net.
Definition: Net.cs:2692
BlobCollection< T > FindBottomBlobsOfLayer(string strLayer)
Returns the collection of bottom blobs for a given layer.
Definition: Net.cs:2623
virtual void Dispose(bool bDisposing)
Releases all resources (GPU and Host) used by the Net.
Definition: Net.cs:184
List< double?> params_lr
Returns the learnable parameter learning rate multipliers.
Definition: Net.cs:2125
void LoadWeights(byte[] rgWeights, IXPersist< T > persist, List< string > inputWtInfo=null, List< string > targetWtInfo=null, string strSkipBlobType=null)
Loads new weights into the Net.
Definition: Net.cs:2510
int num_inputs
Returns the number of inputs.
Definition: Net.cs:2185
NetParameter ToProto(bool bIncludeBlobs)
Writes the net to a proto.
Definition: Net.cs:1865
void AppendTop(NetParameter param, int layer_id, int top_id, List< string > available_blobs, DictionaryEx< string, int > blob_name_to_idx)
Append a new input or top blob to the net.
Definition: Net.cs:1154
Blob< T > FindBlob(string strName)
Finds a Blob in the Net by name.
Definition: Net.cs:2592
bool StateMeetsRule(NetState state, NetStateRule rule, string strLayer)
Returns whether NetState state meets NetStateRule rule.
Definition: Net.cs:1062
byte[] SaveWeights(IXPersist< T > persist, bool bSaveDiff=false)
Save the weights to a byte array.
Definition: Net.cs:2541
Layer< T > FindLayer(LayerParameter.LayerType? type, string strName)
Find the layer with the matching type, name and or both.
Definition: Net.cs:2748
BlobCollection< T > output_blobs
Returns the collection of output Blobs.
Definition: Net.cs:2209
List< string > blob_names
Returns the blob names.
Definition: Net.cs:1987
void ClearParamDiffs()
Zero out the diffs of all net parameters. This should be run before Backward.
Definition: Net.cs:1907
Blob< T > param_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a parameter given its name.
Definition: Net.cs:2167
EventHandler< WorkspaceArgs > OnSetWorkspace
Specifies the OnSetWorkspace event that fires when the setWorkspace() function is called by a layer t...
Definition: Net.cs:132
Phase SetPhase(Phase phase)
Change the phase of the network.
Definition: Net.cs:814
List< List< bool > > bottom_need_backward
Returns the collection of lists that tell whether or not the bottom of each layer needs a backward pa...
Definition: Net.cs:2061
void InputDebugInfo(int input_id)
Helper for displaying debug info in Forward about input blobs.
Definition: Net.cs:1544
BlobCollection< T > learnable_parameters
Returns the learnable parameters.
Definition: Net.cs:2117
BlobCollection< T > layer_blobs(string strLayerName)
Returns the collection of Blobs internal to a Layer.
Definition: Net.cs:2094
void AppendParam(NetParameter param, int layer_id, int param_id)
Append a new parameter blob to the net.
Definition: Net.cs:1303
DictionaryEx< string, int > param_names_index
Returns the dictionary look for parameter names to their indexes.
Definition: Net.cs:2141
Blob< T > FindLossBottomBlob()
Find the bottom blob of the Loss layer if it exists, otherwise null is returned.
Definition: Net.cs:2664
List< double > blob_loss_weights
Returns the collection of blob loss weights.
Definition: Net.cs:2069
NetParameter net_param
Returns the net parameter.
Definition: Net.cs:1857
Net(CudaDnn< T > cuda, Log log, NetParameter p, CancelEvent evtCancel, IXDatabaseBase db, Phase phaseOverride=Phase.NONE, AutoResetEvent evtTrainingCompleted=null, Net< T > sharedNet=null, onGetWorkspace getws=null, onSetWorkspace setws=null)
The Net constructor.
Definition: Net.cs:161
void Dispose()
Releases all resources (GPU and Host) used by the Net.
Definition: Net.cs:258
Blob< T > blob_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a blob given its name.
Definition: Net.cs:2245
string name
Returns the network name.
Definition: Net.cs:1971
void ShareTrainedLayersWith(Net< T > srcNet, bool bEnableLog=false)
For an already initialized net, implicitly copies (i.e., using no additional memory) the pre-trained...
Definition: Net.cs:1653
BlobCollection< T > blobs
Returns the blobs.
Definition: Net.cs:1995
BlobCollection< T > Forward(BlobCollection< T > colBottom, out double dfLoss, bool bReshape=false)
Run forward using a set of bottom blobs and return the result.
Definition: Net.cs:1472
int blob_index_by_name(string strName)
Returns the index of a blob given its name.
Definition: Net.cs:2268
string LabelQueryHitPercents
Return the label query hit percentages for the active datasource.
Definition: Net.cs:936
List< int > bottom_ids(int layer_id)
Returns the ids of the bottom blobs of layer i.
Definition: Net.cs:2050
bool ReInitializeParameters(WEIGHT_TARGET target, params string[] rgstrLayers)
Re-initializes the blobs and each of the specified layers by re-running the filler (if any) specified...
Definition: Net.cs:2729
EventHandler< GetIterationArgs > OnGetIteration
Specifies the OnGetIteration event that fires when a layer needs to get the current iteration from th...
Definition: Net.cs:136
Layer< T > FindLayerOwningBlob(Blob< T > b)
Finds the Layer that owns a given Blob.
Definition: Net.cs:2560
void UpdateDebugInfo(int param_id)
Helper for displaying debug info in Update.
Definition: Net.cs:1622
List< Layer< T > > FindLayers(LayerParameter.LayerType type)
Find the layers with the matching type.
Definition: Net.cs:2776
List< int > top_ids(int layer_id)
Returns the ids of the top blobs of layer i.
Definition: Net.cs:2038
List< double?> params_weight_decay
Returns the learnable parameter decay multipliers.
Definition: Net.cs:2133
EventHandler< WorkspaceArgs > OnGetWorkspace
Specifies the OnGetWorkspace event that fires when the getWorkspace() function is called by a layer t...
Definition: Net.cs:128
void ForwardDebugInfo(int layer_id)
Helper for displaying debug info in Forward.
Definition: Net.cs:1558
void Update()
Updates the network weights based on the diff values computed.
Definition: Net.cs:1896
NetParameter FilterNet(NetParameter param)
Removes layers that the user specified should be excluded given the current phase,...
Definition: Net.cs:1020
List< BlobCollection< T > > top_vecs
Returns the top vecs for each layer – usually you won't need this unless you do per-layer checks suc...
Definition: Net.cs:2029
BlobCollection< T > FindTopBlobsOfLayer(string strLayer)
Returns the collection of top blobs for a given layer.
Definition: Net.cs:2644
List< int > param_owners
Returns the list of parameter owner indexes.
Definition: Net.cs:2149
List< int > output_blob_indices
Returns a list of the output Blob indexes.
Definition: Net.cs:2217
List< BlobCollection< T > > bottom_vecs
Returns the bottom vecs for each layer – usually you won't need this unless you do per-layer checks s...
Definition: Net.cs:2020
void Init(NetParameter p, Phase phaseOverride=Phase.NONE, AutoResetEvent evtTrainingCompleted=null)
Initialize a network with a NetParameter.
Definition: Net.cs:287
List< string > param_display_names
Returns the list of parameter display names.
Definition: Net.cs:2157
CudaDnn< T > Cuda
Returns the instance of CudaDnn used by this network.
Definition: Net.cs:2821
The TransferInput class is used to transfer get and set input data.
The DataLayer loads data from the IXImageDatabase database. This layer is initialized with the MyCaff...
Definition: DataLayer.cs:24
/b DEPRECIATED (use DataLayer DataLabelMappingParameter instead) The LabelMappingLayer converts origi...
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
EventHandler< WorkspaceArgs > OnGetWorkspace
Specifies the OnGetWorkspace event that fires when the getWorkspace() function is called by a layer t...
Definition: Layer.cs:124
virtual void SetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Set the OnDebug event.
Definition: Layer.cs:370
virtual bool AutoTopBlobs
Return whether "anonymous" top (output) Blobs are created automatically by the Layer.
Definition: Layer.cs:1031
double loss(int nTopIdx)
Returns the scalar loss associated with the top Blob at a given index.
Definition: Layer.cs:908
void SetEnablePassthrough(bool bEnable)
Enables/disables the pass-through mode.
Definition: Layer.cs:1276
virtual bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
Definition: Layer.cs:389
EventHandler< GetIterationArgs > OnGetIteration
Specifies the OnGetIteration event that fires when a layer needs to get the current iteration from th...
Definition: Layer.cs:132
LayerParameter.? LayerType parent_layer_type
Optionally, specifies the parent layer type (e.g. LOSS, etc.)
Definition: Layer.cs:248
LayerParameter.LayerType type
Returns the LayerType of this Layer.
Definition: Layer.cs:927
virtual int ExactNumTopBlobs
Returns the exact number of top (output) Blobs required by the Layer, or -1 if no exact number is req...
Definition: Layer.cs:979
void Dispose()
Releases all GPU and host resources used by the Layer.
Definition: Layer.cs:180
virtual int MinTopBlobs
Returns the minimum number of top (output) Blobs required by the Layer, or -1 if no minimum number is...
Definition: Layer.cs:992
virtual void ResetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Reset the OnDebug event, disabling it.
Definition: Layer.cs:379
EventHandler< WorkspaceArgs > OnSetWorkspace
Specifies the OnSetWorkspace event that fires when the setWorkspace() function is called by a layer t...
Definition: Layer.cs:128
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
Definition: Layer.cs:1468
LayerParameter layer_param
Returns the LayerParameter for this Layer.
Definition: Layer.cs:899
virtual void ConnectLoss(LossLayer< T > layer)
Called to connect the loss OnLoss event to a specified layer (typically the data layer).
Definition: Layer.cs:240
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
Definition: Layer.cs:875
BlobCollection< T > internal_blobs
Returns the collection of internal Blobs used by the Layer.
Definition: Layer.cs:883
The LayerParameterEx class is used when sharing another Net to conserve GPU memory and extends the La...
Definition: Layer.cs:1750
The LossLayer provides an interface for Layer's that take two blobs as input – usually (1) prediction...
Definition: LossLayer.cs:23
string synchronize_with
(optional, default = null) Specifies a secondary (target) dataset to synchronize with.
string source
When used with the DATA parameter, specifies the data 'source' within the database....
Specifies whether to use the NVIDIA cuDnn version or Caffe version of a given forward/backward operat...
Engine
Defines the type of engine to use.
Specifies the base parameter for all layers.
List< ParamSpec > parameters
Specifies the ParamSpec parameters of the LayerParameter.
string name
Specifies the name of this LayerParameter.
List< double > loss_weight
Specifies the loss weight.
LayerType type
Specifies the type of this LayerParameter.
List< bool > propagate_down
Specifies whether or not the LayerParameter (or portions of) should be backpropagated.
List< NetStateRule > include
Specifies the NetStateRule's for which this LayerParameter should be included.
int GetParameterCount()
Returns the number of ParamSpec parameters used by the layer.
List< NetStateRule > exclude
Specifies the NetStateRule's for which this LayerParameter should be excluded.
List< string > top
Specifies the active top connections (in the bottom, out the top)
int solver_count
Returns the number of Solvers participating in a multi-GPU session for which the Solver using this La...
int solver_rank
Returns the SolverRank of the Solver using this LayerParameter (if any).
DataParameter data_param
Returns the parameter set when initialized with LayerType.DATA
List< string > bottom
Specifies the active bottom connections (in the bottom, out the top).
LayerType
Specifies the layer type.
override string ToString()
Returns a string representation of the LayerParameter.
static ? LayerType GetType(string strType)
Converts the string type into a LayerType, or null if no match is found.
List< BlobProto > blobs
Specifies the blobs of the LayerParameter.
virtual LayerParameter Clone(bool bCloneBlobs)
Creates a new copy of this instance of the parameter.
Specifies the parameters use to create a Net
Definition: NetParameter.cs:18
NetState state
The current 'state' of the network, including the phase, level and stage. Some layers may be included...
List< string > input
The input blobs to the network.
bool force_backward
Whether the network will force every layer to carry out backward operation. If set False,...
string DebugString()
Returns a debug string for the network.
List< int > input_dim
DEPRECIATED - 4D input dimensions - use 'input_shape' instead. If specified, for each input blob ther...
string name
The name of the network.
Definition: NetParameter.cs:90
List< LayerParameter > layer
The layers that make up the net. Each of their configurations, including connectivity and behavior,...
NetParameter Clone(bool bCloneLayers=true, int? nSolverCount=null, int? nSolverRank=null)
Creates a new copy of this instance of the parameter.
List< BlobShape > input_shape
The shape of the input blobs.
Specifies the NetState which includes the phase, level and stage for which a given Net is to run unde...
Definition: NetState.cs:19
int level
Specifies the level of the NetState.
Definition: NetState.cs:73
Phase phase
Specifies the Phase of the NetState.
Definition: NetState.cs:63
List< string > stage
Specifies the stages of the NetState.
Definition: NetState.cs:83
Specifies a NetStateRule used to determine whether a Net falls within a given include or exclude patt...
Definition: NetStateRule.cs:20
List< string > stage
Customizable sets of stages to include. The net must have ALL of the specified stages and NONE of the...
int? min_level
Set the minimum levels in which the layer should be used. Leave undefined to meet the rule regardless...
Phase phase
Set phase to require the NetState to have a particular phase (TRAIN or TEST) to meet this rule.
Definition: NetStateRule.cs:99
int? max_level
Set the maximum levels in which the layer should be used. Leave undefined to meet the rule regardless...
List< string > not_stage
Customizable sets of stages to exclude. The net must have ALL of the specified stages and NONE of the...
Specifies training parameters (multipliers on global learning constants, and the name of other settin...
Definition: ParamSpec.cs:19
double decay_mult
Specifies the multiplier used on the global weight decay for this parameter.
Definition: ParamSpec.cs:168
DimCheckMode
Defines the dimension check mode.
Definition: ParamSpec.cs:37
double lr_mult
Specifies the multiplier used on the global learning rate for this parameter.
Definition: ParamSpec.cs:158
The IXDatabaseBase interface defines the general interface to the in-memory database.
Definition: Interfaces.cs:444
DB_VERSION GetVersion()
Returns the version of the MyCaffe Image Database being used.
The IXImageDatabaseBase interface defines the general interface to the in-memory image database.
Definition: Interfaces.cs:878
The IXPersist interface is used by the CaffeControl to load and save weights.
Definition: Interfaces.cs:187
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
DB_VERSION
Defines the image database version to use.
Definition: Interfaces.cs:397
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
delegate void onSetWorkspace(object sender, WorkspaceArgs e)
Delegate used to set the OnSetWorkspace event.
delegate void onGetWorkspace(object sender, WorkspaceArgs e)
Delegate used to set the OnGetWorkspace event.
TRAIN_STEP
Defines the training stepping method (if any).
Definition: Interfaces.cs:131
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
Definition: Interfaces.cs:38
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.db.image namespace contains all image database related classes.
Definition: Database.cs:18
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12