MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
TrainerNoisyDqn.cs
using MyCaffe;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.data;
using MyCaffe.layers;
using MyCaffe.param;
using MyCaffe.solvers;
using MyCaffe.trainers;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

{
    public class TrainerNoisyDqn<T> : IxTrainerRL, IDisposable
    {
        IxTrainerCallback m_icallback;
        CryptoRandom m_random = new CryptoRandom();
        MyCaffeControl<T> m_mycaffe;
        PropertySet m_properties;

        public TrainerNoisyDqn(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
        {
            m_icallback = icallback;
            m_mycaffe = mycaffe;
            m_properties = properties;
            m_random = random;
        }

        public void Dispose()
        {
        }

        public bool Initialize()
        {
            m_mycaffe.CancelEvent.Reset();
            m_icallback.OnInitialize(new InitializeArgs(m_mycaffe));
            return true;
        }

        public bool Shutdown(int nWait)
        {
            if (m_mycaffe != null)
            {
                m_mycaffe.CancelEvent.Set();
                wait(nWait);
            }

            m_icallback.OnShutdown();

            return true;
        }

        private void wait(int nWait)
        {
            int nWaitInc = 250;
            int nTotalWait = 0;

            while (nTotalWait < nWait)
            {
                m_icallback.OnWait(new WaitArgs(nWaitInc));
                nTotalWait += nWaitInc;
            }
        }

        public ResultCollection RunOne(int nDelay = 1000)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, 1, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);
            agent.Dispose();
            return null;
        }

        public byte[] Run(int nN, PropertySet runProp, out string type)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.RUN);
            byte[] rgResults = agent.Run(nN, out type);
            agent.Dispose();

            return rgResults;
        }

        public bool Test(int nN, ITERATOR_TYPE type)
        {
            int nDelay = 1000;
            string strProp = m_properties.ToString();

            // Turn off the num-skip to run at normal speed.
            strProp += "EnableNumSkip=False;";
            PropertySet properties = new PropertySet(strProp);

            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);

            agent.Dispose();
            Shutdown(nDelay);

            return true;
        }

        public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TRAIN, nN, type, step);
            agent.Dispose();

            return false;
        }
    }

    class DqnAgent<T> : IDisposable
    {
        IxTrainerCallback m_icallback;
        Brain<T> m_brain;
        PropertySet m_properties;
        CryptoRandom m_random;
        float m_fGamma = 0.99f;
        bool m_bUseRawInput = true;
        double m_dfBetaStart = 0.4;
        int m_nBetaFrames = 1000;
        int m_nMemorySize = 10000;
        float m_fPriorityAlpha = 0.6f;
        int m_nUpdateTargetFreq = 1000;
        MEMTYPE m_memType = MEMTYPE.PRIORITY;

        public DqnAgent(IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_icallback = icallback;
            m_brain = new Brain<T>(mycaffe, properties, random, phase);
            m_properties = properties;
            m_random = random;

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);
            m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", m_bUseRawInput);
        }

        public void Dispose()
        {
            if (m_brain != null)
            {
                m_brain.Dispose();
                m_brain = null;
            }
        }

        private double beta_by_frame(int nFrameIdx)
        {
            return Math.Min(1.0, m_dfBetaStart + nFrameIdx * (1.0 - m_dfBetaStart) / m_nBetaFrames);
        }
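
        // Example: beta anneals linearly from m_dfBetaStart toward 1.0 over m_nBetaFrames
        // frames, increasing the prioritized-replay importance-sampling correction as
        // training progresses. With the defaults above (0.4 start, 1000 frames):
        //   beta_by_frame(0)    = 0.4
        //   beta_by_frame(500)  = 0.4 + 500 * 0.6 / 1000 = 0.7
        //   beta_by_frame(1000) = 1.0 (clamped at 1.0 thereafter)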

        private StateBase getData(Phase phase, int nAction, int nIdx)
        {
            GetDataArgs args = m_brain.getDataArgs(phase, nAction);
            m_icallback.OnGetData(args);
            args.State.Data.Index = nIdx;
            return args.State;
        }

        private void updateStatus(int nIteration, int nEpisodeCount, double dfRewardSum, double dfRunningReward, double dfLoss, double dfLearningRate, bool bModelUpdated)
        {
            GetStatusArgs args = new GetStatusArgs(0, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, 0, 0, dfLoss, dfLearningRate, bModelUpdated);
            m_icallback.OnUpdateStatus(args);
        }

        public byte[] Run(int nIterations, out string type)
        {
            IxTrainerCallbackRNN icallback = m_icallback as IxTrainerCallbackRNN;
            if (icallback == null)
                throw new Exception("The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");

            StateBase s = getData(Phase.RUN, -1, 0);
            int nIteration = 0;
            List<float> rgResults = new List<float>();
            bool bDifferent;

            while (!m_brain.Cancel.WaitOne(0) && (nIterations == -1 || nIteration < nIterations))
            {
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput, out bDifferent);

                // Forward the policy network and sample an action.
                int action = m_brain.act(x, s.Clip, s.ActionCount);

                rgResults.Add(s.Data.TimeStamp.ToFileTime());
                rgResults.Add(s.Data.GetDataAtF(0));
                rgResults.Add(action);

                nIteration++;

                // Take the next step using the action
                s = getData(Phase.RUN, action, nIteration);
            }

            ConvertOutputArgs args = new ConvertOutputArgs(nIterations, rgResults.ToArray());
            icallback.OnConvertOutput(args);

            type = args.RawType;
            return args.RawOutput;
        }
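
        // Note: each iteration above appends three floats per step - the observation
        // timestamp (as a file time), the first data element, and the selected action -
        // which OnConvertOutput then packages into the caller's native byte format.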

        private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
        {
            if (nN == -1)
                return false;

            if (type == ITERATOR_TYPE.EPISODE)
            {
                if (nEpisode < nN)
                    return false;

                return true;
            }
            else
            {
                if (nIteration < nN)
                    return false;

                return true;
            }
        }

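        // The training loop below follows the standard NoisyNet-DQN pattern with
        // prioritized replay: act greedily (exploration comes from the noisy layers),
        // step the environment, store (state, action, next state, reward, done) in the
        // replay memory, and once the memory holds more than one batch, sample by
        // priority, train on the TD error, update the sampled priorities, and
        // periodically sync the target network.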
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
            int nIteration = 1;
            double dfRunningReward = 0;
            double dfEpisodeReward = 0;
            int nEpisode = 0;
            bool bDifferent = false;

            StateBase state = getData(phase, -1, -1);
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

            // Set the initial target model to the current model.
            m_brain.UpdateTargetModel();

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                // Forward the policy network and sample an action.
                int action = m_brain.act(x, state.Clip, state.ActionCount);

                // Take the next step using the action.
                StateBase state_next = getData(phase, action, nIteration);

                // Preprocess the next observation.
                SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
                if (!bDifferent)
                    m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");

                // Build up episode memory, using reward for taking the action.
                iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
                dfEpisodeReward += state_next.Reward;

                // Do the training.
                if (iMemory.Count > m_brain.BatchSize)
                {
                    double dfBeta = beta_by_frame(nIteration);
                    MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
                    m_brain.Train(nIteration, rgSamples, state.ActionCount);
                    iMemory.Update(rgSamples);

                    if (nIteration % m_nUpdateTargetFreq == 0)
                        m_brain.UpdateTargetModel();
                }

                if (state_next.Done)
                {
                    // Update the running reward.
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode++;
                    updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

                    state = getData(phase, -1, -1);
                    x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
                    dfEpisodeReward = 0;
                }
                else
                {
                    state = state_next;
                    x = x_next;
                }

                nIteration++;
            }

            iMemory.CleanUp();
        }
    }

    class Brain<T> : IDisposable, IxTrainerGetDataCallback
    {
        MyCaffeControl<T> m_mycaffe;
        Solver<T> m_solver;
        Net<T> m_netOutput;
        Net<T> m_netTarget;
        PropertySet m_properties;
        CryptoRandom m_random;
        SimpleDatum m_sdLast = null;
        DataTransformer<T> m_transformer;
        MemoryLossLayer<T> m_memLoss;
        Blob<T> m_blobActions = null;
        Blob<T> m_blobQValue = null;
        Blob<T> m_blobNextQValue = null;
        Blob<T> m_blobExpectedQValue = null;
        Blob<T> m_blobDone = null;
        Blob<T> m_blobLoss = null;
        Blob<T> m_blobWeights = null;
        BlobCollection<T> m_colAccumulatedGradients = new BlobCollection<T>();
        bool m_bUseAcceleratedTraining = false;
        double m_dfLearningRate;
        int m_nMiniBatch = 1;
        float m_fGamma = 0.99f;
        int m_nBatchSize = 32;
        MemoryCollection m_rgSamples;
        int m_nActionCount = 2;
        bool m_bModelUpdated = false;
        Font m_font = null;
        Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>> m_rgStyle = new Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>>();
        List<SimpleDatum> m_rgX = new List<SimpleDatum>();
        float[] m_rgOverlay = null;

        public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_mycaffe = mycaffe;
            m_solver = mycaffe.GetInternalSolver();
            m_netOutput = mycaffe.GetInternalNet(phase);
            m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
            m_properties = properties;
            m_random = random;

            Blob<T> data = m_netOutput.blob_by_name("data");
            if (data == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");

            m_nBatchSize = data.num;

            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");

            m_nActionCount = logits.channels;

            m_transformer = m_mycaffe.DataTransformer;
            if (m_transformer == null)
            {
                TransformationParameter trans_param = new TransformationParameter();
                int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.Channels;
                int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.Height;
                int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.Width;
                m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
            }
            m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

            m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
            if (m_memLoss == null)
                m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");

            double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
            if (dfRate.HasValue)
                m_dfLearningRate = dfRate.Value;

            m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
            m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

            if (m_nMiniBatch > 1)
            {
                m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
                m_colAccumulatedGradients.SetDiff(0);
            }
        }

        private void dispose(ref Blob<T> b)
        {
            if (b != null)
            {
                b.Dispose();
                b = null;
            }
        }

        public void Dispose()
        {
            dispose(ref m_blobActions);
            dispose(ref m_blobQValue);
            dispose(ref m_blobNextQValue);
            dispose(ref m_blobExpectedQValue);
            dispose(ref m_blobDone);
            dispose(ref m_blobLoss);
            dispose(ref m_blobWeights);

            if (m_colAccumulatedGradients != null)
            {
                m_colAccumulatedGradients.Dispose();
                m_colAccumulatedGradients = null;
            }

            if (m_netTarget != null)
            {
                m_netTarget.Dispose();
                m_netTarget = null;
            }

            if (m_font != null)
            {
                m_font.Dispose();
                m_font = null;
            }

            foreach (KeyValuePair<Color, Tuple<Brush, Brush, Pen, Brush>> kv in m_rgStyle)
            {
                kv.Value.Item1.Dispose();
                kv.Value.Item2.Dispose();
                kv.Value.Item3.Dispose();
                kv.Value.Item4.Dispose();
            }

            m_rgStyle.Clear();
        }

        public GetDataArgs getDataArgs(Phase phase, int nAction)
        {
            bool bReset = (nAction == -1) ? true : false;
            return new GetDataArgs(phase, 0, m_mycaffe, m_mycaffe.Log, m_mycaffe.CancelEvent, bReset, nAction, true, false, false, this);
        }

        public int BatchSize
        {
            get { return m_nBatchSize; }
        }

        public Log Log
        {
            get { return m_mycaffe.Log; }
        }

        public CancelEvent Cancel
        {
            get { return m_mycaffe.CancelEvent; }
        }

        public SimpleDatum Preprocess(StateBase s, bool bUseRawInput, out bool bDifferent, bool bReset = false)
        {
            bDifferent = false;

            SimpleDatum sd = new SimpleDatum(s.Data, true);

            if (!bUseRawInput)
            {
                if (bReset)
                    m_sdLast = null;

                if (m_sdLast == null)
                    sd.Zero();
                else
                    bDifferent = sd.Sub(m_sdLast);

                m_sdLast = new SimpleDatum(s.Data, true);
            }
            else
            {
                bDifferent = true;
            }

            sd.Tag = bReset;

            return sd;
        }
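
        // Note: when bUseRawInput is false the network is fed difference frames
        // (current minus previous), so a static scene yields an all-zero input with
        // bDifferent = false; the first frame after a reset is zeroed as well.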

        public int act(SimpleDatum sd, SimpleDatum sdClip, int nActionCount)
        {
            setData(m_netOutput, sd, sdClip);
            m_netOutput.ForwardFromTo(0, m_netOutput.layers.Count - 2);

            Blob<T> output = m_netOutput.blob_by_name("logits");
            if (output == null)
                throw new Exception("Missing expected 'logits' blob!");

            // Choose greedy action
            return argmax(Utility.ConvertVecF<T>(output.mutable_cpu_data));
        }
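
        // Note: unlike a standard DQN there is no epsilon-greedy schedule here - the
        // noisy InnerProduct layers supply the exploration, so the action is always the
        // greedy argmax over the predicted Q-values in the 'logits' blob. The forward
        // pass stops before the last layer to skip the MEMORY_LOSS layer used in training.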

        public bool GetModelUpdated()
        {
            bool bModelUpdated = m_bModelUpdated;
            m_bModelUpdated = false;
            return bModelUpdated;
        }

        public void UpdateTargetModel()
        {
            m_mycaffe.Log.Enable = false;
            m_netOutput.CopyTrainedLayersTo(m_netTarget);
            m_netOutput.CopyInternalBlobsTo(m_netTarget);
            m_mycaffe.Log.Enable = true;
            m_bModelUpdated = true;
        }
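
        // Note: the target network is a periodically refreshed copy of the policy
        // network (see m_nUpdateTargetFreq in DqnAgent); evaluating next-state Q-values
        // on frozen weights decouples the TD target from the weights being updated,
        // which stabilizes training.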

        public void Train(int nIteration, MemoryCollection rgSamples, int nActionCount)
        {
            m_rgSamples = rgSamples;

            if (m_nActionCount != nActionCount)
                throw new Exception("The logit output of '" + m_nActionCount.ToString() + "' does not match the action count of '" + nActionCount.ToString() + "'!");

            // Get next_q_values
            m_mycaffe.Log.Enable = false;
            setNextStateData(m_netTarget, rgSamples);
            m_netTarget.ForwardFromTo(0, m_netTarget.layers.Count - 2);

            setCurrentStateData(m_netOutput, rgSamples);
            m_memLoss.OnGetLoss += m_memLoss_ComputeTdLoss;

            if (m_nMiniBatch == 1)
            {
                m_solver.Step(1);
            }
            else
            {
                m_solver.Step(1, TRAIN_STEP.NONE, true, m_bUseAcceleratedTraining, true, true);
                m_colAccumulatedGradients.Accumulate(m_mycaffe.Cuda, m_netOutput.learnable_parameters, true);

                if (nIteration % m_nMiniBatch == 0)
                {
                    m_netOutput.learnable_parameters.CopyFrom(m_colAccumulatedGradients, true);
                    m_colAccumulatedGradients.SetDiff(0);
                    m_dfLearningRate = m_solver.ApplyUpdate(nIteration);
                    m_netOutput.ClearParamDiffs();
                }
            }

            m_memLoss.OnGetLoss -= m_memLoss_ComputeTdLoss;
            m_mycaffe.Log.Enable = true;

            resetNoise(m_netOutput);
            resetNoise(m_netTarget);
        }
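
        // Example: with "MiniBatch=5", UseAcceleratedTraining=false and a model batch
        // size of 32, each solver step above accumulates gradients without applying
        // them; every 5th iteration the accumulated diffs are copied back and applied
        // as one weight update covering an effective 5 * 32 = 160 sampled transitions.
        // After every call, resetNoise re-samples the noisy-layer noise in both
        // networks so the next forward pass explores with fresh parameter noise.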

        private void m_memLoss_ComputeTdLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
        {
            MemoryCollection rgMem = m_rgSamples;

            Blob<T> q_values = m_netOutput.blob_by_name("logits");
            Blob<T> next_q_values = m_netTarget.blob_by_name("logits");

            float[] rgActions = rgMem.GetActionsAsOneHotVector(m_nActionCount);
            m_blobActions.ReshapeLike(q_values);
            m_blobActions.mutable_cpu_data = Utility.ConvertVec<T>(rgActions);
            m_blobQValue.ReshapeLike(q_values);

            // q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
            m_mycaffe.Cuda.mul(m_blobActions.count(), m_blobActions.gpu_data, q_values.gpu_data, m_blobQValue.mutable_gpu_data);
            reduce_sum_axis1(m_blobQValue);

            // next_q_value = next_q_values.max(1)[0]
            m_blobNextQValue.CopyFrom(next_q_values, false, true);
            reduce_argmax_axis1(m_blobNextQValue);

            // expected_q_values
            float[] rgRewards = rgMem.GetRewards();
            m_blobExpectedQValue.ReshapeLike(m_blobQValue);
            m_blobExpectedQValue.mutable_cpu_data = Utility.ConvertVec<T>(rgRewards);

            float[] rgDone = rgMem.GetInvertedDoneAsOneHotVector();
            m_blobDone.ReshapeLike(m_blobQValue);
            m_blobDone.mutable_cpu_data = Utility.ConvertVec<T>(rgDone);

            m_mycaffe.Cuda.mul(m_blobNextQValue.count(), m_blobNextQValue.gpu_data, m_blobDone.gpu_data, m_blobExpectedQValue.mutable_gpu_diff); // next_q_val * (1 - done)
            m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.count(), m_fGamma, m_blobExpectedQValue.mutable_gpu_diff); // gamma * ^
            m_mycaffe.Cuda.add(m_blobExpectedQValue.count(), m_blobExpectedQValue.gpu_diff, m_blobExpectedQValue.gpu_data, m_blobExpectedQValue.gpu_data); // reward + ^

            // loss = (q_value - expected_q_value.detach()).pow(2)
            m_blobLoss.ReshapeLike(m_blobQValue);
            m_mycaffe.Cuda.sub(m_blobQValue.count(), m_blobQValue.gpu_data, m_blobExpectedQValue.gpu_data, m_blobQValue.mutable_gpu_diff); // q_value - expected_q_value
            m_mycaffe.Cuda.powx(m_blobLoss.count(), m_blobQValue.gpu_diff, 2.0, m_blobLoss.mutable_gpu_data); // (q_value - expected_q_value)^2

            // loss = (q_value - expected_q_value.detach()).pow(2) * weights
            m_blobWeights.ReshapeLike(m_blobQValue);
            m_blobWeights.mutable_cpu_data = Utility.ConvertVec<T>(m_rgSamples.Priorities); // weights
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_data); // ^ * weights

            // prios = loss + 1e-5
            m_mycaffe.Cuda.copy(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobLoss.mutable_gpu_diff);
            m_mycaffe.Cuda.add_scalar(m_blobLoss.count(), 1e-5, m_blobLoss.mutable_gpu_diff);
            double[] rgPrios = Utility.ConvertVec<T>(m_blobLoss.mutable_cpu_diff);

            for (int i = 0; i < rgPrios.Length; i++)
            {
                m_rgSamples.Priorities[i] = rgPrios[i];
            }

            //-------------------------------------------------------
            // Calculate the gradient - unroll the operations
            // (autograd - psha! how about manualgrad :-D)
            //-------------------------------------------------------

            // initial gradient
            double dfGradient = 1.0;
            if (m_memLoss.layer_param.loss_weight.Count > 0)
                dfGradient *= m_memLoss.layer_param.loss_weight[0];

            // mean gradient - expand and divide by batch count
            dfGradient /= m_blobLoss.count();
            m_blobLoss.SetDiff(dfGradient);

            // multiplication gradient - multiply by the other side.
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_diff);

            // power gradient - multiply by the exponent.
            m_mycaffe.Cuda.mul_scalar(m_blobLoss.count(), 2.0, m_blobLoss.mutable_gpu_diff);

            // q_value - expected_q_value gradient
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobQValue.gpu_diff, m_blobLoss.mutable_gpu_diff);

            // squeeze/gather gradient
            mul(m_blobLoss, m_blobActions, e.Bottom[0]);

            e.Loss = reduce_mean(m_blobLoss, false);
            e.EnableLossUpdate = false;
        }
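
        // The manual gradient above follows from the weighted MSE loss. With the TD target
        //   y_i = r_i + gamma * (1 - done_i) * max_a' Q_target(s'_i, a'),
        // the per-sample loss is loss_i = w_i * (q_i - y_i)^2 and L = (1/N) * sum_i loss_i, so
        //   dL/dq_i = (2/N) * w_i * (q_i - y_i).
        // That is exactly what the unrolled steps compute: seed the diff with loss_weight / N,
        // multiply by the weights, by the exponent 2, then by (q_i - y_i), and finally scatter
        // the per-sample gradient back onto the chosen action's logit via the one-hot mask.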

        private void resetNoise(Net<T> net)
        {
            foreach (Layer<T> layer in net.layers)
            {
                if (layer.type == LayerParameter.LayerType.INNERPRODUCT)
                {
                    if (layer.layer_param.inner_product_param.enable_noise)
                        ((InnerProductLayer<T>)layer).ResetNoise();
                }
            }
        }

        private void mul(Blob<T> val, Blob<T> actions, Blob<T> result)
        {
            float[] rgVal = Utility.ConvertVecF<T>(val.mutable_cpu_diff);
            float[] rgActions = Utility.ConvertVecF<T>(actions.mutable_cpu_data);
            float[] rgResult = new float[rgActions.Length];

            for (int i = 0; i < actions.num; i++)
            {
                float fPred = rgVal[i];

                for (int j = 0; j < actions.channels; j++)
                {
                    int nIdx = (i * actions.channels) + j;
                    rgResult[nIdx] = rgActions[nIdx] * fPred;
                }
            }

            result.mutable_cpu_diff = Utility.ConvertVec<T>(rgResult);
        }

        private float reduce_mean(Blob<T> b, bool bDiff)
        {
            float[] rg = Utility.ConvertVecF<T>((bDiff) ? b.mutable_cpu_diff : b.mutable_cpu_data);
            float fSum = rg.Sum(p => p);
            return fSum / rg.Length;
        }

        private void reduce_sum_axis1(Blob<T> b)
        {
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgSum = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fSum = 0;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fSum += rg[nIdx + j];
                    }

                    int nIdxR = i * nInnerCount;
                    rgSum[nIdxR + j] = fSum;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgSum);
        }
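
        // Example: for a (32, 4, 1, 1) blob of one-hot-masked Q-values, reduce_sum_axis1 sums
        // over the 4 actions and reshapes the result to (32, 1, 1, 1); because the mask is
        // one-hot, the sum simply selects the Q-value of the action actually taken, mirroring
        // the PyTorch q_values.gather(1, action.unsqueeze(1)).squeeze(1) noted above.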

        private void reduce_argmax_axis1(Blob<T> b)
        {
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgMax = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fMax = -float.MaxValue;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fMax = Math.Max(fMax, rg[nIdx + j]);
                    }

                    int nIdxR = i * nInnerCount;
                    rgMax[nIdxR + j] = fMax;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgMax);
        }

        private int argmax(float[] rgProb, int nActionCount, int nSampleIdx)
        {
            float[] rgfProb = new float[nActionCount];

            for (int j = 0; j < nActionCount; j++)
            {
                int nIdx = (nSampleIdx * nActionCount) + j;
                rgfProb[j] = rgProb[nIdx];
            }

            return argmax(rgfProb);
        }

        private int argmax(float[] rgfAprob)
        {
            double fMax = -float.MaxValue;
            int nIdx = 0;

            for (int i = 0; i < rgfAprob.Length; i++)
            {
                if (rgfAprob[i] == fMax)
                {
                    // Break ties randomly.
                    if (m_random.NextDouble() > 0.5)
                        nIdx = i;
                }
                else if (fMax < rgfAprob[i])
                {
                    fMax = rgfAprob[i];
                    nIdx = i;
                }
            }

            return nIdx;
        }

        private void setData(Net<T> net, SimpleDatum sdData, SimpleDatum sdClip)
        {
            SimpleDatum[] rgData = new SimpleDatum[] { sdData };
            SimpleDatum[] rgClip = null;

            if (sdClip != null)
                rgClip = new SimpleDatum[] { sdClip };

            setData(net, rgData, rgClip);
        }

        private void setCurrentStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData0 = rgSamples.GetCurrentStateData();
            List<SimpleDatum> rgClip0 = rgSamples.GetCurrentStateClip();

            SimpleDatum[] rgData = rgData0.ToArray();
            SimpleDatum[] rgClip = (rgClip0 != null) ? rgClip0.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setNextStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData1 = rgSamples.GetNextStateData();
            List<SimpleDatum> rgClip1 = rgSamples.GetNextStateClip();

            SimpleDatum[] rgData = rgData1.ToArray();
            SimpleDatum[] rgClip = (rgClip1 != null) ? rgClip1.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setData(Net<T> net, SimpleDatum[] rgData, SimpleDatum[] rgClip)
        {
            Blob<T> data = net.blob_by_name("data");

            data.Reshape(rgData.Length, data.channels, data.height, data.width);
            m_transformer.Transform(rgData, data, m_mycaffe.Cuda, m_mycaffe.Log);

            if (rgClip != null)
            {
                Blob<T> clip = net.blob_by_name("clip");

                if (clip != null)
                {
                    clip.Reshape(rgClip.Length, rgClip[0].Channels, rgClip[0].Height, rgClip[0].Width);
                    m_transformer.Transform(rgClip, clip, m_mycaffe.Cuda, m_mycaffe.Log, true);
                }
            }
        }

        public void OnOverlay(OverlayArgs e)
        {
            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                return;

            if (logits.num == 1)
                m_rgOverlay = Utility.ConvertVecF<T>(logits.mutable_cpu_data);

            if (m_rgOverlay == null)
                return;

            using (Graphics g = Graphics.FromImage(e.DisplayImage))
            {
                int nBorder = 30;
                int nWid = e.DisplayImage.Width - (nBorder * 2);
                int nWid1 = nWid / m_rgOverlay.Length;
                int nHt1 = (int)(e.DisplayImage.Height * 0.3);
                int nX = nBorder;
                int nY = e.DisplayImage.Height - nHt1;
                ColorMapper clrMap = new ColorMapper(0, m_rgOverlay.Length + 1, Color.Black, Color.Red);
                float fMax = -float.MaxValue;
                int nMaxIdx = 0;
                float fMin1 = m_rgOverlay.Min(p => p);
                float fMax1 = m_rgOverlay.Max(p => p);

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    if (fMin1 < 0 || fMax1 > 1)
                        m_rgOverlay[i] = (m_rgOverlay[i] - fMin1) / (fMax1 - fMin1);

                    if (m_rgOverlay[i] > fMax)
                    {
                        fMax = m_rgOverlay[i];
                        nMaxIdx = i;
                    }
                }

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    drawProbabilities(g, nX, nY, nWid1, nHt1, i, m_rgOverlay[i], fMin1, fMax1, clrMap.GetColor(i + 1), (i == nMaxIdx) ? true : false);
                    nX += nWid1;
                }
            }
        }

        private void drawProbabilities(Graphics g, int nX, int nY, int nWid, int nHt, int nAction, float fProb, float fMin, float fMax, Color clr, bool bMax)
        {
            string str = "";

            if (m_font == null)
                m_font = new Font("Century Gothic", 9.0f);

            if (!m_rgStyle.ContainsKey(clr))
            {
                Color clr1 = Color.FromArgb(128, clr);
                Brush br1 = new SolidBrush(clr1);
                Color clr2 = Color.FromArgb(64, clr);
                Pen pen = new Pen(clr2, 1.0f);
                Brush br2 = new SolidBrush(clr2);
                Brush brBright = new SolidBrush(clr);
                m_rgStyle.Add(clr, new Tuple<Brush, Brush, Pen, Brush>(br1, br2, pen, brBright));
            }

            Brush brBack = m_rgStyle[clr].Item1;
            Brush brFront = m_rgStyle[clr].Item2;
            Brush brTop = m_rgStyle[clr].Item4;
            Pen penLine = m_rgStyle[clr].Item3;

            if (fMin != 0 || fMax != 0)
            {
                str = "Action " + nAction.ToString() + " (" + fProb.ToString("N7") + ")";
            }
            else
            {
                str = "Action " + nAction.ToString() + " - No Probabilities";
            }

            SizeF sz = g.MeasureString(str, m_font);

            int nY1 = (int)(nY + (nHt - sz.Height));
            int nX1 = (int)(nX + (nWid / 2) - (sz.Width / 2));
            g.DrawString(str, m_font, (bMax) ? brTop : brFront, new Point(nX1, nY1));

            if (fMin != 0 || fMax != 0)
            {
                float fX = nX;
                float fWid = nWid;
                nHt -= (int)sz.Height;

                float fHt = nHt * fProb;
                float fHt1 = nHt - fHt;
                RectangleF rc1 = new RectangleF(fX, nY + fHt1, fWid, fHt);
                g.FillRectangle(brBack, rc1);
                g.DrawRectangle(penLine, rc1.X, rc1.Y, rc1.Width, rc1.Height);
            }
        }

        public void SaveWeights(string strFile)
        {
            if (File.Exists(strFile))
                File.Delete(strFile);

            using (StreamWriter sw = new StreamWriter(strFile))
            {
                save(sw, m_netOutput.layer_by_name("linear") as InnerProductLayer<T>);
                save(sw, m_netOutput.layer_by_name("noisy1") as InnerProductLayer<T>);
                save(sw, m_netOutput.layer_by_name("noisy2") as InnerProductLayer<T>);
                save(sw, m_netTarget.layer_by_name("linear") as InnerProductLayer<T>);
                save(sw, m_netTarget.layer_by_name("noisy1") as InnerProductLayer<T>);
                save(sw, m_netTarget.layer_by_name("noisy2") as InnerProductLayer<T>);
            }
        }

        private void save(StreamWriter sw, InnerProductLayer<T> layer)
        {
            for (int i = 0; i < layer.blobs.Count; i++)
            {
                float[] rgf = Utility.ConvertVecF<T>(layer.blobs[i].mutable_cpu_data);
                string strLine = "";

                for (int j = 0; j < rgf.Length; j++)
                {
                    strLine += rgf[j].ToString() + ",";
                }

                sw.WriteLine(strLine.TrimEnd(','));
            }

            for (int i = 0; i < layer.internal_blobs.Count; i++)
            {
                float[] rgf = Utility.ConvertVecF<T>(layer.internal_blobs[i].mutable_cpu_data);
                string strLine = "";

                for (int j = 0; j < rgf.Length; j++)
                {
                    strLine += rgf[j].ToString() + ",";
                }

                sw.WriteLine(strLine.TrimEnd(','));
            }
        }

        public void LoadWeights(string strFile)
        {
            if (!File.Exists(strFile))
                return;

            using (StreamReader sr = new StreamReader(strFile))
            {
                load(sr, m_netOutput.layer_by_name("linear") as InnerProductLayer<T>);
                load(sr, m_netOutput.layer_by_name("noisy1") as InnerProductLayer<T>);
                load(sr, m_netOutput.layer_by_name("noisy2") as InnerProductLayer<T>);
                load(sr, m_netTarget.layer_by_name("linear") as InnerProductLayer<T>);
                load(sr, m_netTarget.layer_by_name("noisy1") as InnerProductLayer<T>);
                load(sr, m_netTarget.layer_by_name("noisy2") as InnerProductLayer<T>);
            }
        }

        private void load(StreamReader sr, InnerProductLayer<T> layer)
        {
            for (int i = 0; i < layer.blobs.Count; i++)
            {
                List<float> rgf = new List<float>();
                string strLine = sr.ReadLine();
                string[] rgstr = strLine.Split(',');

                for (int j = 0; j < rgstr.Length; j++)
                {
                    rgf.Add(BaseParameter.ParseFloat(rgstr[j]));
                }

                layer.blobs[i].mutable_cpu_data = Utility.ConvertVec<T>(rgf.ToArray());
            }

            for (int i = 0; i < layer.internal_blobs.Count; i++)
            {
                List<float> rgf = new List<float>();
                string strLine = sr.ReadLine();
                string[] rgstr = strLine.Split(',');

                for (int j = 0; j < rgstr.Length; j++)
                {
                    rgf.Add(BaseParameter.ParseFloat(rgstr[j]));
                }

                layer.internal_blobs[i].mutable_cpu_data = Utility.ConvertVec<T>(rgf.ToArray());
            }
        }
    }
}
The MyCaffeControl is the main object used to manage all training, testing and running of the MyCaffe...
CancelEvent CancelEvent
Returns the CancelEvent used.
Net< T > GetInternalNet(Phase phase=Phase.RUN)
Returns the internal net based on the Phase specified: TRAIN, TEST or RUN.
Solver< T > GetInternalSolver()
Get the internal solver.
ProjectEx CurrentProject
Returns the name of the currently loaded project.
The BaseParameter class is the base class for all other parameter classes.
static float ParseFloat(string strVal)
Parse float values using the US culture if the decimal separator = '.', then using the native culture...
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
void Reset()
Resets the event clearing any signaled state.
Definition: CancelEvent.cs:279
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
CancelEvent()
The CancelEvent constructor.
Definition: CancelEvent.cs:28
void Set()
Sets the event to the signaled state.
Definition: CancelEvent.cs:270
The ColorMapper maps a value within a number range, to a Color within a color scheme.
Definition: ColorMapper.cs:14
Color GetColor(double dfVal)
Find the color using a binary search algorithm.
Definition: ColorMapper.cs:350
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
Definition: CryptoRandom.cs:14
double NextDouble()
Returns a random double within the range [0, 1).
Definition: CryptoRandom.cs:83
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
Log(string strSrc)
The Log constructor.
Definition: Log.cs:33
double? GetSolverSettingAsNumeric(string strParam)
Get a setting from the solver descriptor as a double value.
Definition: ProjectEx.cs:470
Specifies a key-value pair of properties.
Definition: PropertySet.cs:16
int GetPropertyAsInt(string strName, int nDefault=0)
Returns a property as an integer value.
Definition: PropertySet.cs:287
bool GetPropertyAsBool(string strName, bool bDefault=false)
Returns a property as a boolean value.
Definition: PropertySet.cs:267
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as a double value.
Definition: PropertySet.cs:307
override string ToString()
Returns the string representation of the properties.
Definition: PropertySet.cs:325
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
float GetDataAtF(int nIdx)
Returns the item at a specified index in the float type.
bool Sub(SimpleDatum sd, bool bSetNegativeToZero=false)
Subtract the data of another SimpleDatum from this one, so this = this - sd.
void Zero()
Zero out all data in the datum but keep the size and other settings.
DateTime TimeStamp
Get/set the Timestamp.
object Tag
Specifies user data associated with the SimpleDatum.
Definition: SimpleDatum.cs:901
int Channels
Return the number of channels of the data.
int Index
Returns the index of the SimpleDatum.
The Utility class provides general utility functions.
Definition: Utility.cs:35
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Accumulate(CudaDnn< T > cuda, BlobCollection< T > src, bool bAccumulateDiff)
Accumulate the diffs from one BlobCollection into another.
void SetDiff(double df)
Set all blob diff to the value specified.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
Definition: Blob.cs:800
int height
DEPRECATED; legacy shape accessor height: use shape(2) instead.
Definition: Blob.cs:808
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
T[] mutable_cpu_diff
Get diff from the GPU and bring it over to the host, or Set diff from the Host and send it over to th...
Definition: Blob.cs:1511
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
Definition: Blob.cs:1461
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
int width
DEPRECATED; legacy shape accessor width: use shape(3) instead.
Definition: Blob.cs:816
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
Definition: Blob.cs:684
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
Definition: Blob.cs:1981
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
Definition: Blob.cs:792
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter
Definition: Net.cs:23
List< Layer< T > > layers
Returns the layers.
Definition: Net.cs:2003
double ForwardFromTo(int nStart=0, int nEnd=int.MaxValue)
The FromTo variant of forward and backward operate on the (topological) ordering by which the net is ...
Definition: Net.cs:1402
void CopyInternalBlobsTo(Net< T > dstNet)
Copy the internal blobs from one net to another.
Definition: Net.cs:1699
void CopyTrainedLayersTo(Net< T > dstNet)
Copies the trained layer of this Net to another Net.
Definition: Net.cs:1714
Layer< T > FindLastLayer(LayerParameter.LayerType type)
Find the last layer with the matching type.
Definition: Net.cs:2806
Layer< T > layer_by_name(string strLayer, bool bThrowExceptionOnError=true)
Returns a Layer given its name.
Definition: Net.cs:2292
virtual void Dispose(bool bDisposing)
Releases all resources (GPU and Host) used by the Net.
Definition: Net.cs:184
void ClearParamDiffs()
Zero out the diffs of all network parameters. This should be run before Backward.
Definition: Net.cs:1907
BlobCollection< T > learnable_parameters
Returns the learnable parameters.
Definition: Net.cs:2117
NetParameter net_param
Returns the net parameter.
Definition: Net.cs:1857
Blob< T > blob_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a blob given its name.
Definition: Net.cs:2245
The ResultCollection contains the result of a given CaffeControl::Run.
Applies common transformations to the input data, such as scaling, mirroring, subtracting the image m...
DataTransformer(CudaDnn< T > cuda, Log log, TransformationParameter p, Phase phase, int nC, int nH, int nW, SimpleDatum imgMean=null)
The DataTransformer constructor.
void Transform(List< Datum > rgDatum, Blob< T > blobTransformed, CudaDnn< T > cuda, Log log)
Transforms a list of Datum and places the transformed data into a Blob.
The InnerProductLayer, also known as a 'fully-connected' layer, computes the inner product with a set ...
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
LayerParameter.LayerType type
Returns the LayerType of this Layer.
Definition: Layer.cs:927
LayerParameter layer_param
Returns the LayerParameter for this Layer.
Definition: Layer.cs:899
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
Definition: Layer.cs:875
BlobCollection< T > internal_blobs
Returns the collection of internal Blobs used by the Layer.
Definition: Layer.cs:883
The MemoryLossLayerGetLossArgs class is passed to the OnGetLoss event.
bool EnableLossUpdate
Get/set enabling the loss update within the backpropagation pass.
double Loss
Get/set the externally calculated total loss.
BlobCollection< T > Bottom
Specifies the bottom passed in during the forward pass.
The MemoryLossLayer provides a method of performing a custom loss functionality. Similar to the Memor...
EventHandler< MemoryLossLayerGetLossArgs< T > > OnGetLoss
The OnGetLoss event fires during each forward pass. The value returned is saved, and applied on the b...
bool enable_noise
Enable/disable noise in the inner-product layer (default = false).
Specifies the base parameter for all layers.
List< double > loss_weight
Specifies the loss weight.
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
LayerType
Specifies the layer type.
Stores parameters used to apply transformation to the data layer's data.
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
Definition: Solver.cs:28
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
Definition: Solver.cs:818
abstract double ApplyUpdate(int nIterationOverride=-1)
Make and apply the update value for the current iteration.
The ConvertOutputArgs is passed to the OnConvertOutput event.
Definition: EventArgs.cs:311
byte[] RawOutput
Specifies the raw output byte stream.
Definition: EventArgs.cs:356
string RawType
Specifies the type of the raw output byte stream.
Definition: EventArgs.cs:348
The GetDataArgs is passed to the OnGetData event to retrieve data.
Definition: EventArgs.cs:402
StateBase State
Specifies the state data of the observations.
Definition: EventArgs.cs:517
The GetStatusArgs is passed to the OnGetStatus event.
Definition: EventArgs.cs:166
The InitializeArgs is passed to the OnInitialize event.
Definition: EventArgs.cs:90
The OverlayArgs is passed to the OnOverlay event, optionally fired just before displaying a gym image...
Definition: EventArgs.cs:376
Bitmap DisplayImage
Get/set the display image.
Definition: EventArgs.cs:392
The StateBase is the base class for the state of each observation - this is defined by actual trainer...
Definition: StateBase.cs:16
bool Done
Get/set whether the state is done or not.
Definition: StateBase.cs:72
double Reward
Get/set the reward of the state.
Definition: StateBase.cs:63
SimpleDatum Data
Returns other data associated with the state.
Definition: StateBase.cs:98
int ActionCount
Returns the number of actions.
Definition: StateBase.cs:90
SimpleDatum Clip
Returns the clip data associated with the state.
Definition: StateBase.cs:116
The WaitArgs is passed to the OnWait event.
Definition: EventArgs.cs:65
The MemoryCollectionFactory is used to create various memory collection types.
static IMemoryCollection CreateMemory(MEMTYPE type, int nMax, float fAlpha=0, string strFile=null)
CreateMemory creates the memory collection type based on the MEMTYPE parameter.
The memory collection stores a set of memory items.
float[] GetInvertedDoneAsOneHotVector()
Returns the inverted done (1 - done) values as a one-hot vector.
List< SimpleDatum > GetNextStateClip()
Returns the list of clip items associated with the next state.
double[] Priorities
Get/set the priorities associated with the collection (if any).
List< SimpleDatum > GetCurrentStateData()
Returns the list of data items associated with the current state.
float[] GetActionsAsOneHotVector(int nActionCount)
Returns the action items as a set of one-hot vectors.
List< SimpleDatum > GetCurrentStateClip()
Returns the list of clip items associated with the current state.
float[] GetRewards()
Returns the rewards as a vector.
List< SimpleDatum > GetNextStateData()
Returns the list of data items associated with the next state.
The MemoryItem stores the information about a given cycle.
The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the netwo...
Brain(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
The constructor.
void UpdateTargetModel()
The UpdateTargetModel transfers the trained layers from the active Net to the target Net.
int BatchSize
Returns the batch size defined by the model.
void SaveWeights(string strFile)
Save the weight and bias values to file.
CancelEvent Cancel
Returns the Cancel event used to cancel all MyCaffe tasks.
void OnOverlay(OverlayArgs e)
The OnOverlay callback is called just before displaying the gym image, thus allowing for an overlay t...
void Train(int nIteration, MemoryCollection rgSamples, int nActionCount)
Train the model at the current iteration.
GetDataArgs getDataArgs(Phase phase, int nAction)
Returns the GetDataArgs used to retrieve new data from the environment implemented by derived parent ...
bool GetModelUpdated()
Get whether or not the model has been updated.
int act(SimpleDatum sd, SimpleDatum sdClip, int nActionCount)
Returns the action from running the model. The action returned is either randomly selected (when usin...
SimpleDatum Preprocess(StateBase s, bool bUseRawInput, out bool bDifferent, bool bReset=false)
Preprocesses the data.
void Dispose()
Release all resources used by the Brain.
void LoadWeights(string strFile)
Load the weight and bias values from file.
The DqnAgent both builds episodes from the environment and trains on them using the Brain.
void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The Run method provides the main loop that performs the following steps: 1.) get state 2....
void Dispose()
Release all resources used.
DqnAgent(IxTrainerCallback icallback, MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
The constructor.
byte[] Run(int nIterations, out string type)
Run the action on a set number of iterations and return the results with no training.
The TrainerNoisyDqn implements the Noisy-DQN algorithm as described by the Google Dopamine DQNAgent,...
bool Test(int nN, ITERATOR_TYPE type)
Run the test cycle - currently this is not implemented.
ResultCollection RunOne(int nDelay=1000)
Run a single cycle on the environment after the delay.
TrainerNoisyDqn(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
The constructor.
bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
Train the network using a modified DQN training algorithm optimized for GPU use.
void Dispose()
Release all resources used.
bool Shutdown(int nWait)
Shutdown the trainer.
byte[] Run(int nN, PropertySet runProp, out string type)
Run a set of iterations and return the results.
The IxTrainerCallback provides functions used by each trainer to 'call-back' to the parent for inform...
Definition: Interfaces.cs:303
The IxTrainerCallbackRNN provides functions used by each trainer to 'call-back' to the parent for inf...
Definition: Interfaces.cs:348
void OnConvertOutput(ConvertOutputArgs e)
The OnConvertOutput callback fires from within the Run method and is used to convert the network's ou...
The IxTrainerGetDataCallback interface is called right after rendering the output image and just befo...
Definition: Interfaces.cs:335
The IxTrainerRL interface is implemented by each RL Trainer.
Definition: Interfaces.cs:257
The IMemoryCollection interface is implemented by all memory collection types.
Definition: Interfaces.cs:37
void Update(MemoryCollection rgSamples)
Updates the memory collection - currently only used by the Prioritized memory collection to update it...
int Count
Returns the number of items in the memory collection.
Definition: Interfaces.cs:59
void CleanUp()
Performs final clean-up tasks.
void Add(MemoryItem m)
Add a new item to the memory collection.
MemoryCollection GetSamples(CryptoRandom random, int nCount, double dfBeta)
Retrieve a set of samples from the collection.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
TRAIN_STEP
Defines the training stepping method (if any).
Definition: Interfaces.cs:131
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
MEMTYPE
Specifies the type of memory collection to use.
Definition: Interfaces.cs:14
The MyCaffe.trainers namespace contains all reinforcement and recurrent learning trainers.
ITERATOR_TYPE
Specifies the iterator type to use.
Definition: Interfaces.cs:22
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12
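
A minimal usage sketch of the trainer lifecycle (the callback implementation, control setup, and property values shown here are illustrative assumptions, not part of this file):

  MyCaffeControl<float> mycaffe = ...;  // an open MyCaffe project containing the noisy-DQN model
  PropertySet properties = new PropertySet("Gamma=0.99;UseRawInput=True;MiniBatch=1;");
  CryptoRandom random = new CryptoRandom();
  IxTrainerCallback icallback = ...;    // supplies environment data via OnGetData and receives status updates

  TrainerNoisyDqn<float> trainer = new TrainerNoisyDqn<float>(mycaffe, properties, random, icallback);
  trainer.Initialize();
  trainer.Train(1000, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);  // train for 1,000 iterations
  trainer.Shutdown(1000);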