MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
TrainerNoisyDqn.cs
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.data;
using MyCaffe.layers;
using MyCaffe.param;
using MyCaffe.solvers;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace MyCaffe.trainers.dqn.noisy.simple
{
    /// <summary>
    /// The TrainerNoisyDqn implements the Noisy-DQN algorithm as described by the Google Dopamine DQNAgent.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    public class TrainerNoisyDqn<T> : IxTrainerRL, IDisposable
    {
        IxTrainerCallback m_icallback;
        CryptoRandom m_random = new CryptoRandom();
        MyCaffeControl<T> m_mycaffe;
        PropertySet m_properties;

        /// <summary>
        /// The constructor.
        /// </summary>
        public TrainerNoisyDqn(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
        {
            m_icallback = icallback;
            m_mycaffe = mycaffe;
            m_properties = properties;
            m_random = random;
        }

        /// <summary>
        /// Release all resources used.
        /// </summary>
        public void Dispose()
        {
        }

        /// <summary>
        /// Initialize the trainer.
        /// </summary>
        public bool Initialize()
        {
            m_mycaffe.CancelEvent.Reset();
            m_icallback.OnInitialize(new InitializeArgs(m_mycaffe));
            return true;
        }

        /// <summary>
        /// Shutdown the trainer.
        /// </summary>
        public bool Shutdown(int nWait)
        {
            if (m_mycaffe != null)
            {
                m_mycaffe.CancelEvent.Set();
                wait(nWait);
            }

            m_icallback.OnShutdown();

            return true;
        }

        private void wait(int nWait)
        {
            int nWaitInc = 250;
            int nTotalWait = 0;

            while (nTotalWait < nWait)
            {
                m_icallback.OnWait(new WaitArgs(nWaitInc));
                nTotalWait += nWaitInc;
            }
        }

        /// <summary>
        /// Run a single cycle on the environment after the delay.
        /// </summary>
        public ResultCollection RunOne(int nDelay = 1000)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, 1, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);
            agent.Dispose();
            return null;
        }

        /// <summary>
        /// Run a set of iterations and return the results.
        /// </summary>
        public byte[] Run(int nN, PropertySet runProp, out string type)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.RUN);
            byte[] rgResults = agent.Run(nN, out type);
            agent.Dispose();

            return rgResults;
        }

        /// <summary>
        /// Run the test cycle.
        /// </summary>
        public bool Test(int nN, ITERATOR_TYPE type)
        {
            int nDelay = 1000;
            string strProp = m_properties.ToString();

            // Turn off the num-skip to run at normal speed.
            strProp += "EnableNumSkip=False;";
            PropertySet properties = new PropertySet(strProp);

            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);

            agent.Dispose();
            Shutdown(nDelay);

            return true;
        }

        /// <summary>
        /// Train the network.
        /// </summary>
        public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TRAIN, nN, type, step);
            agent.Dispose();

            return false;
        }
    }

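    // A minimal usage sketch (hypothetical host code; the MyCaffeControl,
    // the callback, and the property values shown here are assumptions made
    // for illustration, not part of this file):
    //
    //   MyCaffeControl<float> mycaffe = ...;   // an open project containing a noisy-DQN model
    //   IxTrainerCallback icallback = ...;     // host-supplied environment callbacks
    //   PropertySet props = new PropertySet("EpsStart=1.0;EpsEnd=0.01;EpsSteps=10000;");
    //
    //   IxTrainerRL trainer = new TrainerNoisyDqn<float>(mycaffe, props, new CryptoRandom(), icallback);
    //   trainer.Initialize();
    //   trainer.Train(100000, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);
    //   trainer.Shutdown(1000);
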
    /// <summary>
    /// The DqnAgent both builds episodes from the environment and trains on them using the Brain.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    class DqnAgent<T> : IDisposable
    {
        IxTrainerCallback m_icallback;
        Brain<T> m_brain;
        PropertySet m_properties;
        CryptoRandom m_random;
        float m_fGamma = 0.95f;
        bool m_bUseRawInput = true;
        int m_nMaxMemory = 10000;
        int m_nTrainingUpdateFreq = 1000;
        int m_nExplorationNum = 50000;
        int m_nEpsSteps = 0;
        double m_dfEpsStart = 0;
        double m_dfEpsEnd = 0;
        double m_dfEpsDelta = 0;
        double m_dfExplorationRate = 0;
        STATE m_state = STATE.EXPLORING;
        double m_dfBetaStart = 0.4;
        int m_nBetaFrames = 1000;
        int m_nMemorySize = 10000;
        float m_fPriorityAlpha = 0.6f;
        MEMTYPE m_memType = MEMTYPE.PRIORITY;

        enum STATE
        {
            EXPLORING,
            TRAINING
        }

        /// <summary>
        /// The constructor.
        /// </summary>
        public DqnAgent(IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_icallback = icallback;
            m_brain = new Brain<T>(mycaffe, properties, random, phase);
            m_properties = properties;
            m_random = random;

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);
            m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", m_bUseRawInput);
            m_nMaxMemory = properties.GetPropertyAsInt("MaxMemory", m_nMaxMemory);
            m_nTrainingUpdateFreq = properties.GetPropertyAsInt("TrainingUpdateFreq", m_nTrainingUpdateFreq);
            m_nExplorationNum = properties.GetPropertyAsInt("ExplorationNum", m_nExplorationNum);
            m_nEpsSteps = properties.GetPropertyAsInt("EpsSteps", m_nEpsSteps);
            m_dfEpsStart = properties.GetPropertyAsDouble("EpsStart", m_dfEpsStart);
            m_dfEpsEnd = properties.GetPropertyAsDouble("EpsEnd", m_dfEpsEnd);
            m_dfEpsDelta = (m_dfEpsStart - m_dfEpsEnd) / m_nEpsSteps;
            m_dfExplorationRate = m_dfEpsStart;

            if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
                throw new Exception("The 'EpsStart' is out of range - please specify a real number in the range [0,1].");

            if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
                throw new Exception("The 'EpsEnd' is out of range - please specify a real number in the range [0,1].");

            if (m_dfEpsEnd > m_dfEpsStart)
                throw new Exception("The 'EpsEnd' must be less than the 'EpsStart' value.");
        }

        /// <summary>
        /// Release all resources used.
        /// </summary>
        public void Dispose()
        {
            if (m_brain != null)
            {
                m_brain.Dispose();
                m_brain = null;
            }
        }

        private StateBase getData(Phase phase, int nAction, int nIdx)
        {
            GetDataArgs args = m_brain.getDataArgs(phase, nAction);
            m_icallback.OnGetData(args);
            args.State.Data.Index = nIdx;
            return args.State;
        }

        private int getAction(int nIteration, SimpleDatum sd, SimpleDatum sdClip, int nActionCount, TRAIN_STEP step)
        {
            if (step == TRAIN_STEP.NONE)
            {
                switch (m_state)
                {
                    case STATE.EXPLORING:
                        return m_random.Next(nActionCount);

                    case STATE.TRAINING:
                        if (m_dfExplorationRate > m_dfEpsEnd)
                            m_dfExplorationRate -= m_dfEpsDelta;

                        if (m_random.NextDouble() < m_dfExplorationRate)
                            return m_random.Next(nActionCount);
                        break;
                }
            }

            return m_brain.act(sd, sdClip, nActionCount);
        }
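
        // Exploration schedule (worked example using assumed property values):
        // with EpsStart=1.0, EpsEnd=0.01 and EpsSteps=9900, the per-step decrement is
        //     m_dfEpsDelta = (1.0 - 0.01) / 9900 = 0.0001,
        // so the exploration rate anneals linearly from 1.0 down to 0.01 over
        // 9,900 training steps and is then held at EpsEnd. Separately from this
        // epsilon-greedy schedule, the noisy layers (see resetNoise in the Brain
        // below) also provide exploration through parameter noise.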

        private void updateStatus(int nIteration, int nEpisodeCount, double dfRewardSum, double dfRunningReward, double dfLoss, double dfLearningRate, bool bModelUpdated)
        {
            GetStatusArgs args = new GetStatusArgs(0, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, m_dfExplorationRate, 0, dfLoss, dfLearningRate, bModelUpdated);
            m_icallback.OnUpdateStatus(args);
        }

        /// <summary>
        /// Run the action on a set number of iterations and return the results with no training.
        /// </summary>
        public byte[] Run(int nIterations, out string type)
        {
            IxTrainerCallbackRNN icallback = m_icallback as IxTrainerCallbackRNN;
            if (icallback == null)
                throw new Exception("The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");

            StateBase s = getData(Phase.RUN, -1, 0);
            int nIteration = 0;
            List<float> rgResults = new List<float>();
            bool bDifferent;

            while (!m_brain.Cancel.WaitOne(0) && (nIterations == -1 || nIteration < nIterations))
            {
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput, out bDifferent);

                // Forward the policy network and sample an action.
                int action = m_brain.act(x, s.Clip, s.ActionCount);

                rgResults.Add(s.Data.TimeStamp.ToFileTime());
                rgResults.Add(s.Data.GetDataAtF(0));
                rgResults.Add(action);

                nIteration++;

                // Take the next step using the action.
                s = getData(Phase.RUN, action, nIteration);
            }

            ConvertOutputArgs args = new ConvertOutputArgs(nIterations, rgResults.ToArray());
            icallback.OnConvertOutput(args);

            type = args.RawType;
            return args.RawOutput;
        }

        private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
        {
            if (nN == -1)
                return false;

            if (type == ITERATOR_TYPE.EPISODE)
                return (nEpisode >= nN);
            else
                return (nIteration >= nN);
        }

        private double beta_by_frame(int nFrameIdx)
        {
            return Math.Min(1.0, m_dfBetaStart + nFrameIdx * (1.0 - m_dfBetaStart) / m_nBetaFrames);
        }
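
        // Worked example: beta_by_frame implements the linear importance-sampling
        // annealing used with prioritized replay,
        //     beta(f) = min(1.0, m_dfBetaStart + f * (1.0 - m_dfBetaStart) / m_nBetaFrames).
        // With the defaults m_dfBetaStart = 0.4 and m_nBetaFrames = 1000:
        // beta(0) = 0.4, beta(500) = 0.7, and beta(f) = 1.0 for all f >= 1000.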

        /// <summary>
        /// The Run method provides the main loop that, on each iteration: gets the current state,
        /// selects and runs an action, stores the transition in the replay memory, and (once
        /// training begins) samples the memory and trains the model.
        /// </summary>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
            int nIteration = 1;
            double dfRunningReward = 0;
            double dfEpisodeReward = 0;
            int nEpisode = 0;
            bool bDifferent = false;

            StateBase state = getData(phase, -1, -1);
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

            // Set the initial target model to the current model.
            m_brain.UpdateTargetModel();

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                if (nIteration > m_nExplorationNum && iMemory.Count > m_brain.BatchSize)
                    m_state = STATE.TRAINING;

                // Forward the policy network and sample an action.
                int action = getAction(nIteration, x, state.Clip, state.ActionCount, step);

                // Take the next step using the action.
                StateBase state_next = getData(phase, action, nIteration);

                // Preprocess the next observation.
                SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
                if (!bDifferent)
                    m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");

                // Build up the episode memory, using the reward for taking the action.
                iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
                dfEpisodeReward += state_next.Reward;

                // Do the training.
                if (m_state == STATE.TRAINING)
                {
                    double dfBeta = beta_by_frame(nIteration + 1);
                    MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
                    m_brain.Train(nIteration, rgSamples, state.ActionCount);
                    iMemory.Update(rgSamples);

                    if (nIteration % m_nTrainingUpdateFreq == 0)
                        m_brain.UpdateTargetModel();
                }

                if (state_next.Done)
                {
                    // Update the running reward (exponential moving average).
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode++;
                    updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

                    state = getData(phase, -1, -1);
                    x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
                    dfEpisodeReward = 0;
                }
                else
                {
                    state = state_next;
                    x = x_next;
                }

                nIteration++;
            }

            iMemory.CleanUp();
        }
    }
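
    // Training cadence (a summary of the loop above, using the field defaults):
    // the agent acts randomly for the first ExplorationNum (50,000) iterations
    // to seed the prioritized replay memory; after that it trains on a sampled
    // batch every iteration, and every TrainingUpdateFreq (1,000) iterations
    // the target network is refreshed from the trained network. The running
    // reward reported to the host is an exponential moving average:
    //     running = 0.99 * running + 0.01 * episodeReward.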

    /// <summary>
    /// The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the network.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    class Brain<T> : IDisposable, IxTrainerGetDataCallback
    {
        MyCaffeControl<T> m_mycaffe;
        Solver<T> m_solver;
        Net<T> m_netOutput;
        Net<T> m_netTarget;
        PropertySet m_properties;
        CryptoRandom m_random;
        SimpleDatum m_sdLast = null;
        DataTransformer<T> m_transformer;
        MemoryLossLayer<T> m_memLoss;
        Blob<T> m_blobActions = null;
        Blob<T> m_blobQValue = null;
        Blob<T> m_blobNextQValue = null;
        Blob<T> m_blobExpectedQValue = null;
        Blob<T> m_blobDone = null;
        Blob<T> m_blobLoss = null;
        Blob<T> m_blobWeights = null;
        BlobCollection<T> m_colAccumulatedGradients = new BlobCollection<T>();
        bool m_bUseAcceleratedTraining = false;
        double m_dfLearningRate;
        int m_nMiniBatch = 1;
        float m_fGamma = 0.99f;
        int m_nFramesPerX = 4;
        int m_nStackPerX = 4;
        int m_nBatchSize = 32;
        MemoryCollection m_rgSamples;
        int m_nActionCount = 3;
        bool m_bModelUpdated = false;
        Font m_font = null;
        Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>> m_rgStyle = new Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>>();
        List<SimpleDatum> m_rgX = new List<SimpleDatum>();
        float[] m_rgOverlay = null;

        /// <summary>
        /// The constructor.
        /// </summary>
        public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_mycaffe = mycaffe;
            m_solver = mycaffe.GetInternalSolver();
            m_netOutput = mycaffe.GetInternalNet(phase);
            m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
            m_properties = properties;
            m_random = random;

            Blob<T> data = m_netOutput.blob_by_name("data");
            if (data == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");

            m_nFramesPerX = data.channels;
            m_nBatchSize = data.num;

            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");

            m_nActionCount = logits.channels;

            m_transformer = m_mycaffe.DataTransformer;
            if (m_transformer == null)
            {
                TransformationParameter trans_param = new TransformationParameter();
                int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.Channels;
                int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.Height;
                int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.Width;
                m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
            }

            for (int i = 0; i < m_nFramesPerX; i++)
            {
                m_transformer.param.mean_value.Add(255 / 2); // center each frame
            }

            m_transformer.param.scale = 1.0 / 255; // normalize
            m_transformer.Update();

            m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

            m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
            if (m_memLoss == null)
                m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");

            double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
            if (dfRate.HasValue)
                m_dfLearningRate = dfRate.Value;

            m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
            m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

            if (m_nMiniBatch > 1)
            {
                m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
                m_colAccumulatedGradients.SetDiff(0);
            }
        }
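
        // Input normalization (worked example): the transformer first subtracts
        // the per-channel mean (255 / 2 = 127, integer division) and then scales
        // by 1/255, so a raw pixel value p is mapped to (p - 127) / 255, i.e.
        // roughly the range [-0.5, 0.5]: 0 -> -0.498, 127 -> 0.0, 255 -> 0.502.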

        private void dispose(ref Blob<T> b)
        {
            if (b != null)
            {
                b.Dispose();
                b = null;
            }
        }

        /// <summary>
        /// Release all resources used by the Brain.
        /// </summary>
        public void Dispose()
        {
            dispose(ref m_blobActions);
            dispose(ref m_blobQValue);
            dispose(ref m_blobNextQValue);
            dispose(ref m_blobExpectedQValue);
            dispose(ref m_blobDone);
            dispose(ref m_blobLoss);
            dispose(ref m_blobWeights);

            if (m_colAccumulatedGradients != null)
            {
                m_colAccumulatedGradients.Dispose();
                m_colAccumulatedGradients = null;
            }

            if (m_netTarget != null)
            {
                m_netTarget.Dispose();
                m_netTarget = null;
            }

            if (m_font != null)
            {
                m_font.Dispose();
                m_font = null;
            }

            foreach (KeyValuePair<Color, Tuple<Brush, Brush, Pen, Brush>> kv in m_rgStyle)
            {
                kv.Value.Item1.Dispose();
                kv.Value.Item2.Dispose();
                kv.Value.Item3.Dispose();
                kv.Value.Item4.Dispose();
            }

            m_rgStyle.Clear();
        }

        /// <summary>
        /// Returns the GetDataArgs used to retrieve new data from the environment.
        /// </summary>
        public GetDataArgs getDataArgs(Phase phase, int nAction)
        {
            bool bReset = (nAction == -1) ? true : false;
            return new GetDataArgs(phase, 0, m_mycaffe, m_mycaffe.Log, m_mycaffe.CancelEvent, bReset, nAction, true, false, false, this);
        }

        /// <summary>
        /// Specifies the number of frames per X value.
        /// </summary>
        public int FrameStack
        {
            get { return m_nFramesPerX; }
        }

        /// <summary>
        /// Returns the batch size defined by the model.
        /// </summary>
        public int BatchSize
        {
            get { return m_nBatchSize; }
        }

        /// <summary>
        /// Returns the output log.
        /// </summary>
        public Log Log
        {
            get { return m_mycaffe.Log; }
        }

        /// <summary>
        /// Returns the Cancel event used to cancel all MyCaffe tasks.
        /// </summary>
        public CancelEvent Cancel
        {
            get { return m_mycaffe.CancelEvent; }
        }

        /// <summary>
        /// Preprocesses the data.
        /// </summary>
        public SimpleDatum Preprocess(StateBase s, bool bUseRawInput, out bool bDifferent, bool bReset = false)
        {
            bDifferent = false;

            SimpleDatum sd = new SimpleDatum(s.Data, true);

            if (!bUseRawInput)
            {
                if (bReset)
                    m_sdLast = null;

                if (m_sdLast == null)
                    sd.Zero();
                else
                    bDifferent = sd.Sub(m_sdLast);

                m_sdLast = new SimpleDatum(s.Data, true);
            }
            else
            {
                bDifferent = true;
            }

            sd.Tag = bReset;

            if (bReset)
            {
                m_rgX = new List<SimpleDatum>();

                for (int i = 0; i < m_nFramesPerX * m_nStackPerX; i++)
                {
                    m_rgX.Add(sd);
                }
            }
            else
            {
                m_rgX.Add(sd);
                m_rgX.RemoveAt(0);
            }

            SimpleDatum[] rgSd = new SimpleDatum[m_nStackPerX];

            for (int i = 0; i < m_nStackPerX; i++)
            {
                int nIdx = ((m_nStackPerX - i) * m_nFramesPerX) - 1;
                rgSd[i] = m_rgX[nIdx];
            }

            return new SimpleDatum(rgSd.ToList(), true);
        }
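
        // Frame stacking (worked example with the defaults m_nFramesPerX = 4 and
        // m_nStackPerX = 4): m_rgX holds the last 16 observations, newest at the
        // end. The index formula ((m_nStackPerX - i) * m_nFramesPerX) - 1 selects
        // i=0 -> 15, i=1 -> 11, i=2 -> 7, i=3 -> 3, so the stacked datum contains
        // every 4th observation, ordered from newest to oldest.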

        /// <summary>
        /// Returns the action from running the model (the greedy argmax over the logits).
        /// </summary>
        public int act(SimpleDatum sd, SimpleDatum sdClip, int nActionCount)
        {
            setData(m_netOutput, sd, sdClip);
            m_netOutput.ForwardFromTo(0, m_netOutput.layers.Count - 2);

            Blob<T> output = m_netOutput.blob_by_name("logits");
            if (output == null)
                throw new Exception("Missing expected 'logits' blob!");

            // Choose the greedy action.
            return argmax(Utility.ConvertVecF<T>(output.mutable_cpu_data));
        }

        /// <summary>
        /// Get whether or not the model has been updated.
        /// </summary>
        public bool GetModelUpdated()
        {
            bool bModelUpdated = m_bModelUpdated;
            m_bModelUpdated = false;
            return bModelUpdated;
        }

        /// <summary>
        /// The UpdateTargetModel transfers the trained layers from the active Net to the target Net.
        /// </summary>
        public void UpdateTargetModel()
        {
            m_mycaffe.Log.Enable = false;
            m_netOutput.CopyTrainedLayersTo(m_netTarget);
            m_netOutput.CopyInternalBlobsTo(m_netTarget);
            m_mycaffe.Log.Enable = true;
            m_bModelUpdated = true;
        }
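
        // Target-network update: this is the standard DQN 'hard' update, copying
        // the online parameters to the target parameters (theta_target <- theta)
        // every TrainingUpdateFreq iterations. The target net is only used to
        // compute the bootstrapped next-state values, which stabilizes training
        // by keeping the regression target fixed between updates.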

        /// <summary>
        /// Train the model at the current iteration.
        /// </summary>
        public void Train(int nIteration, MemoryCollection rgSamples, int nActionCount)
        {
            m_rgSamples = rgSamples;

            if (m_nActionCount != nActionCount)
                throw new Exception("The logit output of '" + m_nActionCount.ToString() + "' does not match the action count of '" + nActionCount.ToString() + "'!");

            // Get the next_q_values from the target net.
            m_mycaffe.Log.Enable = false;
            setNextStateData(m_netTarget, rgSamples);
            m_netTarget.ForwardFromTo(0, m_netTarget.layers.Count - 2);

            setCurrentStateData(m_netOutput, rgSamples);
            m_memLoss.OnGetLoss += m_memLoss_ComputeTdLoss;

            if (m_nMiniBatch == 1)
            {
                m_solver.Step(1);
            }
            else
            {
                m_solver.Step(1, TRAIN_STEP.NONE, true, m_bUseAcceleratedTraining, true, true);
                m_colAccumulatedGradients.Accumulate(m_mycaffe.Cuda, m_netOutput.learnable_parameters, true);

                if (nIteration % m_nMiniBatch == 0)
                {
                    m_netOutput.learnable_parameters.CopyFrom(m_colAccumulatedGradients, true);
                    m_colAccumulatedGradients.SetDiff(0);
                    m_dfLearningRate = m_solver.ApplyUpdate(nIteration);
                    m_netOutput.ClearParamDiffs();
                }
            }

            m_memLoss.OnGetLoss -= m_memLoss_ComputeTdLoss;
            m_mycaffe.Log.Enable = true;

            resetNoise(m_netOutput);
            resetNoise(m_netTarget);
        }
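
        // Gradient accumulation: when MiniBatch > 1 the solver steps without
        // applying updates; the parameter diffs are instead accumulated into
        // m_colAccumulatedGradients, and only on every MiniBatch-th iteration
        // are the summed diffs copied back and applied via ApplyUpdate. The
        // effect is an update computed over MiniBatch * BatchSize samples.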

        /// <summary>
        /// Compute the temporal-difference loss within the MemoryLossLayer's OnGetLoss event.
        /// </summary>
        private void m_memLoss_ComputeTdLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
        {
            MemoryCollection rgMem = m_rgSamples;

            Blob<T> q_values = m_netOutput.blob_by_name("logits");
            Blob<T> next_q_values = m_netTarget.blob_by_name("logits");

            float[] rgActions = rgMem.GetActionsAsOneHotVector(m_nActionCount);
            m_blobActions.ReshapeLike(q_values);
            m_blobActions.mutable_cpu_data = Utility.ConvertVec<T>(rgActions);
            m_blobQValue.ReshapeLike(q_values);

            // q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
            m_mycaffe.Cuda.mul(m_blobActions.count(), m_blobActions.gpu_data, q_values.gpu_data, m_blobQValue.mutable_gpu_data);
            reduce_sum_axis1(m_blobQValue);

            // next_q_value = next_q_values.max(1)[0]
            m_blobNextQValue.CopyFrom(next_q_values, false, true);
            reduce_argmax_axis1(m_blobNextQValue);

            // expected_q_values
            float[] rgRewards = rgMem.GetRewards();
            m_blobExpectedQValue.ReshapeLike(m_blobQValue);
            m_blobExpectedQValue.mutable_cpu_data = Utility.ConvertVec<T>(rgRewards);

            float[] rgDone = rgMem.GetInvertedDoneAsOneHotVector();
            m_blobDone.ReshapeLike(m_blobQValue);
            m_blobDone.mutable_cpu_data = Utility.ConvertVec<T>(rgDone);

            m_mycaffe.Cuda.mul(m_blobNextQValue.count(), m_blobNextQValue.gpu_data, m_blobDone.gpu_data, m_blobExpectedQValue.mutable_gpu_diff); // next_q_value * (1 - done)
            m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.count(), m_fGamma, m_blobExpectedQValue.mutable_gpu_diff); // gamma * ^
            m_mycaffe.Cuda.add(m_blobExpectedQValue.count(), m_blobExpectedQValue.gpu_diff, m_blobExpectedQValue.gpu_data, m_blobExpectedQValue.gpu_data); // reward + ^

            // loss = (q_value - expected_q_value.detach()).pow(2)
            m_blobLoss.ReshapeLike(m_blobQValue);
            m_mycaffe.Cuda.sub(m_blobQValue.count(), m_blobQValue.gpu_data, m_blobExpectedQValue.gpu_data, m_blobQValue.mutable_gpu_diff); // q_value - expected_q_value
            m_mycaffe.Cuda.powx(m_blobLoss.count(), m_blobQValue.gpu_diff, 2.0, m_blobLoss.mutable_gpu_data); // (q_value - expected_q_value)^2

            // loss = (q_value - expected_q_value.detach()).pow(2) * weights
            m_blobWeights.ReshapeLike(m_blobQValue);
            m_blobWeights.mutable_cpu_data = Utility.ConvertVec<T>(m_rgSamples.Priorities); // importance-sampling weights
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_data); // ^ * weights

            // prios = loss + 1e-5
            m_mycaffe.Cuda.copy(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobLoss.mutable_gpu_diff);
            m_mycaffe.Cuda.add_scalar(m_blobLoss.count(), 1e-5, m_blobLoss.mutable_gpu_diff);
            double[] rgPrios = Utility.ConvertVec<T>(m_blobLoss.mutable_cpu_diff);

            for (int i = 0; i < rgPrios.Length; i++)
            {
                m_rgSamples.Priorities[i] = rgPrios[i];
            }

            //-------------------------------------------------------
            // Calculate the gradient - unroll the operations
            // (autograd - psha! how about manualgrad :-D)
            //-------------------------------------------------------

            // initial gradient
            double dfGradient = 1.0;
            if (m_memLoss.layer_param.loss_weight.Count > 0)
                dfGradient *= m_memLoss.layer_param.loss_weight[0];

            // mean gradient - expand and divide by the batch count.
            dfGradient /= m_blobLoss.count();
            m_blobLoss.SetDiff(dfGradient);

            // multiplication gradient - multiply by the other side.
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_diff);

            // power gradient - multiply by the exponent.
            m_mycaffe.Cuda.mul_scalar(m_blobLoss.count(), 2.0, m_blobLoss.mutable_gpu_diff);

            // q_value - expected_q_value gradient
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobQValue.gpu_diff, m_blobLoss.mutable_gpu_diff);

            // squeeze/gather gradient - scatter back to the selected actions.
            mul(m_blobLoss, m_blobActions, e.Bottom[0]);

            e.Loss = reduce_mean(m_blobLoss, false);
            e.EnableLossUpdate = false;
        }
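
        // The loss computed above is the weighted squared TD error used with
        // prioritized replay:
        //     expected_q = r + gamma * max_a' Q_target(s', a') * (1 - done)
        //     loss_i     = w_i * (Q(s_i, a_i) - expected_q_i)^2
        // and the hand-unrolled backward pass produces, for the taken action,
        //     dloss_i/dQ(s_i, a_i) = (2 / N) * w_i * (Q(s_i, a_i) - expected_q_i)
        // (scaled by the layer's loss weight), with zero gradient for all
        // non-taken actions. The per-sample priorities are refreshed with
        // loss_i + 1e-5 so no transition's sampling probability collapses to zero.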

        private void resetNoise(Net<T> net)
        {
            foreach (Layer<T> layer in net.layers)
            {
                if (layer.type == LayerParameter.LayerType.INNERPRODUCT)
                {
                    if (layer.layer_param.inner_product_param.enable_noise)
                        ((InnerProductLayer<T>)layer).ResetNoise();
                }
            }
        }
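
        // Noisy networks: inner-product layers with enable_noise = true act as
        // 'noisy linear' layers in the NoisyNet sense (Fortunato et al., 2017),
        //     y = (mu_w + sigma_w * eps_w) x + (mu_b + sigma_b * eps_b),
        // where the mu and sigma parameters are learned and eps is random.
        // ResetNoise re-samples eps after each training step, so exploration
        // comes from the weights themselves rather than from epsilon-greedy alone.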

        private void mul(Blob<T> val, Blob<T> actions, Blob<T> result)
        {
            float[] rgVal = Utility.ConvertVecF<T>(val.mutable_cpu_diff);
            float[] rgActions = Utility.ConvertVecF<T>(actions.mutable_cpu_data);
            float[] rgResult = new float[rgActions.Length];

            for (int i = 0; i < actions.num; i++)
            {
                float fPred = rgVal[i];

                for (int j = 0; j < actions.channels; j++)
                {
                    int nIdx = (i * actions.channels) + j;
                    rgResult[nIdx] = rgActions[nIdx] * fPred;
                }
            }

            result.mutable_cpu_diff = Utility.ConvertVec<T>(rgResult);
        }

        private float reduce_mean(Blob<T> b, bool bDiff)
        {
            float[] rg = Utility.ConvertVecF<T>((bDiff) ? b.mutable_cpu_diff : b.mutable_cpu_data);
            float fSum = rg.Sum(p => p);
            return fSum / rg.Length;
        }

        private void reduce_sum_axis1(Blob<T> b)
        {
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgSum = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fSum = 0;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fSum += rg[nIdx + j];
                    }

                    int nIdxR = i * nInnerCount;
                    rgSum[nIdxR + j] = fSum;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgSum);
        }
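
        // Reduction example: for a (32, 3) 'logits' blob, reduce_sum_axis1 sums
        // over the 3 actions and reshapes to (32, 1, 1, 1). Because the data was
        // first multiplied by a one-hot action vector, the sum simply extracts
        // Q(s, a) for the taken action - the gather/squeeze from the PyTorch
        // comment above. reduce_argmax_axis1 (below) likewise reduces to the
        // per-sample maximum value along the action axis.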

        private void reduce_argmax_axis1(Blob<T> b)
        {
            // Note: despite the name, this reduces to the maximum *value* along
            // axis 1 (next_q_values.max(1)[0]), not the index of the maximum.
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgMax = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fMax = -float.MaxValue;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fMax = Math.Max(fMax, rg[nIdx + j]);
                    }

                    int nIdxR = i * nInnerCount;
                    rgMax[nIdxR + j] = fMax;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgMax);
        }

        private int argmax(float[] rgProb, int nActionCount, int nSampleIdx)
        {
            float[] rgfProb = new float[nActionCount];

            for (int j = 0; j < nActionCount; j++)
            {
                int nIdx = (nSampleIdx * nActionCount) + j;
                rgfProb[j] = rgProb[nIdx];
            }

            return argmax(rgfProb);
        }

        private int argmax(float[] rgfAprob)
        {
            float fMax = -float.MaxValue;
            int nIdx = 0;

            for (int i = 0; i < rgfAprob.Length; i++)
            {
                if (rgfAprob[i] == fMax)
                {
                    // Break ties randomly.
                    if (m_random.NextDouble() > 0.5)
                        nIdx = i;
                }
                else if (fMax < rgfAprob[i])
                {
                    fMax = rgfAprob[i];
                    nIdx = i;
                }
            }

            return nIdx;
        }

        private void setData(Net<T> net, SimpleDatum sdData, SimpleDatum sdClip)
        {
            SimpleDatum[] rgData = new SimpleDatum[] { sdData };
            SimpleDatum[] rgClip = null;

            if (sdClip != null)
                rgClip = new SimpleDatum[] { sdClip };

            setData(net, rgData, rgClip);
        }

        private void setCurrentStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData0 = rgSamples.GetCurrentStateData();
            List<SimpleDatum> rgClip0 = rgSamples.GetCurrentStateClip();

            SimpleDatum[] rgData = rgData0.ToArray();
            SimpleDatum[] rgClip = (rgClip0 != null) ? rgClip0.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setNextStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData1 = rgSamples.GetNextStateData();
            List<SimpleDatum> rgClip1 = rgSamples.GetNextStateClip();

            SimpleDatum[] rgData = rgData1.ToArray();
            SimpleDatum[] rgClip = (rgClip1 != null) ? rgClip1.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setData(Net<T> net, SimpleDatum[] rgData, SimpleDatum[] rgClip)
        {
            Blob<T> data = net.blob_by_name("data");

            data.Reshape(rgData.Length, data.channels, data.height, data.width);
            m_transformer.Transform(rgData, data, m_mycaffe.Cuda, m_mycaffe.Log);

            if (rgClip != null)
            {
                Blob<T> clip = net.blob_by_name("clip");

                if (clip != null)
                {
                    clip.Reshape(rgClip.Length, rgClip[0].Channels, rgClip[0].Height, rgClip[0].Width);
                    m_transformer.Transform(rgClip, clip, m_mycaffe.Cuda, m_mycaffe.Log, true);
                }
            }
        }

        /// <summary>
        /// The OnOverlay callback is called just before displaying the gym image, allowing
        /// the action values to be drawn as an overlay on top of it.
        /// </summary>
        public void OnOverlay(OverlayArgs e)
        {
            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                return;

            if (logits.num == 1)
                m_rgOverlay = Utility.ConvertVecF<T>(logits.mutable_cpu_data);

            if (m_rgOverlay == null)
                return;

            using (Graphics g = Graphics.FromImage(e.DisplayImage))
            {
                int nBorder = 30;
                int nWid = e.DisplayImage.Width - (nBorder * 2);
                int nWid1 = nWid / m_rgOverlay.Length;
                int nHt1 = (int)(e.DisplayImage.Height * 0.3);
                int nX = nBorder;
                int nY = e.DisplayImage.Height - nHt1;
                ColorMapper clrMap = new ColorMapper(0, m_rgOverlay.Length + 1, Color.Black, Color.Red);
                float fMax = -float.MaxValue;
                int nMaxIdx = 0;
                float fMin1 = m_rgOverlay.Min(p => p);
                float fMax1 = m_rgOverlay.Max(p => p);

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    if (fMin1 < 0 || fMax1 > 1)
                        m_rgOverlay[i] = (m_rgOverlay[i] - fMin1) / (fMax1 - fMin1);

                    if (m_rgOverlay[i] > fMax)
                    {
                        fMax = m_rgOverlay[i];
                        nMaxIdx = i;
                    }
                }

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    drawProbabilities(g, nX, nY, nWid1, nHt1, i, m_rgOverlay[i], fMin1, fMax1, clrMap.GetColor(i + 1), (i == nMaxIdx) ? true : false);
                    nX += nWid1;
                }
            }
        }

        private void drawProbabilities(Graphics g, int nX, int nY, int nWid, int nHt, int nAction, float fProb, float fMin, float fMax, Color clr, bool bMax)
        {
            string str = "";

            if (m_font == null)
                m_font = new Font("Century Gothic", 9.0f);

            if (!m_rgStyle.ContainsKey(clr))
            {
                Color clr1 = Color.FromArgb(128, clr);
                Brush br1 = new SolidBrush(clr1);
                Color clr2 = Color.FromArgb(64, clr);
                Pen pen = new Pen(clr2, 1.0f);
                Brush br2 = new SolidBrush(clr2);
                Brush brBright = new SolidBrush(clr);
                m_rgStyle.Add(clr, new Tuple<Brush, Brush, Pen, Brush>(br1, br2, pen, brBright));
            }

            Brush brBack = m_rgStyle[clr].Item1;
            Brush brFront = m_rgStyle[clr].Item2;
            Brush brTop = m_rgStyle[clr].Item4;
            Pen penLine = m_rgStyle[clr].Item3;

            if (fMin != 0 || fMax != 0)
            {
                str = "Action " + nAction.ToString() + " (" + fProb.ToString("N7") + ")";
            }
            else
            {
                str = "Action " + nAction.ToString() + " - No Probabilities";
            }

            SizeF sz = g.MeasureString(str, m_font);

            int nY1 = (int)(nY + (nHt - sz.Height));
            int nX1 = (int)(nX + (nWid / 2) - (sz.Width / 2));
            g.DrawString(str, m_font, (bMax) ? brTop : brFront, new Point(nX1, nY1));

            if (fMin != 0 || fMax != 0)
            {
                float fX = nX;
                float fWid = nWid;
                nHt -= (int)sz.Height;

                float fHt = nHt * fProb;
                float fHt1 = nHt - fHt;
                RectangleF rc1 = new RectangleF(fX, nY + fHt1, fWid, fHt);
                g.FillRectangle(brBack, rc1);
                g.DrawRectangle(penLine, rc1.X, rc1.Y, rc1.Width, rc1.Height);
            }
        }
    }
}