MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DataTemporalLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Data.SqlTypes;
4using System.Diagnostics;
5using System.Drawing;
6using System.IO;
7using System.Linq;
8using System.Reflection;
9using System.Runtime.Remoting.Messaging;
10using System.Security.Cryptography;
11using System.Security.Policy;
12using System.Text;
13using System.Threading;
14using System.Xml.Linq;
15using MyCaffe.basecode;
17using MyCaffe.common;
18using MyCaffe.db.temporal;
19using MyCaffe.param;
20using MyCaffe.param.tft;
21//using SimpleGraphing;
22
23namespace MyCaffe.layers.tft
24{
34 public class DataTemporalLayer<T> : Layer<T>
35 {
// Reusable shape scratch list (4 dims max).
37 List<int> m_rgShape = new List<int>(4);
// Batch size and step counts, copied from data_temporal_param during LayerSetUp.
38 uint m_nBatchSize;
39 uint m_nNumHistoricalSteps;
40 uint m_nNumFutureSteps;
// Underlying data source (SQL database or NPY files); created on first LayerSetUp.
41 RawData<T> m_data = null;
// Cancellation event used to abort background data loading.
42 CancelEvent m_evtCancel;
// [item,value] index pairs of the last loaded batch; fed back to the loss via Layer_OnLoss.
43 int[,] m_rgIdx = null;
44
54 : base(cuda, log, p)
55 {
56 m_evtCancel = evtCancel;
57 m_type = LayerParameter.LayerType.DATA_TEMPORAL;
58 m_db = db as IXTemporalDatabaseBase;
59 }
60
// Release layer resources - nothing to clean up here; m_data's background
// thread is stopped via the external CancelEvent.
62 protected override void dispose()
63 {
64 }
65
// Collect internal blobs - this layer has none, so only guard against
// re-entry when the collection is already populated.
67 protected override void setup_internal_blobs(BlobCollection<T> col)
68 {
69 if (col.Count > 0)
70 return;
71 }
72
// This is a data layer: it takes no bottom (input) blobs.
76 public override int ExactNumBottomBlobs
77 {
78 get { return 0; }
79 }
80
// At least 6 tops are produced: static numeric/categorical, historical
// numeric/categorical, and future numeric/categorical (see Reshape).
84 public override int MinTopBlobs
85 {
86 get { return 6; }
87 }
88
92 public override int MaxTopBlobs
93 {
95 }
96
// Setup the layer: read the step counts from the parameter, create the
// backing data source on first call, and verify enough items exist for
// one batch.  NOTE(review): several lines of this method are not visible
// in this view (source creation branch, forced-phase guard, data-found
// guard); comments below hedge accordingly.
102 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
103 {
105 m_nNumHistoricalSteps = m_param.data_temporal_param.num_historical_steps;
106 m_nNumFutureSteps = m_param.data_temporal_param.num_future_steps;
107
// Lazily create the data source (presumably RawSqlData or RawFileData
// depending on source_type - TODO confirm against full source).
108 if (m_data == null)
109 {
114 else
115 throw new Exception("Unknown source type: " + m_param.data_temporal_param.source_type.ToString());
116 }
117
// Use the forced phase when one is specified in the parameter.
118 Phase phase = m_phase;
120 {
121 m_log.WriteLine("INFO: Using forced phase = " + m_param.data_temporal_param.forced_phase.Value.ToString() + ".");
123 }
124
126 throw new Exception("DataTemporalLayer - could not find the data for '" + m_param.data_temporal_param.source + "'. You may need to run the SignalPop AI Designer to create this " + m_param.data_temporal_param.source_type.ToString() + " dataset.");
127
// There must be at least one full batch of items available.
128 int nTotalSize = m_data.GetTotalSize();
129 m_log.CHECK_GE(nTotalSize, m_nBatchSize, "There must be enough items for at least one batch - items found = " + nTotalSize.ToString() + ", batch size = " + m_nBatchSize.ToString());
130 }
131
// Reshape each top blob from the shape reported by the data source.
// Top order: 0=static numeric, 1=static categorical, 2=historical numeric,
// 3=historical categorical, 4=future numeric, 5=future categorical,
// optional 6=target, optional 7=historical target.
137 public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
138 {
139 int[] rgShape;
140
141 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.STATIC_NUMERIC)) != null)
142 colTop[0].Reshape(rgShape);
143
144 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.STATIC_CATEGORICAL)) != null)
145 colTop[1].Reshape(rgShape);
146
147 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.HISTORICAL_NUMERIC)) != null)
148 colTop[2].Reshape(rgShape);
149
150 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.HISTORICAL_CATEGORICAL)) != null)
151 colTop[3].Reshape(rgShape);
152
153 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.FUTURE_NUMERIC)) != null)
154 colTop[4].Reshape(rgShape);
155
156 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.FUTURE_CATEGORICAL)) != null)
157 colTop[5].Reshape(rgShape);
158
// Optional target top.
159 if (colTop.Count > 6)
160 {
161 if ((rgShape = m_data.GetShape(DataNpy<T>.OUTPUT_TYPE.TARGET)) != null)
162 {
163 colTop[6].Reshape(rgShape);
164 colTop[6].type = BLOB_TYPE.TARGET;
165 }
166
// Optional historical-target top: same shape as the target but over the
// historical steps.  NOTE(review): the guard condition for this branch
// (presumably colTop.Count > 7 plus a shape check) is not visible here.
168 {
169 rgShape[1] = (int)m_nNumHistoricalSteps;
170 colTop[7].Reshape(rgShape);
171 colTop[7].type = BLOB_TYPE.TARGET | BLOB_TYPE.DATA;
172 }
173 }
174 }
175
// Subscribe to the loss layer's OnLoss event so per-item losses can be
// fed back into the batch performance set (see Layer_OnLoss).
180 public override void ConnectLoss(LossLayer<T> layer)
181 {
182 layer.OnLoss += Layer_OnLoss;
183 }
184
// Forward the loss values for the last batch (identified by m_rgIdx) to
// the data source; no-op until at least one batch has been loaded.
185 private void Layer_OnLoss(object sender, LossArgs e)
186 {
187 if (m_rgIdx != null)
188 m_data.Add(e, m_rgIdx);
189 }
190
// Forward pass: load the next batch of data directly into the top blobs,
// keeping the [item,value] indexes for loss feedback.
202 protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
203 {
// Honor the forced phase when set, otherwise use the layer's phase.
204 Phase phase = layer_param.data_temporal_param.forced_phase.GetValueOrDefault(m_phase);
205 m_rgIdx = m_data.LoadBatch(phase, (int)m_nBatchSize, colTop, m_param.data_temporal_param.enable_debug_output, m_param.data_temporal_param.debug_output_path);
206
// NOTE(review): this warning is presumably guarded by an
// enable_debug_output check on a line not visible in this view - confirm.
208 m_log.WriteLine("WARNING: Debugging is enabled with path = " + m_param.data_temporal_param.debug_output_path + " and will slow down training!");
209 }
210
// Data layers do not back-propagate; intentionally a no-op.
212 protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
213 {
214 }
215 }
216
221 abstract class RawData<T>
222 {
// In-memory data (used by the NPY file source).
226 protected Data<T> m_data;
// Random generator used for shuffling/selection; seeded when a seed is supplied.
230 protected Random m_random;
// Batch size requested by the layer.
234 protected int m_nBatchSize;
239
/// <summary>
/// The constructor.
/// </summary>
/// <param name="nSeed">Optional random seed; when null a time-based seed is used.</param>
/// <param name="bOutputTargetHistorical">When true, the historical target is also output.</param>
public RawData(uint? nSeed, bool bOutputTargetHistorical)
{
    m_bOutputTargetHistorical = bOutputTargetHistorical;
    m_random = nSeed.HasValue ? new Random((int)nSeed.Value) : new Random();
}
254
259 {
260 get { return m_random; }
261 }
262
/// <summary>
/// Start loading the data on a background thread and block until the first
/// batch worth of data is ready (or the operation is canceled).
/// </summary>
/// <param name="phase">Phase of the data to load (TRAIN, TEST or RUN).</param>
/// <param name="strPath">Path (or dataset name) of the data.</param>
/// <param name="bShuffleData">Whether to shuffle the data.</param>
/// <param name="nBatchSize">Batch size.</param>
/// <param name="nHistoricalSteps">Number of historical steps.</param>
/// <param name="nFutureSteps">Number of future steps.</param>
/// <param name="dfPctMaxLoad">Maximum percent of the data to load.</param>
/// <param name="nDripRefreshRateInSec">Drip-refresh rate in seconds (0 = no refresh).</param>
/// <param name="nChunkCount">Number of rows loaded per chunk.</param>
/// <param name="log">Output log.</param>
/// <param name="evtCancel">Cancellation event.</param>
/// <returns>True once the data is ready, false when canceled.</returns>
public virtual bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
{
    m_nBatchSize = nBatchSize;

    ManualResetEvent evtReady = new ManualResetEvent(false);
    ManualResetEvent evtDone = new ManualResetEvent(false);

    // Kick off the background loader with all of its parameters bundled up.
    DataLoadParameters args = new DataLoadParameters(phase, strPath, nHistoricalSteps, nFutureSteps, dfPctMaxLoad, nDripRefreshRateInSec, nChunkCount, bShuffleData, log, evtCancel, evtReady, evtDone);
    Thread threadLoad = new Thread(new ParameterizedThreadStart(loadDataFunction));
    threadLoad.Start(args);

    // Wait for the loader to signal ready, bailing out on cancel.
    for (;;)
    {
        if (evtReady.WaitOne(1000))
            return true;

        if (evtCancel.WaitOne(0))
            return false;

        Thread.Sleep(50);
    }
}
296
// Background-thread entry point; overridden by RawFileData to load the
// NPY files.  The base implementation is a no-op.
301 protected virtual void loadDataFunction(object obj)
302 {
303 }
304
// Load a batch of data into the top blobs, delegating to the in-memory
// Data object.  Returns the [item,value] index pairs of the batch.
314 public virtual int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection<T> col, bool bEnableDebug = false, string strDebugPath = null)
315 {
316 return m_data.LoadBatch(nBatchSize, col, bEnableDebug, strDebugPath);
317 }
318
// Receive the loss for a batch of items; no-op in the base (overridden by
// RawSqlData to track per-item performance).
324 public virtual void Add(LossArgs e, int[,] rgIdx)
325 {
326 }
327
// Total number of items available, delegated to the in-memory Data object.
332 public virtual int GetTotalSize()
333 {
334 return m_data.GetTotalSize();
335 }
336
// Shape of the requested output type (null when that output is not present),
// delegated to the in-memory Data object.
342 public virtual int[] GetShape(DataNpy<T>.OUTPUT_TYPE ot)
343 {
344 return m_data.GetShape(ot);
345 }
346 }
347
352 class RawSqlData<T> : RawData<T>
353 {
// Selection and step configuration captured in LoadData.
356 bool m_bShuffleData;
357 int m_nHistoricalSteps;
358 int m_nFutureSteps;
359 int m_nDropRefreshReateInSec;
360 Log m_log;
361 Phase m_phase = Phase.NONE;
// Staging buffers for each output, lazily sized from the top blobs on the
// first LoadBatch and reused across batches.
362 float[] m_rgStaticNum = null;
363 float[] m_rgStaticCat = null;
364 float[] m_rgHistoricalNum = null;
365 float[] m_rgHistoricalCat = null;
366 float[] m_rgFutureNum = null;
367 float[] m_rgFutureCat = null;
368 float[] m_rgTarget = null;
369 float[] m_rgTargetHist = null;
// [item,value] index pairs of the last batch, returned to the layer.
370 int[,] m_rgIdx = null;
// Optional per-item loss tracking used to re-sample poorly performing items.
371 BatchPerfSet m_batchPerfSet = null;
372
373
// Constructor - keeps the (optionally null) temporal database and log.
381 public RawSqlData(uint? nSeed, bool bOutputTargetHistorical, IXTemporalDatabaseBase db, Log log) : base(nSeed, bOutputTargetHistorical)
382 {
383 m_db = db;
384 m_log = log;
385 }
386
/// <summary>
/// Connect to (or create) the temporal database and locate the dataset.
/// </summary>
/// <param name="phase">Phase of the data to load.</param>
/// <param name="strDataset">Name of the dataset.</param>
/// <param name="bShuffleData">Whether to select items randomly.</param>
/// <param name="nBatchSize">Batch size.</param>
/// <param name="nHistoricalSteps">Number of historical steps.</param>
/// <param name="nFutureSteps">Number of future steps.</param>
/// <param name="dfPctMaxLoad">Unused by the SQL source.</param>
/// <param name="nDripRefreshRateInSec">Drip refresh rate in seconds.</param>
/// <param name="nChunkCount">Unused by the SQL source.</param>
/// <param name="log">Output log.</param>
/// <param name="evtCancel">Cancellation event.</param>
/// <returns>True on success, false when the dataset cannot be found.</returns>
public override bool LoadData(Phase phase, string strDataset, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
{
    m_phase = phase;

    // When no database was handed in, create one that fully loads the
    // normalized data for the configured step counts.
    SettingsCaffe settings = null;
    if (m_db == null)
    {
        settings = new SettingsCaffe();
        settings.DbLoadMethod = DB_LOAD_METHOD.LOAD_ALL;
        settings.DbLoadLimit = 0;

        PropertySet prop = new PropertySet();
        prop.SetProperty("NormalizedData", "True");
        prop.SetProperty("HistoricalSteps", nHistoricalSteps.ToString());
        prop.SetProperty("FutureSteps", nFutureSteps.ToString());

        m_db = new MyCaffeTemporalDatabase(m_log, prop);
    }

    m_ds = m_db.GetDatasetByName(strDataset);
    if (m_ds == null)
    {
        m_log.WriteLine("ERROR: Could not find the dataset '" + strDataset + "'!");
        return false;
    }

    // Only initialize a database we created ourselves.
    if (settings != null)
        m_db.InitializeWithDsName1(settings, strDataset);

    m_bShuffleData = bShuffleData;
    m_nBatchSize = nBatchSize;
    m_nHistoricalSteps = nHistoricalSteps;
    m_nFutureSteps = nFutureSteps;
    m_nDropRefreshReateInSec = nDripRefreshRateInSec;

    return true;
}
440
// Allocate a staging buffer sized to the blob at nIdx, or null when the
// blob is missing or empty.
private float[] getBuffer(BlobCollection<T> col, int nIdx)
{
    if (nIdx >= col.Count)
        return null;

    int nCount = col[nIdx].count();
    return (nCount == 0) ? null : new float[nCount];
}
452
// Copy a staging buffer (when present) into the blob at nIdx.
private void setBuffer(BlobCollection<T> col, int nIdx, float[] rg)
{
    if (rg != null)
        col[nIdx].mutable_cpu_data = Utility.ConvertVec<T>(rg);
}
460
/// <summary>
/// Record the loss of each item in the batch so that future batches can
/// favor the worst performing items.
/// </summary>
/// <param name="e">Loss values for the batch.</param>
/// <param name="rgIdx">[item,value] index pairs of the batch.</param>
public override void Add(LossArgs e, int[,] rgIdx)
{
    // Lazily create the performance set (25% worst, 100 batches of history).
    BatchPerfSet perfSet = m_batchPerfSet;
    if (perfSet == null)
    {
        perfSet = new BatchPerfSet(m_random, 0.25, (int)m_nBatchSize * 100, 2);
        m_batchPerfSet = perfSet;
    }

    perfSet.Add(e, rgIdx);
}
473
// Load one batch from the temporal database directly into the top blobs.
// Top order matches Reshape: 0=static num, 1=static cat, 2=hist num,
// 3=hist cat, 4=future num, 5=future cat, 6=target, 7=hist target.
// Returns the [item,value] index pairs used, so losses can be fed back.
// NOTE(review): when QueryTemporalItem returns null the batch slot keeps
// the previous batch's data and index - presumably a deliberate
// best-effort; confirm against the database implementation.
483 public override int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection<T> col, bool bEnableDebug = false, string strDebugPath = null)
484 {
485 SourceDescriptor src = (phase == Phase.TRAIN) ? m_ds.TrainingSource : m_ds.TestingSource;
486 DB_LABEL_SELECTION_METHOD itemSelection = (m_bShuffleData) ? DB_LABEL_SELECTION_METHOD.RANDOM : DB_LABEL_SELECTION_METHOD.NONE;
487 DB_ITEM_SELECTION_METHOD valueSelection = (m_bShuffleData) ? DB_ITEM_SELECTION_METHOD.RANDOM : DB_ITEM_SELECTION_METHOD.NONE;
488
// Lazily size the staging buffers from the top blobs (null when a top is
// absent or empty, which disables that output below).
489 if (m_rgStaticNum == null)
490 m_rgStaticNum = getBuffer(col, 0);
491 if (m_rgStaticCat == null)
492 m_rgStaticCat = getBuffer(col, 1);
493 if (m_rgHistoricalNum == null)
494 m_rgHistoricalNum = getBuffer(col, 2);
495 if (m_rgHistoricalCat == null)
496 m_rgHistoricalCat = getBuffer(col, 3);
497 if (m_rgFutureNum == null)
498 m_rgFutureNum = getBuffer(col, 4);
499 if (m_rgFutureCat == null)
500 m_rgFutureCat = getBuffer(col, 5);
501 if (m_rgTarget == null)
502 m_rgTarget = getBuffer(col, 6);
503 if (m_rgTargetHist == null)
504 m_rgTargetHist = getBuffer(col, 7);
505
506 if (m_rgIdx == null)
507 m_rgIdx = new int[nBatchSize,2];
508
509 for (int i = 0; i < nBatchSize; i++)
510 {
511 int? nItemIdx = null;
512 int? nValueIdx = null;
513
514 // When using the batch performance set, the indexes are selected from the set,
515 // seeking to select from the 25% worst performing items.
516 if (m_batchPerfSet != null)
517 m_batchPerfSet.Select(ref nItemIdx, ref nValueIdx);
518
519 SimpleTemporalDatumCollection rgData = m_db.QueryTemporalItem(i, src.ID, ref nItemIdx, ref nValueIdx, itemSelection, valueSelection, bEnableDebug, strDebugPath);
520 if (rgData == null)
521 continue;
522
523 m_rgIdx[i, 0] = nItemIdx.Value;
524 m_rgIdx[i, 1] = nValueIdx.Value;
525
526 SimpleTemporalDatum sdStatNum = rgData[0];
527 SimpleTemporalDatum sdStatCat = rgData[1];
528 SimpleTemporalDatum sdHistNum = rgData[2];
529 SimpleTemporalDatum sdHistCat = rgData[3];
530 SimpleTemporalDatum sdFutureNum = rgData[4];
531 SimpleTemporalDatum sdFutureCat = rgData[5];
532 SimpleTemporalDatum sdTarget = rgData[6];
533 SimpleTemporalDatum sdTargetHist = rgData[7];
534
// Copy each returned datum into its slot (i * item length) within the
// corresponding staging buffer.
535 // col[0] = STATIC_NUMERIC
536 if (m_rgStaticNum != null)
537 {
538 float[] rgRawData = sdStatNum.Data;
539 Array.Copy(rgRawData, 0, m_rgStaticNum, i * rgRawData.Length, rgRawData.Length);
540 }
541
542 // col[1] = STATIC_CATEGORICAL
543 if (m_rgStaticCat != null)
544 {
545 float[] rgRawData = sdStatCat.Data;
546 Array.Copy(rgRawData, 0, m_rgStaticCat, i * rgRawData.Length, rgRawData.Length);
547 }
548
549 // col[2] = HISTORICAL_NUMERIC
550 if (m_rgHistoricalNum != null)
551 {
552 float[] rgRawData = sdHistNum.Data;
553 Array.Copy(rgRawData, 0, m_rgHistoricalNum, i * rgRawData.Length, rgRawData.Length);
554 }
555
556 // col[3] = HISTORICAL_CATEGORICAL
557 if (m_rgHistoricalCat != null)
558 {
559 float[] rgRawData = sdHistCat.Data;
560 Array.Copy(rgRawData, 0, m_rgHistoricalCat, i * rgRawData.Length, rgRawData.Length);
561 }
562
563 // col[4] = FUTURE_NUMERIC
564 if (m_rgFutureNum != null)
565 {
566 float[] rgRawData = sdFutureNum.Data;
567 Array.Copy(rgRawData, 0, m_rgFutureNum, i * rgRawData.Length, rgRawData.Length);
568 }
569
570 // col[5] = FUTURE_CATEGORICAL
571 if (m_rgFutureCat != null)
572 {
573 float[] rgRawData = sdFutureCat.Data;
574 Array.Copy(rgRawData, 0, m_rgFutureCat, i * rgRawData.Length, rgRawData.Length);
575 }
576
577 // col[6] = TARGET
578 if (m_rgTarget != null)
579 {
580 float[] rgRawData = sdTarget.Data;
581 Array.Copy(rgRawData, 0, m_rgTarget, i * rgRawData.Length, rgRawData.Length);
582 }
583
584 // col[7] = Historical Target (optional)
585 if (m_rgTargetHist != null)
586 {
587 float[] rgRawData = sdTargetHist.Data;
588 Array.Copy(rgRawData, 0, m_rgTargetHist, i * rgRawData.Length, rgRawData.Length);
589 }
590 }
591
// Push the completed staging buffers into the top blobs.
592 setBuffer(col, 0, m_rgStaticNum);
593 setBuffer(col, 1, m_rgStaticCat);
594 setBuffer(col, 2, m_rgHistoricalNum);
595 setBuffer(col, 3, m_rgHistoricalCat);
596 setBuffer(col, 4, m_rgFutureNum);
597 setBuffer(col, 5, m_rgFutureCat);
598 setBuffer(col, 6, m_rgTarget);
599 setBuffer(col, 7, m_rgTargetHist);
600
601 return m_rgIdx;
602 }
603
// Total number of items available in the database for the current phase
// and step configuration.
608 public override int GetTotalSize()
609 {
610 return m_db.GetTotalSize(m_ds.ID, m_phase, m_nHistoricalSteps, m_nFutureSteps);
611 }
612
// Compute the shape of the requested output from the dataset's value
// stream descriptors.  Historical outputs combine observed + known
// streams; future outputs use known streams only.  Returns null when the
// dataset has no streams of the requested kind.
// NOTE(review): the foreach over the stream descriptors and the
// class/type guards are partially missing from this view - the counting
// below presumably walks each ValueStreamDescriptor and buckets it by
// STATIC/OBSERVED/KNOWN and numeric/categorical; confirm in full source.
618 public override int[] GetShape(DataNpy<T>.OUTPUT_TYPE ot)
619 {
620 int nStaticNumCount = 0;
621 int nStaticCatCount = 0;
622 int nObservedNumCount = 0;
623 int nObservedCatCount = 0;
624 int nKnownNumCount = 0;
625 int nKnownCatCount = 0;
626
628 {
630 {
632 nStaticNumCount++;
633 else
634 nStaticCatCount++;
635 }
636
637 else if (vsd.ClassType == ValueStreamDescriptor.STREAM_CLASS_TYPE.OBSERVED)
638 {
640 nObservedNumCount++;
641 else
642 nObservedCatCount++;
643 }
644
646 {
648 nKnownNumCount++;
649 else
650 nKnownCatCount++;
651
652 }
653 }
654
655 switch (ot)
656 {
657 case Data<T>.OUTPUT_TYPE.STATIC_CATEGORICAL:
658 if (nStaticCatCount == 0)
659 return null;
660 return new int[] { m_nBatchSize, nStaticCatCount };
661
662 case Data<T>.OUTPUT_TYPE.STATIC_NUMERIC:
663 if (nStaticNumCount == 0)
664 return null;
665 return new int[] { m_nBatchSize, nStaticNumCount };
666
667 case Data<T>.OUTPUT_TYPE.HISTORICAL_SYNC:
668 return new int[] { m_nBatchSize, m_nHistoricalSteps, 1 };
669
670 case Data<T>.OUTPUT_TYPE.HISTORICAL_CATEGORICAL:
671 if (nKnownCatCount + nObservedCatCount == 0)
672 return null;
673 return new int[] { m_nBatchSize, m_nHistoricalSteps, nKnownCatCount + nObservedCatCount, 1 };
674
675 case Data<T>.OUTPUT_TYPE.HISTORICAL_NUMERIC:
676 if (nKnownNumCount + nObservedNumCount == 0)
677 return null;
678 return new int[] { m_nBatchSize, m_nHistoricalSteps, nKnownNumCount + nObservedNumCount, 1 };
679
680 case Data<T>.OUTPUT_TYPE.FUTURE_SYNC:
681 return new int[] { m_nBatchSize, m_nFutureSteps, 1 };
682
683 case Data<T>.OUTPUT_TYPE.FUTURE_CATEGORICAL:
684 if (nKnownCatCount == 0)
685 return null;
686 return new int[] { m_nBatchSize, m_nFutureSteps, nKnownCatCount, 1 };
687
688 case Data<T>.OUTPUT_TYPE.FUTURE_NUMERIC:
689 if (nKnownNumCount == 0)
690 return null;
691 return new int[] { m_nBatchSize, m_nFutureSteps, nKnownNumCount, 1 };
692
693 case Data<T>.OUTPUT_TYPE.TARGET:
694 return new int[] { m_nBatchSize, m_nFutureSteps, 1, 1 };
695 }
696
697 return null;
698 }
699 }
700
705 class RawFileData<T> : RawData<T>
706 {
// Constructor - simply forwards the seed and historical-target flag.
712 public RawFileData(uint? nSeed, bool bOutputTargetHistorical) : base(nSeed, bOutputTargetHistorical)
713 {
714 }
715
/// <summary>
/// Verify that the required NPY data and schema files exist for the phase.
/// </summary>
/// <param name="phase">Phase - selects the 'train', 'test' or 'validation' file set.</param>
/// <param name="strPath">Directory containing the data files.</param>
/// <exception cref="Exception">Thrown when a required file is missing.</exception>
public void VerifyFiles(Phase phase, string strPath)
{
    string strType;

    switch (phase)
    {
        case Phase.TEST:
            strType = "test";
            break;

        case Phase.RUN:
            strType = "validation";
            break;

        default:
            strType = "train";
            break;
    }

    string strBase = strPath.TrimEnd('\\', '/') + "\\" + strType;

    string strDataFile = strBase + "_sync.npy";
    if (!File.Exists(strDataFile))
        throw new Exception("Could not find the data file '" + strDataFile + "'. You may need to run the SignalPop AI Designer Dataset Creator.");

    string strSchemaFile = strBase + "_schema.xml";
    if (!File.Exists(strSchemaFile))
        throw new Exception("Could not find the schema file '" + strSchemaFile + "'. You may need to run the SignalPop AI Designer Dataset Creator.");
}
744
// Verify the files exist, create the NPY-backed data object, then let the
// base class spin up the background loader thread.
759 public override bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
760 {
761 VerifyFiles(phase, strPath);
762 m_data = new DataNpy<T>(m_random, log, nHistoricalSteps, nFutureSteps, bShuffleData, m_bOutputTargetHistorical);
763 return base.LoadData(phase, strPath, bShuffleData, nBatchSize, nHistoricalSteps, nFutureSteps, dfPctMaxLoad, nDripRefreshRateInSec, nChunkCount, log, evtCancel);
764 }
765
/// <summary>
/// Background-thread function: stream rows from the NPY files into m_data,
/// signaling evtReady once a batch worth of data is available, then
/// optionally drip-refresh the data every nDripRefreshRateInSec seconds.
/// </summary>
/// <param name="obj">DataLoadParameters bundle passed by RawData.LoadData.</param>
protected override void loadDataFunction(object obj)
{
    DataLoadParameters arg = obj as DataLoadParameters;
    string strPath = arg.Path;
    Phase phase = arg.Phase;
    Log log = arg.Log;
    double dfMaxLoadPct = arg.MaxLoadPercent;
    int nDripRefreshRateInSec = arg.DripRefreshRateInSec;
    CancelEvent evtCancel = arg.CancelEvent;
    ManualResetEvent evtReady = arg.ReadyEvent;
    ManualResetEvent evtDone = arg.DoneEvent;
    DataNpy<T> dataChunk = null;
    int nIteration = 0;

    try
    {
        string strType = "train";
        strPath = strPath.TrimEnd('\\', '/');
        strPath += "\\";

        if (phase == Phase.TEST)
            strType = "test";
        else if (phase == Phase.RUN)
            strType = "validation";

        dataChunk = new DataNpy<T>(m_data);
        dataChunk.Open(strPath, strType, m_nBatchSize);

        int nRowIdx = 0;
        int nRowCount = dataChunk.RowCount;
        int nMaxLoadCount = (int)(nRowCount * dfMaxLoadPct);
        int nWaitCount = 0;

        Stopwatch sw = new Stopwatch();
        sw.Start();

        while (!evtCancel.WaitOne(0))
        {
            bool bGoodData = false;

            while (dataChunk.Load(nRowIdx, out bGoodData))
            {
                // Skip rows without a full valid range of steps.
                if (!bGoodData)
                {
                    nRowIdx++;
                    continue;
                }

                bool bRefreshed = m_data.Add(dataChunk, nMaxLoadCount);

                // Signal the foreground thread once a full batch is loaded.
                if (m_data.IsReady)
                    evtReady.Set();

                nRowIdx++;

                // Report progress (and honor cancel) about once a second.
                if (sw.Elapsed.TotalMilliseconds > 1000)
                {
                    if (evtCancel.WaitOne(0))
                    {
                        log.WriteLine("Background data loading for '" + strType + "' aborted.");
                        break;
                    }

                    double dfPct = (double)nRowIdx / (double)nRowCount;
                    if (nMaxLoadCount > 0)
                    {
                        if (nRowIdx > nMaxLoadCount)
                            dfPct = 1;
                        else
                            dfPct = (double)nRowIdx / (double)nMaxLoadCount;
                    }

                    log.WriteLine("Background data loading '" + strType + "' data at " + dfPct.ToString("P") + "...");
                    sw.Restart();
                }

                // After a refresh, pause ~nDripRefreshRateInSec seconds
                // before continuing (or stop when drip refresh is disabled).
                if (bRefreshed)
                {
                    log.WriteLine("Background data loading '" + strType + "' refreshed...");

                    nWaitCount = 0;
                    while (!evtCancel.WaitOne(1000))
                    {
                        Thread.Sleep(50);
                        nWaitCount++;

                        if (nWaitCount > nDripRefreshRateInSec)
                            break;
                    }

                    if (nDripRefreshRateInSec == 0)
                        break;
                }
            }

            if (nIteration == 0)
                log.WriteLine("Background data load completed.");

            // No drip refresh requested - load once and exit.
            if (nDripRefreshRateInSec <= 0)
                break;

            if (nIteration == 0)
                log.WriteLine("Starting drip refreshing...");

            // Wait out the refresh period, then restart from the first row.
            nIteration++;
            nWaitCount = 0;
            while (!evtCancel.WaitOne(1000))
            {
                Thread.Sleep(50);
                nWaitCount++;

                if (nWaitCount > nDripRefreshRateInSec)
                    break;
            }

            nRowIdx = 0;
        }
    }
    finally
    {
        // Guard against a null dataChunk when the DataNpy constructor or
        // Open throws before assignment (previously a NullReferenceException
        // here would mask the original error).
        if (dataChunk != null)
        {
            dataChunk.Close();
            dataChunk.Dispose();
        }

        evtDone.Set();
    }
}
892 }
893
894#pragma warning disable 1591
895
/// <summary>
/// Immutable bundle of the parameters handed to the background data
/// loading thread (see RawData.LoadData / loadDataFunction).
/// </summary>
class DataLoadParameters
{
    public DataLoadParameters(Phase phase, string strPath, int nNumHistSteps, int nNumFutureSteps, double dfMaxLoadPct, int nDripRefreshRateInSec, uint nChunkCount, bool bShuffleData, Log log, CancelEvent evtCancel, ManualResetEvent evtReady, ManualResetEvent evtDone)
    {
        Phase = phase;
        Path = strPath;
        HistoricalSteps = nNumHistSteps;
        FutureSteps = nNumFutureSteps;
        MaxLoadPercent = dfMaxLoadPct;
        DripRefreshRateInSec = nDripRefreshRateInSec;
        ChunkCount = nChunkCount;
        ShuffleData = bShuffleData;
        Log = log;
        CancelEvent = evtCancel;
        ReadyEvent = evtReady;
        DoneEvent = evtDone;
    }

    /// <summary>Phase of the data to load.</summary>
    public Phase Phase { get; private set; }
    /// <summary>Directory containing the data files.</summary>
    public string Path { get; private set; }
    /// <summary>Number of historical steps.</summary>
    public int HistoricalSteps { get; private set; }
    /// <summary>Number of future steps.</summary>
    public int FutureSteps { get; private set; }
    /// <summary>Maximum percent of the data to load.</summary>
    public double MaxLoadPercent { get; private set; }
    /// <summary>Drip-refresh rate in seconds (0 = no refresh).</summary>
    public int DripRefreshRateInSec { get; private set; }
    /// <summary>Number of rows per load chunk.</summary>
    public uint ChunkCount { get; private set; }
    /// <summary>Whether to shuffle the data.</summary>
    public bool ShuffleData { get; private set; }
    /// <summary>Output log.</summary>
    public Log Log { get; private set; }
    /// <summary>Cancellation event aborting the load.</summary>
    public CancelEvent CancelEvent { get; private set; }
    /// <summary>Set by the loader once a batch of data is ready.</summary>
    public ManualResetEvent ReadyEvent { get; private set; }
    /// <summary>Set by the loader when it exits.</summary>
    public ManualResetEvent DoneEvent { get; private set; }
}
940
/// <summary>
/// Base class for the in-memory data managed by a RawData source; concrete
/// implementations (e.g. DataNpy) handle the actual storage and batching.
/// </summary>
/// <typeparam name="T">Base type, either float or double.</typeparam>
abstract class Data<T> : IDisposable
{
    protected Random m_random;
    protected Log m_log;
    protected int m_nHistoricalSteps;
    protected int m_nFutureSteps;
    protected bool m_bShuffleData;
    protected bool m_bOutputTargetHistorical = false;
    // Synchronization object for implementations that load on a background thread.
    protected object m_syncObj = new object();
    protected int m_nRows = 0;
    protected int m_nBatchSize = 0;
    protected int m_nTotalSize = 0;

    /// <summary>Kinds of raw input data files/streams.</summary>
    public enum DATA_TYPE
    {
        SYNC,
        STATIC_NUMERIC,
        STATIC_CATEGORICAL,
        OBSERVED_NUMERIC,
        OBSERVED_CATEGORICAL,
        KNOWN_NUMERIC,
        KNOWN_CATEGORICAL
    }

    /// <summary>Kinds of batched outputs produced for the layer tops.</summary>
    public enum OUTPUT_TYPE
    {
        STATIC_NUMERIC,
        STATIC_CATEGORICAL,
        HISTORICAL_NUMERIC,
        HISTORICAL_CATEGORICAL,
        FUTURE_NUMERIC,
        FUTURE_CATEGORICAL,
        TARGET,
        HISTORICAL_SYNC,
        FUTURE_SYNC,
        HISTORICAL_TARGET
    }

    /// <summary>
    /// The constructor.
    /// </summary>
    public Data(Random random, Log log, int nHistoricalSteps, int nFutureSteps, bool bShuffleData, bool bOutputTargetHistorical)
    {
        m_random = random;
        m_log = log;
        m_nHistoricalSteps = nHistoricalSteps;
        m_nFutureSteps = nFutureSteps;
        m_bShuffleData = bShuffleData;
        m_bOutputTargetHistorical = bOutputTargetHistorical;
    }

    /// <summary>
    /// Copy constructor - shares the configuration of another Data instance.
    /// </summary>
    public Data(Data<T> data)
        : this(data.m_random, data.m_log, data.m_nHistoricalSteps, data.m_nFutureSteps, data.m_bShuffleData, data.m_bOutputTargetHistorical)
    {
    }

    /// <summary>Release all resources by closing the data.</summary>
    public void Dispose()
    {
        Close();
    }

    /// <summary>Number of rows in the data.</summary>
    public int RowCount
    {
        get { return m_nRows; }
    }

    /// <summary>Total number of items loaded.</summary>
    public int GetTotalSize()
    {
        return m_nTotalSize;
    }

    /// <summary>True once at least a batch worth of items is loaded.</summary>
    public bool IsReady
    {
        get { return GetTotalSize() >= m_nBatchSize; }
    }

    public abstract void Open(string strSrc, string strType, int nBatchSize);

    public abstract void Close();

    public abstract int[,] LoadBatch(int nBatchSize, BlobCollection<T> col, bool bEnableDebug, string strDebugPath);

    public abstract int[] GetShape(OUTPUT_TYPE ot);

    public abstract bool Add(DataNpy<T> data, int nMaxLoad);
}
1029
1030 //[DEPRECATED] use the SQL_DB source type instead.
1031 class DataNpy<T> : Data<T>
1032 {
// Schema describing the field layout of the *.npy files.
1033 DataSchema m_schema;
1034 Lookup m_validRanges = new Lookup();
// Per-data-type file names, loaded rows, open file handles and field counts.
1035 Dictionary<DATA_TYPE, string> m_rgstrFiles = new Dictionary<DATA_TYPE, string>();
1036 Dictionary<DATA_TYPE, List<float[]>> m_rgNumData = new Dictionary<DATA_TYPE, List<float[]>>();
1037 Dictionary<DATA_TYPE, List<long[]>> m_rgCatData = new Dictionary<DATA_TYPE, List<long[]>>();
1038 Dictionary<DATA_TYPE, NumpyFile<float>> m_rgNumFiles = new Dictionary<DATA_TYPE, NumpyFile<float>>();
1039 Dictionary<DATA_TYPE, NumpyFile<long>> m_rgCatFiles = new Dictionary<DATA_TYPE, NumpyFile<long>>();
1040 Dictionary<DATA_TYPE, int> m_rgFields = new Dictionary<DATA_TYPE, int>();
// Preallocated per-batch output buffers keyed by output type.
1041 Dictionary<OUTPUT_TYPE, long[]> m_rgBatchSync = new Dictionary<OUTPUT_TYPE, long[]>();
1042 Dictionary<OUTPUT_TYPE, float[]> m_rgBatchBuffers = new Dictionary<OUTPUT_TYPE, float[]>();
// Batch iteration state.
1043 int m_nMaxRowIdx = -1;
1044 int m_nRowIdx = 0;
1045 int m_nColIdx = 0;
// Index of the TARGET field within the observed numeric data.
1046 int m_nTargetFieldIdx = 0;
1047 int m_nIteration = 0;
1048
// Constructor - full configuration.
1049 public DataNpy(Random random, Log log, int nHistoricalSteps, int nFutureSteps, bool bShuffleData, bool bOutputTargetHistorical)
1050 : base(random, log, nHistoricalSteps, nFutureSteps, bShuffleData, bOutputTargetHistorical)
1051 {
1052 }
1053
// Copy constructor - shares the configuration of another Data instance
// (used to create the per-chunk loader in loadDataFunction).
1054 public DataNpy(Data<T> data)
1055 : base(data)
1056 {
1057 }
1058
/// <summary>
/// Open the NPY data files for the given phase type ('train', 'test' or
/// 'validation'), load the schema, and preallocate the per-batch buffers.
/// The sync and observed numeric files are required; all others are optional.
/// </summary>
/// <param name="strPath">Directory containing the *.npy and *_schema.xml files.</param>
/// <param name="strType">File set prefix: 'train', 'test' or 'validation'.</param>
/// <param name="nBatchSize">Batch size used to size the batch buffers.</param>
/// <exception cref="Exception">Thrown when a required file is missing or the schema mismatches.</exception>
public override void Open(string strPath, string strType, int nBatchSize)
{
    int nLen;
    m_schema = DataSchema.Load(strPath + "\\" + strType + "_schema.xml");
    m_nTargetFieldIdx = m_schema.Data.ObservedNum.FindFieldIndex(Field.INPUT_TYPE.TARGET);

    m_nIteration = 0;

    m_nBatchSize = nBatchSize;
    m_rgstrFiles.Add(DATA_TYPE.SYNC, strPath + "\\" + strType + "_sync.npy");
    m_rgstrFiles.Add(DATA_TYPE.STATIC_NUMERIC, strPath + "\\" + strType + "_static_num.npy");
    m_rgstrFiles.Add(DATA_TYPE.STATIC_CATEGORICAL, strPath + "\\" + strType + "_static_cat.npy");
    m_rgstrFiles.Add(DATA_TYPE.OBSERVED_NUMERIC, strPath + "\\" + strType + "_observed_num.npy");
    m_rgstrFiles.Add(DATA_TYPE.OBSERVED_CATEGORICAL, strPath + "\\" + strType + "_observed_cat.npy");
    m_rgstrFiles.Add(DATA_TYPE.KNOWN_NUMERIC, strPath + "\\" + strType + "_known_num.npy");
    m_rgstrFiles.Add(DATA_TYPE.KNOWN_CATEGORICAL, strPath + "\\" + strType + "_known_cat.npy");

    // Verify the required files.
    if (!File.Exists(m_rgstrFiles[DATA_TYPE.SYNC]))
        throw new Exception("Could not find the sync file '" + m_rgstrFiles[DATA_TYPE.SYNC] + "'.");

    NumpyFile<long> npySync = new NumpyFile<long>(null);
    npySync.OpenRead(m_rgstrFiles[DATA_TYPE.SYNC]);
    m_rgCatFiles.Add(DATA_TYPE.SYNC, npySync);
    m_rgCatData.Add(DATA_TYPE.SYNC, new List<long[]>());
    m_rgFields.Add(DATA_TYPE.SYNC, npySync.Fields);

    nLen = nBatchSize * m_nHistoricalSteps * m_rgCatFiles[DATA_TYPE.SYNC].Fields;
    m_rgBatchSync.Add(OUTPUT_TYPE.HISTORICAL_SYNC, new long[nLen]);

    nLen = nBatchSize * m_nFutureSteps * m_rgCatFiles[DATA_TYPE.SYNC].Fields;
    m_rgBatchSync.Add(OUTPUT_TYPE.FUTURE_SYNC, new long[nLen]);

    // BUGFIX: the message previously said 'sync file' for the observed numeric file.
    if (!File.Exists(m_rgstrFiles[DATA_TYPE.OBSERVED_NUMERIC]))
        throw new Exception("Could not find the observed numeric file '" + m_rgstrFiles[DATA_TYPE.OBSERVED_NUMERIC] + "'.");

    NumpyFile<float> npyObsNum = new NumpyFile<float>(null);
    npyObsNum.OpenRead(m_rgstrFiles[DATA_TYPE.OBSERVED_NUMERIC]);
    m_rgNumFiles.Add(DATA_TYPE.OBSERVED_NUMERIC, npyObsNum);
    m_rgNumData.Add(DATA_TYPE.OBSERVED_NUMERIC, new List<float[]>());
    m_rgFields.Add(DATA_TYPE.OBSERVED_NUMERIC, npyObsNum.Fields);
    m_nRows = npyObsNum.Rows;

    int nNumObsFields = m_schema.Data.ObservedNumExplicitCount;
    if (nNumObsFields != m_rgNumFiles[DATA_TYPE.OBSERVED_NUMERIC].Fields && nNumObsFields != m_rgNumFiles[DATA_TYPE.OBSERVED_NUMERIC].Fields - 1)
        throw new Exception("The number of observed numeric fields in the schema does not match the number of fields in the observed numeric data file.");

    nLen = nBatchSize * m_nHistoricalSteps * nNumObsFields;
    m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_NUMERIC, new float[nLen]);
    // The future observed are the target values.
    nLen = nBatchSize * m_nFutureSteps * 1;
    m_rgBatchBuffers.Add(OUTPUT_TYPE.TARGET, new float[nLen]);

    if (m_bOutputTargetHistorical)
    {
        // The past observed are the target values historical.
        nLen = nBatchSize * m_nHistoricalSteps * 1;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_TARGET, new float[nLen]);
    }

    if (File.Exists(m_rgstrFiles[DATA_TYPE.OBSERVED_CATEGORICAL]))
    {
        NumpyFile<long> npyObsCat = new NumpyFile<long>(null);
        npyObsCat.OpenRead(m_rgstrFiles[DATA_TYPE.OBSERVED_CATEGORICAL]);
        m_rgCatFiles.Add(DATA_TYPE.OBSERVED_CATEGORICAL, npyObsCat);
        m_rgCatData.Add(DATA_TYPE.OBSERVED_CATEGORICAL, new List<long[]>());
        m_rgFields.Add(DATA_TYPE.OBSERVED_CATEGORICAL, npyObsCat.Fields);

        // BUGFIX: previously indexed m_rgNumFiles with OBSERVED_CATEGORICAL,
        // which only exists in m_rgCatFiles -> KeyNotFoundException.
        nLen = nBatchSize * m_nHistoricalSteps * m_rgCatFiles[DATA_TYPE.OBSERVED_CATEGORICAL].Fields;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_CATEGORICAL, new float[nLen]);
    }

    if (File.Exists(m_rgstrFiles[DATA_TYPE.KNOWN_NUMERIC]))
    {
        NumpyFile<float> npyKnownNum = new NumpyFile<float>(null);
        npyKnownNum.OpenRead(m_rgstrFiles[DATA_TYPE.KNOWN_NUMERIC]);
        m_rgNumFiles.Add(DATA_TYPE.KNOWN_NUMERIC, npyKnownNum);
        m_rgNumData.Add(DATA_TYPE.KNOWN_NUMERIC, new List<float[]>());
        m_rgFields.Add(DATA_TYPE.KNOWN_NUMERIC, npyKnownNum.Fields);

        // Observed numeric and known numeric are combined into a single buffer.
        nLen = nBatchSize * m_nHistoricalSteps * (m_rgNumFiles[DATA_TYPE.OBSERVED_NUMERIC].Fields + m_rgNumFiles[DATA_TYPE.KNOWN_NUMERIC].Fields);
        m_rgBatchBuffers[OUTPUT_TYPE.HISTORICAL_NUMERIC] = new float[nLen];

        nLen = nBatchSize * m_nFutureSteps * m_rgNumFiles[DATA_TYPE.KNOWN_NUMERIC].Fields;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.FUTURE_NUMERIC, new float[nLen]);
    }

    if (File.Exists(m_rgstrFiles[DATA_TYPE.KNOWN_CATEGORICAL]))
    {
        NumpyFile<long> npyKnownCat = new NumpyFile<long>(null);
        npyKnownCat.OpenRead(m_rgstrFiles[DATA_TYPE.KNOWN_CATEGORICAL]);
        m_rgCatFiles.Add(DATA_TYPE.KNOWN_CATEGORICAL, npyKnownCat);
        m_rgCatData.Add(DATA_TYPE.KNOWN_CATEGORICAL, new List<long[]>());
        m_rgFields.Add(DATA_TYPE.KNOWN_CATEGORICAL, npyKnownCat.Fields);

        // BUGFIX: observed and known categorical are combined into a single
        // historical buffer (mirroring the numeric case above, and matching
        // the known+observed historical categorical shape).  Previously a
        // duplicate Add() of HISTORICAL_CATEGORICAL threw an ArgumentException
        // when the observed categorical file was also present.
        int nObsCatFields = m_rgCatFiles.ContainsKey(DATA_TYPE.OBSERVED_CATEGORICAL) ? m_rgCatFiles[DATA_TYPE.OBSERVED_CATEGORICAL].Fields : 0;
        nLen = nBatchSize * m_nHistoricalSteps * (nObsCatFields + m_rgCatFiles[DATA_TYPE.KNOWN_CATEGORICAL].Fields);
        m_rgBatchBuffers[OUTPUT_TYPE.HISTORICAL_CATEGORICAL] = new float[nLen];

        nLen = nBatchSize * m_nFutureSteps * m_rgCatFiles[DATA_TYPE.KNOWN_CATEGORICAL].Fields;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.FUTURE_CATEGORICAL, new float[nLen]);
    }

    if (File.Exists(m_rgstrFiles[DATA_TYPE.STATIC_NUMERIC]))
    {
        NumpyFile<float> npyStatNum = new NumpyFile<float>(null);
        npyStatNum.OpenRead(m_rgstrFiles[DATA_TYPE.STATIC_NUMERIC]);
        m_rgNumFiles.Add(DATA_TYPE.STATIC_NUMERIC, npyStatNum);
        m_rgNumData.Add(DATA_TYPE.STATIC_NUMERIC, new List<float[]>());
        m_rgFields.Add(DATA_TYPE.STATIC_NUMERIC, npyStatNum.Fields);

        nLen = nBatchSize * m_rgNumFiles[DATA_TYPE.STATIC_NUMERIC].Fields;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.STATIC_NUMERIC, new float[nLen]);
    }

    if (File.Exists(m_rgstrFiles[DATA_TYPE.STATIC_CATEGORICAL]))
    {
        NumpyFile<long> npyStatCat = new NumpyFile<long>(null);
        npyStatCat.OpenRead(m_rgstrFiles[DATA_TYPE.STATIC_CATEGORICAL]);
        m_rgCatFiles.Add(DATA_TYPE.STATIC_CATEGORICAL, npyStatCat);
        m_rgCatData.Add(DATA_TYPE.STATIC_CATEGORICAL, new List<long[]>());
        m_rgFields.Add(DATA_TYPE.STATIC_CATEGORICAL, npyStatCat.Fields);

        nLen = nBatchSize * m_rgCatFiles[DATA_TYPE.STATIC_CATEGORICAL].Fields;
        m_rgBatchBuffers.Add(OUTPUT_TYPE.STATIC_CATEGORICAL, new float[nLen]);
    }
}
1185
/// <summary>
/// Close all open numpy files and release all cached data and buffers.
/// </summary>
public override void Close()
{
    foreach (NumpyFile<long> file in m_rgCatFiles.Values)
        file.Close();

    foreach (NumpyFile<float> file in m_rgNumFiles.Values)
        file.Close();

    m_rgCatFiles.Clear();
    m_rgNumFiles.Clear();
    m_rgCatData.Clear();
    m_rgNumData.Clear();
    m_rgBatchBuffers.Clear();
    m_rgBatchSync.Clear();
    m_rgFields.Clear();
}
1206
/// <summary>
/// Walk the sync rows from the end backward and return the index of the last row
/// at which 'nBatchSize' items can still be drawn, or -1 when there is not enough data.
/// </summary>
/// <param name="nBatchSize">Specifies the batch size to accommodate.</param>
private int getMaxRowIdx(int nBatchSize)
{
    int nFields = m_rgFields[DATA_TYPE.SYNC];
    List<long[]> rgSync = m_rgCatData[DATA_TYPE.SYNC];
    int nRemaining = nBatchSize;

    for (int i = rgSync.Count - 1; i >= 0; i--)
    {
        // Each row contributes (entries / fields) items toward the batch.
        nRemaining -= rgSync[i].Length / nFields;
        if (nRemaining <= 0)
            return i;
    }

    return -1;
}
1221
/// <summary>
/// Load the data for the row index specified into the in-memory data lists.
/// </summary>
/// <param name="nRowIdx">Specifies the index of the row to load.</param>
/// <param name="bGoodData">Returns true when a complete, valid set of data was loaded for the row; false otherwise.</param>
/// <returns>Returns false when nRowIdx is beyond the last row (nothing more to load), true otherwise.</returns>
public bool Load(int nRowIdx, out bool bGoodData)
{
    bGoodData = false;

    // Past the last row - nothing left to load.
    if (nRowIdx >= m_nRows)
        return false;

    int nStartIdx = m_schema.Lookups[0][nRowIdx].ValidRangeStartIndex;
    int nEndIdx = m_schema.Lookups[0][nRowIdx].ValidRangeEndIndex;
    int nFields = m_rgFields[DATA_TYPE.SYNC];
    // Skip rows whose valid range is missing or too small to hold one full
    // historical + future window (bGoodData remains false).
    if (nStartIdx < 0 || nEndIdx < 0 || (nEndIdx - nStartIdx) < (m_nHistoricalSteps + m_nFutureSteps))
        return true;

    // Read each categorical stream into a temporary map first, so the member
    // lists are only updated when every stream reads successfully.  Static
    // streams hold a single entry per row, so they are read at index 0.
    Dictionary<DATA_TYPE, long[]> cat = new Dictionary<DATA_TYPE, long[]>();
    foreach (KeyValuePair<DATA_TYPE, NumpyFile<long>> kvp in m_rgCatFiles)
    {
        int nStartIdx1 = (kvp.Key == DATA_TYPE.STATIC_CATEGORICAL) ? 0 : nStartIdx;
        int nEndIdx1 = (kvp.Key == DATA_TYPE.STATIC_CATEGORICAL) ? 0 : nEndIdx;
        long[] rgBuffer = null;
        rgBuffer = kvp.Value.LoadRow(rgBuffer, nRowIdx, nStartIdx1, (nEndIdx1 - nStartIdx1) + 1);
        cat.Add(kvp.Key, rgBuffer);
        // A null buffer means the read failed - skip the row.
        if (rgBuffer == null)
            return true;
    }

    // Read each numeric stream in the same manner.
    Dictionary<DATA_TYPE, float[]> num = new Dictionary<DATA_TYPE, float[]>();
    foreach (KeyValuePair<DATA_TYPE, NumpyFile<float>> kvp in m_rgNumFiles)
    {
        int nStartIdx1 = (kvp.Key == DATA_TYPE.STATIC_NUMERIC) ? 0 : nStartIdx;
        int nEndIdx1 = (kvp.Key == DATA_TYPE.STATIC_NUMERIC) ? 0 : nEndIdx;
        float[] rgBuffer = null;
        rgBuffer = kvp.Value.LoadRow(rgBuffer, nRowIdx, nStartIdx1, (nEndIdx1 - nStartIdx1) + 1);
        num.Add(kvp.Key, rgBuffer);
        if (rgBuffer == null)
            return true;
    }

    // All streams loaded successfully - commit them to the member data lists.
    foreach (KeyValuePair<DATA_TYPE, long[]> kvp in cat)
    {
        m_rgCatData[kvp.Key].Add(kvp.Value);
    }

    foreach (KeyValuePair<DATA_TYPE, float[]> kvp in num)
    {
        m_rgNumData[kvp.Key].Add(kvp.Value);
    }

    m_validRanges.Add(m_schema.Lookups[0][nRowIdx]);

    bGoodData = true;

    return true;
}
1275
/// <summary>
/// Transfer all data accumulated in 'data' into this object, trimming each stream to at most 'nMaxLoad' rows.
/// </summary>
/// <param name="data">Specifies the source data whose lists are moved into this object (the source lists are cleared).</param>
/// <param name="nMaxLoad">Specifies the maximum number of rows to keep per stream; the oldest rows are dropped first.</param>
/// <returns>Returns true when numeric rows were dropped to stay within nMaxLoad (a refresh occurred), false otherwise.</returns>
public override bool Add(DataNpy<T> data, int nMaxLoad)
{
    bool bRefreshed = false;

    lock (m_syncObj)
    {
        // Move the numeric data, dropping the oldest rows when over nMaxLoad.
        foreach (KeyValuePair<DATA_TYPE, List<float[]>> kv in data.m_rgNumData)
        {
            if (!m_rgNumData.ContainsKey(kv.Key))
                m_rgNumData.Add(kv.Key, new List<float[]>());

            m_rgNumData[kv.Key].AddRange(kv.Value);
            data.m_rgNumData[kv.Key].Clear();

            while (m_rgNumData[kv.Key].Count > nMaxLoad)
            {
                m_rgNumData[kv.Key].RemoveAt(0);
                bRefreshed = true;
            }
        }

        // Move the categorical data in the same manner.
        // NOTE(review): unlike the numeric loop above, trimming here does not
        // set bRefreshed - looks intentional but worth confirming.
        foreach (KeyValuePair<DATA_TYPE, List<long[]>> kv in data.m_rgCatData)
        {
            if (!m_rgCatData.ContainsKey(kv.Key))
                m_rgCatData.Add(kv.Key, new List<long[]>());

            m_rgCatData[kv.Key].AddRange(kv.Value);
            data.m_rgCatData[kv.Key].Clear();

            while (m_rgCatData[kv.Key].Count > nMaxLoad)
            {
                m_rgCatData[kv.Key].RemoveAt(0);
            }
        }

        // Adopt any field counts not already known.
        foreach (KeyValuePair<DATA_TYPE, int> kv in data.m_rgFields)
        {
            if (!m_rgFields.ContainsKey(kv.Key))
                m_rgFields.Add(kv.Key, kv.Value);
        }

        // Take ownership of the sync and batch buffers.
        foreach (KeyValuePair<OUTPUT_TYPE, long[]> kv in data.m_rgBatchSync)
        {
            m_rgBatchSync.Add(kv.Key, kv.Value);
        }
        data.m_rgBatchSync.Clear();

        foreach (KeyValuePair<OUTPUT_TYPE, float[]> kv in data.m_rgBatchBuffers)
        {
            m_rgBatchBuffers.Add(kv.Key, kv.Value);
        }
        data.m_rgBatchBuffers.Clear();

        m_validRanges.Add(data.m_validRanges);
        data.m_validRanges.Clear();

        // Refresh the derived bookkeeping from the newly merged data.
        m_schema = data.m_schema;
        m_nBatchSize = data.m_nBatchSize;
        m_nMaxRowIdx = getMaxRowIdx(m_nBatchSize);
        m_nRows = m_rgCatData[DATA_TYPE.SYNC].Count;
        m_nTargetFieldIdx = data.m_nTargetFieldIdx;
        int nFields = m_rgFields[DATA_TYPE.SYNC];
        m_nTotalSize = m_rgCatData[DATA_TYPE.SYNC].Sum(p => p.Length) / (m_nHistoricalSteps + m_nFutureSteps) * nFields;
    }

    return bRefreshed;
}
1343
/// <summary>
/// Return the blob shape for the output type specified, or null when the data
/// backing that output is not present.
/// </summary>
/// <param name="ot">Specifies the output type whose shape is requested.</param>
/// <exception cref="Exception">Thrown when the output type is unknown.</exception>
public override int[] GetShape(OUTPUT_TYPE ot)
{
    switch (ot)
    {
        case OUTPUT_TYPE.STATIC_NUMERIC:
            return m_rgFields.ContainsKey(DATA_TYPE.STATIC_NUMERIC) ?
                new int[] { m_nBatchSize, m_rgFields[DATA_TYPE.STATIC_NUMERIC] } : null;

        case OUTPUT_TYPE.STATIC_CATEGORICAL:
            return m_rgFields.ContainsKey(DATA_TYPE.STATIC_CATEGORICAL) ?
                new int[] { m_nBatchSize, m_rgFields[DATA_TYPE.STATIC_CATEGORICAL] } : null;

        case OUTPUT_TYPE.HISTORICAL_NUMERIC:
            {
                // Historical numeric combines the explicit observed fields with the known fields.
                int nFieldCount = 0;
                if (m_rgFields.ContainsKey(DATA_TYPE.OBSERVED_NUMERIC))
                    nFieldCount += m_schema.Data.ObservedNumExplicitCount;
                if (m_rgFields.ContainsKey(DATA_TYPE.KNOWN_NUMERIC))
                    nFieldCount += m_rgFields[DATA_TYPE.KNOWN_NUMERIC];
                return (nFieldCount > 0) ? new int[] { m_nBatchSize, m_nHistoricalSteps, nFieldCount } : null;
            }

        case OUTPUT_TYPE.HISTORICAL_CATEGORICAL:
            {
                // Historical categorical combines the observed and known categorical fields.
                int nFieldCount = 0;
                if (m_rgFields.ContainsKey(DATA_TYPE.OBSERVED_CATEGORICAL))
                    nFieldCount += m_rgFields[DATA_TYPE.OBSERVED_CATEGORICAL];
                if (m_rgFields.ContainsKey(DATA_TYPE.KNOWN_CATEGORICAL))
                    nFieldCount += m_rgFields[DATA_TYPE.KNOWN_CATEGORICAL];
                return (nFieldCount > 0) ? new int[] { m_nBatchSize, m_nHistoricalSteps, nFieldCount } : null;
            }

        case OUTPUT_TYPE.FUTURE_NUMERIC:
            return m_rgFields.ContainsKey(DATA_TYPE.KNOWN_NUMERIC) ?
                new int[] { m_nBatchSize, m_nFutureSteps, m_rgFields[DATA_TYPE.KNOWN_NUMERIC] } : null;

        case OUTPUT_TYPE.FUTURE_CATEGORICAL:
            return m_rgFields.ContainsKey(DATA_TYPE.KNOWN_CATEGORICAL) ?
                new int[] { m_nBatchSize, m_nFutureSteps, m_rgFields[DATA_TYPE.KNOWN_CATEGORICAL] } : null;

        case OUTPUT_TYPE.TARGET:
            return new int[] { m_nBatchSize, m_nFutureSteps, 1 };

        case OUTPUT_TYPE.HISTORICAL_TARGET:
            return new int[] { m_nBatchSize, m_nHistoricalSteps, 1 };

        default:
            throw new Exception("Unknown output type '" + ot.ToString() + "'!");
    }
}
1402
/// <summary>
/// Advance to the next (row, col) sampling position - random when shuffling,
/// sequential (with wrap-around) otherwise.
/// </summary>
/// <exception cref="Exception">Thrown when no sufficiently large row can be found after retrying.</exception>
private void stepNext()
{
    if (m_bShuffleData)
    {
        m_nRowIdx = m_random.Next(m_validRanges.Count);

        int nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
        int nRetry = 0;
        // Retry a few times when the picked row is too small to hold one
        // historical + future window.
        while (nRetry < 5 && nValidRangeCount < (m_nHistoricalSteps + m_nFutureSteps))
        {
            m_nRowIdx = m_random.Next(m_validRanges.Count);
            nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
            nRetry++;
        }

        // BUGFIX: previously this threw whenever all 5 retries were consumed
        // (nRetry == 5), even when the final retry landed on a valid row.
        // Only throw when the selected row is still too small.
        if (nValidRangeCount < (m_nHistoricalSteps + m_nFutureSteps))
            throw new Exception("Could not find a row with more than " + (m_nHistoricalSteps + m_nFutureSteps).ToString() + " valid ranges!");

        m_nColIdx = m_random.Next(nValidRangeCount - (m_nHistoricalSteps + m_nFutureSteps));
    }
    else
    {
        // Sequential stepping: advance the column; when the window would run
        // past the row's valid range, move to the next row (wrapping at the
        // last usable row) and restart at column 0.
        m_nColIdx++;
        int nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
        if (m_nColIdx + m_nHistoricalSteps + m_nFutureSteps > nValidRangeCount)
        {
            m_nRowIdx++;
            if (m_nRowIdx >= m_nMaxRowIdx)
                m_nRowIdx = 0;

            m_nColIdx = 0;
        }
    }
}
1437
/// <summary>
/// Return the batch buffer for the output type, or null when no buffer exists for it.
/// </summary>
private float[] getBatch(OUTPUT_TYPE ot)
{
    float[] rgBuffer;
    // Single dictionary lookup instead of ContainsKey + indexer.
    if (m_rgBatchBuffers.TryGetValue(ot, out rgBuffer))
        return rgBuffer;

    return null;
}
1445
/// <summary>
/// Copy 'nCount' sync steps, starting at the current column plus 'nStartIdx',
/// into the sync buffer at batch slot 'nIdx'.
/// </summary>
/// <returns>Returns false when the buffer is null or the source row has too little data left, true otherwise.</returns>
private bool loadSyncBatch(int nIdx, long[] rg, int nStartIdx, int nCount)
{
    if (rg == null)
        return false;

    int nFields = m_rgFields[DATA_TYPE.SYNC];
    int nSrcOffset = (m_nColIdx + nStartIdx) * nFields;
    int nCopyLen = nCount * nFields;
    long[] rgSrc = m_rgCatData[DATA_TYPE.SYNC][m_nRowIdx];

    // Not enough source data remaining for a full window.
    if (nSrcOffset + nCopyLen > rgSrc.Length)
        return false;

    Array.Copy(rgSrc, nSrcOffset, rg, nIdx * nCopyLen, nCopyLen);

    return true;
}
1462
/// <summary>
/// Copy the row's static categorical values into batch slot 'nIdx' of the buffer.
/// </summary>
private void loadStaticCatBatch(int nIdx, float[] rg, DATA_TYPE dt)
{
    if (rg == null)
        return;

    int nFields = m_rgFields[dt];
    long[] rgSrc = m_rgCatData[dt][m_nRowIdx];
    int nDstOffset = nIdx * nFields;

    // Static data: one set of field values per row (converted long -> float).
    for (int k = 0; k < nFields; k++)
    {
        rg[nDstOffset + k] = rgSrc[k];
    }
}
1473
/// <summary>
/// Copy the row's static numeric values into batch slot 'nIdx' of the buffer.
/// </summary>
private void loadStaticNumBatch(int nIdx, float[] rg, DATA_TYPE dt)
{
    if (rg == null)
        return;

    int nFields = m_rgFields[dt];
    float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
    int nDstOffset = nIdx * nFields;

    // Static data: one set of field values per row.
    for (int k = 0; k < nFields; k++)
    {
        rg[nDstOffset + k] = rgSrc[k];
    }
}
1484
/// <summary>
/// Copy 'nCount' steps of a single categorical stream, starting at the current
/// column plus 'nStartIdx', into batch slot 'nIdx' of the buffer.
/// </summary>
private void loadCatBatch(int nIdx, float[] rg, int nStartIdx, int nCount, DATA_TYPE dt)
{
    if (rg == null)
        return;

    int nFields = m_rgFields[dt];
    long[] rgSrc = m_rgCatData[dt][m_nRowIdx];
    int nSrcOffset = (m_nColIdx + nStartIdx) * nFields;
    int nCopyLen = nCount * nFields;

    // Array.Copy performs the widening long -> float element conversion.
    Array.Copy(rgSrc, nSrcOffset, rg, nIdx * nCopyLen, nCopyLen);
}
1495
/// <summary>
/// Copy 'nCount' steps from two categorical streams into batch slot 'nIdx',
/// interleaving the fields of both streams per step (dt1 fields first, then dt2 fields).
/// Streams not present in m_rgFields contribute zero fields.
/// </summary>
private void loadCatBatch(int nIdx, float[] rg, int nStartIdx, int nCount, DATA_TYPE dt1, DATA_TYPE dt2)
{
    if (rg == null)
        return;

    int nStartIdx1 = m_nColIdx + nStartIdx;
    int nFields1 = (m_rgFields.ContainsKey(dt1)) ? m_rgFields[dt1] : 0;
    long[] rgSrc1 = (m_rgFields.ContainsKey(dt1)) ? m_rgCatData[dt1][m_nRowIdx] : null;
    int nFields2 = (m_rgFields.ContainsKey(dt2)) ? m_rgFields[dt2] : 0;
    long[] rgSrc2 = (m_rgFields.ContainsKey(dt2)) ? m_rgCatData[dt2][m_nRowIdx] : null;
    // Combined field count per step in the destination buffer.
    int nFields = nFields1 + nFields2;

    // j walks the absolute step positions within the source row; the
    // destination index is relative to the start of this batch slot.
    for (int j = nStartIdx1; j < nStartIdx1 + nCount; j++)
    {
        for (int k = 0; k < nFields1; k++)
        {
            int nSrcIdx = j * nFields1 + k;
            int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + k;
            rg[nDstIdx] = rgSrc1[nSrcIdx];
        }
        // The dt2 fields follow the dt1 fields within each step.
        for (int k = 0; k < nFields2; k++)
        {
            int nSrcIdx = j * nFields2 + k;
            int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + k + nFields1;
            rg[nDstIdx] = rgSrc2[nSrcIdx];
        }
    }
}
1524
/// <summary>
/// Copy 'nCount' steps of a single numeric stream, starting at the current
/// column plus 'nStartIdx', into batch slot 'nIdx' of the buffer.
/// </summary>
private void loadNumBatch(int nIdx, float[] rg, int nStartIdx, int nCount, DATA_TYPE dt)
{
    if (rg == null)
        return;

    int nFields = m_rgFields[dt];
    float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
    int nSrcOffset = (m_nColIdx + nStartIdx) * nFields;
    int nCopyLen = nCount * nFields;

    Array.Copy(rgSrc, nSrcOffset, rg, nIdx * nCopyLen, nCopyLen);
}
1535
/// <summary>
/// Extract a single field (e.g. the target) from 'nCount' steps of a numeric
/// stream into batch slot 'nIdx' of the buffer.
/// </summary>
private void loadNumBatch(int nIdx, float[] rg, int nStartIdx, int nCount, int nFieldIdx, DATA_TYPE dt)
{
    if (rg == null)
        return;

    int nFields = m_rgFields[dt];
    float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
    int nSrcBase = (m_nColIdx + nStartIdx) * nFields + nFieldIdx;
    int nDstBase = nIdx * nCount;

    // One value per step: stride by the field count in the source.
    for (int i = 0; i < nCount; i++)
    {
        rg[nDstBase + i] = rgSrc[nSrcBase + i * nFields];
    }
}
1553
/// <summary>
/// Return the effective field count for a numeric stream; observed numeric
/// streams report only their explicit field count.
/// </summary>
private int getNumFields(DATA_TYPE dt)
{
    return (dt == DATA_TYPE.OBSERVED_NUMERIC) ? m_schema.Data.ObservedNumExplicitCount : m_rgFields[dt];
}
1561
/// <summary>
/// Copy 'nCount' steps from two numeric streams into batch slot 'nIdx',
/// interleaving the fields per step.  For dt1 only the fields flagged as
/// explicit observed fields (per the schema) are copied; dt2 fields follow them.
/// </summary>
private void loadNumBatch(int nIdx, float[] rg, int nStartIdx, int nCount, DATA_TYPE dt1, DATA_TYPE dt2)
{
    if (rg == null)
        return;

    int nStartIdx1 = m_nColIdx + nStartIdx;
    // nFields1Explicit is the number of dt1 fields actually emitted; nFields1
    // is the raw source stride (the source may carry non-explicit fields).
    int nFields1Explicit = m_rgFields.ContainsKey(dt1) ? getNumFields(dt1) : 0;
    int nFields1 = (m_rgFields.ContainsKey(dt1)) ? m_rgFields[dt1] : 0;
    float[] rgSrc1 = (m_rgFields.ContainsKey(dt1)) ? m_rgNumData[dt1][m_nRowIdx] : null;
    int nFields2 = (m_rgFields.ContainsKey(dt2)) ? m_rgFields[dt2] : 0;
    float[] rgSrc2 = (m_rgFields.ContainsKey(dt2)) ? m_rgNumData[dt2][m_nRowIdx] : null;
    // Combined per-step field count in the destination buffer.
    int nFields = nFields1Explicit + nFields2;

    for (int j = nStartIdx1; j < nStartIdx1 + nCount; j++)
    {
        int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields;
        int nDstIdx1 = nDstIdx;

        // Copy only the fields marked as explicit observed fields, packing
        // them contiguously at the start of the step.
        for (int k = 0; k < nFields1; k++)
        {
            int nSrcIdx = j * nFields1 + k;

            if (m_schema.Data.IsObservedNum(k))
            {
                rg[nDstIdx1] = rgSrc1[nSrcIdx];
                nDstIdx1++;
            }
        }

        // The dt2 fields are placed after the explicit dt1 fields.
        // NOTE(review): the offset here uses nFields1Explicit while the
        // packing above advances nDstIdx1 - these agree only when
        // IsObservedNum(k) is true for exactly nFields1Explicit values of k;
        // worth confirming against the schema.
        for (int k = 0; k < nFields2; k++)
        {
            int nSrcIdx = j * nFields2 + k;
            nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + nFields1Explicit + k;
            rg[nDstIdx] = rgSrc2[nSrcIdx];
        }
    }
}
1599
/// <summary>
/// Load a full batch of data into the top blob collection using the fixed top
/// ordering: static_num, static_cat, hist_num, hist_cat, fut_num, fut_cat,
/// target, hist_target.
/// </summary>
/// <param name="nBatchSize">Specifies the number of items to load.</param>
/// <param name="col">Specifies the top blob collection filled with the batch data.</param>
/// <param name="bEnableDebug">Optionally, specifies to output debug information (currently disabled internally).</param>
/// <param name="strDebugPath">Specifies the debug output path used when bEnableDebug = true.</param>
/// <returns>This implementation always returns null (no selection indexes are tracked here).</returns>
public override int[,] LoadBatch(int nBatchSize, BlobCollection<T> col, bool bEnableDebug, string strDebugPath)
{
    lock (m_syncObj)
    {
        // Gather the staging buffers; getBatch returns null for outputs that
        // have no backing data, and those tops are simply skipped below.
        long[] rgHistSync = m_rgBatchSync[OUTPUT_TYPE.HISTORICAL_SYNC];
        long[] rgFutSync = m_rgBatchSync[OUTPUT_TYPE.FUTURE_SYNC];
        float[] rgStatCat = getBatch(OUTPUT_TYPE.STATIC_CATEGORICAL);
        float[] rgStatNum = getBatch(OUTPUT_TYPE.STATIC_NUMERIC);
        float[] rgHistCat = getBatch(OUTPUT_TYPE.HISTORICAL_CATEGORICAL);
        float[] rgHistNum = getBatch(OUTPUT_TYPE.HISTORICAL_NUMERIC);
        float[] rgFutCat = getBatch(OUTPUT_TYPE.FUTURE_CATEGORICAL);
        float[] rgFutNum = getBatch(OUTPUT_TYPE.FUTURE_NUMERIC);
        float[] rgTarget = getBatch(OUTPUT_TYPE.TARGET);
        float[] rgHistTarget = getBatch(OUTPUT_TYPE.HISTORICAL_TARGET);

        for (int i = 0; i < nBatchSize; i++)
        {
            // Only fill the item when both sync windows load successfully;
            // otherwise the slot is left as-is and we still advance.
            if (loadSyncBatch(i, rgHistSync, 0, m_nHistoricalSteps) &&
                loadSyncBatch(i, rgFutSync, m_nHistoricalSteps, m_nFutureSteps))
            {
                loadStaticCatBatch(i, rgStatCat, DATA_TYPE.STATIC_CATEGORICAL);
                loadStaticNumBatch(i, rgStatNum, DATA_TYPE.STATIC_NUMERIC);

                loadCatBatch(i, rgHistCat, 0, m_nHistoricalSteps, DATA_TYPE.OBSERVED_CATEGORICAL, DATA_TYPE.KNOWN_CATEGORICAL);
                loadNumBatch(i, rgHistNum, 0, m_nHistoricalSteps, DATA_TYPE.OBSERVED_NUMERIC, DATA_TYPE.KNOWN_NUMERIC);

                loadCatBatch(i, rgFutCat, m_nHistoricalSteps, m_nFutureSteps, DATA_TYPE.KNOWN_CATEGORICAL);
                loadNumBatch(i, rgFutNum, m_nHistoricalSteps, m_nFutureSteps, DATA_TYPE.KNOWN_NUMERIC);

                loadNumBatch(i, rgHistTarget, 0, m_nHistoricalSteps, m_nTargetFieldIdx, DATA_TYPE.OBSERVED_NUMERIC);
                loadNumBatch(i, rgTarget, m_nHistoricalSteps, m_nFutureSteps, m_nTargetFieldIdx, DATA_TYPE.OBSERVED_NUMERIC);
            }

            stepNext();
        }

        // Copy the staged buffers into the top blobs (fixed ordering).
        if (rgStatNum != null)
            col[0].mutable_cpu_data = Utility.ConvertVec<T>(rgStatNum);

        if (rgStatCat != null)
            col[1].mutable_cpu_data = Utility.ConvertVec<T>(rgStatCat);

        if (rgHistNum != null)
            col[2].mutable_cpu_data = Utility.ConvertVec<T>(rgHistNum);

        if (rgHistCat != null)
            col[3].mutable_cpu_data = Utility.ConvertVec<T>(rgHistCat);

        if (rgFutNum != null)
            col[4].mutable_cpu_data = Utility.ConvertVec<T>(rgFutNum);

        if (rgFutCat != null)
            col[5].mutable_cpu_data = Utility.ConvertVec<T>(rgFutCat);

        if (rgTarget != null)
            col[6].mutable_cpu_data = Utility.ConvertVec<T>(rgTarget);

        if (rgHistTarget != null)
            col[7].mutable_cpu_data = Utility.ConvertVec<T>(rgHistTarget);

        // Debug rendering is currently disabled (requires SimpleGraphing).
        if (bEnableDebug)
        {
            if (Directory.Exists(strDebugPath))
            {
                //debug(strDebugPath, col[0].shape(), rgStatNum, "stat_num");
                //debug(strDebugPath, col[1].shape(), rgStatCat, "stat_cat");
                //debug(strDebugPath, col[2].shape(), rgHistNum, "hist_num");
                //debug(strDebugPath, col[3].shape(), rgHistCat, "hist_cat");
                //debug(strDebugPath, col[4].shape(), rgFutNum, "fut_num");
                //debug(strDebugPath, col[5].shape(), rgFutCat, "fut_cat");
                //debug(strDebugPath, col[6].shape(), rgTarget, "target");

                //if (col.Count > 7)
                //    debug(strDebugPath, col[7].shape(), rgHistTarget, "hist_target");
            }
        }

        m_nIteration++;

        return null;
    }
}
1682
1683 //private void debug(string strPath, List<int> rgShape, float[] rgData, string strName)
1684 //{
1685 // if (rgData == null || rgData.Length == 0 || rgShape.Count <= 2)
1686 // return;
1687
1688 // string strFile = strPath + "\\" + m_nIteration.ToString() + "_" + m_nRowIdx.ToString() + "_" + m_nColIdx.ToString() + "_" + strName;
1689
1690 // int nBatch = rgShape[0];
1691 // int nSeq = rgShape[1];
1692 // int nFields = rgShape[2];
1693
1694 // for (int i = 0; i < nFields; i++)
1695 // {
1696 // string strFile1 = strFile + "_field_" + i.ToString() + ".png";
1697 // PlotCollectionSet set = new PlotCollectionSet();
1698
1699 // for (int j = 0; j < nBatch; j++)
1700 // {
1701 // PlotCollection plots = new PlotCollection(strName + "_batch_" + j.ToString());
1702 // for (int k = 0; k < nSeq; k++)
1703 // {
1704 // int nIdx = j * nSeq * nFields + k * nFields + i;
1705 // Plot plot = new Plot(k, rgData[nIdx]);
1706 // plots.Add(plot);
1707 // }
1708
1709 // set.Add(plots);
1710 // }
1711
1712 // Image img = SimpleGraphingControl.QuickRender(set);
1713 // img.Save(strFile1);
1714 // }
1715 //}
1716 }
1717
/// <summary>
/// The BatchPerfSet manages two BatchPerf buffers - while one fills with new
/// loss data, the other (already full) is used to select worst-case samples.
/// </summary>
class BatchPerfSet
{
    BatchPerf[] m_rgPerf = new BatchPerf[2];
    int m_nSelectIdx = 0;
    int m_nLoadIdx = 0;
    int m_nSelectFrequency = 1;
    int m_nSelectCount = 0;
    Random m_random;
    double m_dfPctTopSelectionPct = 0.25;
    bool m_bActive = false;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="rand">Specifies the random number generator used for selection.</param>
    /// <param name="dfPctTopSelectionPct">Specifies the top percentage of items selected from.</param>
    /// <param name="nMax">Specifies the maximum number of items each buffer holds.</param>
    /// <param name="nSelectFrequency">Specifies how often (every n-th call) Select actually selects.</param>
    public BatchPerfSet(Random rand, double dfPctTopSelectionPct, int nMax, int nSelectFrequency)
    {
        m_rgPerf[0] = new BatchPerf(nMax, dfPctTopSelectionPct);
        m_rgPerf[1] = new BatchPerf(nMax, dfPctTopSelectionPct);
        m_nSelectFrequency = nSelectFrequency;
        m_dfPctTopSelectionPct = dfPctTopSelectionPct;
        m_random = rand;
    }

    /// <summary>
    /// Add the batch loss data to the currently loading buffer.  When that
    /// buffer becomes full, it becomes the selection source and the other
    /// buffer is cleared for loading.
    /// </summary>
    /// <returns>Returns true when the set just became active (a buffer filled), false otherwise.</returns>
    public bool Add(LossArgs e, int[,] rg)
    {
        bool bFull = m_rgPerf[m_nLoadIdx].Add(e, rg);

        if (bFull)
        {
            // Swap the buffer roles.
            int nNextLoad = (m_nLoadIdx == 0) ? 1 : 0;
            m_rgPerf[nNextLoad].Clear();
            m_nSelectIdx = m_nLoadIdx;
            m_nLoadIdx = nNextLoad;
            m_bActive = true;
        }
        else
        {
            m_bActive = false;
        }

        return m_bActive;
    }

    /// <summary>
    /// Returns whether a full buffer is available for selection.
    /// </summary>
    public bool IsActive
    {
        get { return m_bActive; }
    }

    /// <summary>
    /// On every n-th call (per the select frequency), select a (row, col) pair
    /// from the top portion of the worst-performing items.
    /// </summary>
    /// <returns>Returns true when a selection was made, false otherwise.</returns>
    public bool Select(ref int? nIdx1, ref int? nIdx2)
    {
        m_nSelectCount++;
        if (m_nSelectCount % m_nSelectFrequency != 0)
            return false;

        return m_rgPerf[m_nSelectIdx].Select(m_random, m_dfPctTopSelectionPct, ref nIdx1, ref nIdx2);
    }
}
1779
/// <summary>
/// The BatchPerf holds a bounded list of (loss, row, col) entries used to
/// re-select the samples with the highest loss.
/// </summary>
class BatchPerf
{
    int m_nMax;
    int m_nLastSortCount;
    double m_dfTopSelectionPct;
    List<Tuple<float, int, int>> m_rgPerformanceItems;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="nMax">Specifies the maximum number of items held.</param>
    /// <param name="dfPctTopSelectionPct">Specifies the top percentage used to size the re-sort countdown.</param>
    public BatchPerf(int nMax, double dfPctTopSelectionPct)
    {
        m_rgPerformanceItems = new List<Tuple<float, int, int>>(nMax + 1);
        m_dfTopSelectionPct = dfPctTopSelectionPct;
        m_nMax = nMax;
        m_nLastSortCount = (int)(nMax * dfPctTopSelectionPct);
    }

    /// <summary>
    /// Add each item's loss along with its (row, col) indexes, dropping the
    /// oldest entries once the maximum size is exceeded.
    /// </summary>
    /// <returns>Returns true once the list has reached its maximum size during this call.</returns>
    public bool Add(LossArgs e, int[,] rg)
    {
        bool bAtMax = false;

        for (int i = 0; i < e.Data.Length; i++)
        {
            // Entries flagged with -1 were not valid selections - skip them.
            if (rg[i, 0] == -1 || rg[i, 1] == -1)
                continue;

            m_rgPerformanceItems.Add(Tuple.Create(e.Data[i], rg[i, 0], rg[i, 1]));
            m_nLastSortCount--;

            if (m_rgPerformanceItems.Count > m_nMax)
            {
                m_rgPerformanceItems.RemoveAt(0);
                bAtMax = true;
            }
        }

        return bAtMax;
    }

    /// <summary>
    /// Remove all performance items.
    /// </summary>
    public void Clear()
    {
        m_rgPerformanceItems.Clear();
    }

    /// <summary>
    /// Sort the items so the highest losses come first and reset the re-sort countdown.
    /// </summary>
    public void Sort()
    {
        m_rgPerformanceItems = m_rgPerformanceItems.OrderByDescending(p => p.Item1).ToList();
        m_nLastSortCount = (int)(m_nMax * m_dfTopSelectionPct);
    }

    /// <summary>
    /// Select a random (row, col) pair from the top 'dfPct' portion of the
    /// highest-loss items.
    /// </summary>
    /// <returns>Returns true when a selection was made, false when the list is not yet full.</returns>
    public bool Select(Random rand, double dfPct, ref int? nIdx1, ref int? nIdx2)
    {
        if (m_rgPerformanceItems.Count < m_nMax)
            return false;

        // Re-sort lazily once enough new items have been added since the last sort.
        if (m_nLastSortCount <= 0)
            Sort();

        int nTopCount = (int)(m_rgPerformanceItems.Count * dfPct);
        Tuple<float, int, int> item = m_rgPerformanceItems[rand.Next(nTopCount)];

        nIdx1 = item.Item2;
        nIdx2 = item.Item3;

        return true;
    }
}
1845#pragma warning restore 1591
1846}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
CancelEvent()
The CancelEvent constructor.
Definition: CancelEvent.cs:28
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
Log(string strSrc)
The Log constructor.
Definition: Log.cs:33
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
The LossArgs contains the loss values for a given batch.
Definition: EventArgs.cs:207
float[] Data
Specifies the loss values for a given batch.
Definition: EventArgs.cs:234
Specifies a key-value pair of properties.
Definition: PropertySet.cs:16
void SetProperty(string strName, string strVal)
Sets a property in the property set to a value if it exists, otherwise it adds the new property.
Definition: PropertySet.cs:211
The SettingsCaffe defines the settings used by the MyCaffe CaffeControl.
int DbLoadLimit
Get/set the image database load limit.
DB_LOAD_METHOD DbLoadMethod
Get/set the image database loading method.
The SimpleTemporalDatumCollection manages a collection of SimpleTemporalDatum objects.
Definition: SimpleDatum.cs:91
The SimpleTemporalDatum is used to return temporal data
Definition: SimpleDatum.cs:20
float[] Data
Get/set the data which is of length (Channels * Width * Height).
Definition: SimpleDatum.cs:45
The Utility class provides general utility functions.
Definition: Utility.cs:35
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
int ID
Get/set the database ID of the item.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
SourceDescriptor TrainingSource
Get/set the training data source.
SourceDescriptor TestingSource
Get/set the testing data source.
The SourceDescriptor class contains all information describing a data source.
TemporalDescriptor TemporalDescriptor
Get/set the temporal descriptor (if any).
List< ValueStreamDescriptor > ValueStreamDescriptors
Returns the value stream descriptor.
The value stream descriptor describes a single value stream within a value item.
STREAM_CLASS_TYPE ClassType
Returns the value stream class type.
STREAM_VALUE_TYPE ValueType
Returns the value stream value type.
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The NumpyFile reads data from a numpy file in the base type specified.
Definition: NumpyFile.cs:18
void OpenRead(string strFile)
Open the numpy file for reading, and read in the header information.
Definition: NumpyFile.cs:132
int Rows
Returns the number of rows.
Definition: NumpyFile.cs:91
int Fields
Returns the number of fields per column item.
Definition: NumpyFile.cs:107
[Temporal Database] The MyCaffeTemporalDatabase provides an enhanced in-memory temporal database used...
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
Phase m_phase
Specifies the Phase under which the Layer is run.
Definition: Layer.cs:51
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
LayerParameter layer_param
Returns the LayerParameter for this Layer.
Definition: Layer.cs:899
The LossLayer provides an interface for Layer's that take two blobs as input – usually (1) prediction...
Definition: LossLayer.cs:23
EventHandler< LossArgs > OnLoss
Specifies the loss event called on each learning cycle.
Definition: LossLayer.cs:47
The DataTemporalLayer implements the data layer used to load the temporal data into the model.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Not implemented - data Layers do not perform backward.
DataTemporalLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db)
The constructor.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int MinTopBlobs
Returns the exact number of required top (output) Blobs: static_numeric, static_categorical,...
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobws to the 'col' provided.
override int? MaxTopBlobs
Returns the exact number of required top (output) Blobs: static_numeric, static_categorical,...
override void ConnectLoss(LossLayer< T > layer)
Connect the loss layer to the data layer so that we can rank the data values.
override int ExactNumBottomBlobs
The data layer has no bottom blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
The RawData class is the base class for all raw data types.
RawData(uint? nSeed, bool bOutputTargetHistorical)
The constructor.
int m_nBatchSize
Specifies the batch size.
Data< T > m_data
Specifies the base data object used to store data blocks loaded from disk or database.
bool m_bOutputTargetHistorical
Specifies to output the target historical data.
virtual int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection< T > col, bool bEnableDebug=false, string strDebugPath=null)
Loads a batch of data items into the BlobCollection.
virtual int GetTotalSize()
Returns the total size of the data.
Random m_random
Specifies the random number generator used to shuffle the data.
virtual bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
Random Random
Specifies the random number generator used.
virtual void loadDataFunction(object obj)
The virtual load data function override by the derived class to load the data in the background.
virtual void Add(LossArgs e, int[,] rgIdx)
Adds the selected indexes along with the loss data (used by the BatchPerfSet to select worst cases).
virtual int[] GetShape(DataNpy< T >.OUTPUT_TYPE ot)
Returns the shape of a given output type.
The RawFileData object is used to load raw NPY file data.
override bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
RawFileData(uint? nSeed, bool bOutputTargetHistorical)
The constructor.
override void loadDataFunction(object obj)
The virtual load data function override by the derived class to load the data in the background.
void VerifyFiles(Phase phase, string strPath)
Verify that the data files exist.
The RawSqlData class loads data from a database.
override int[] GetShape(DataNpy< T >.OUTPUT_TYPE ot)
Return the shape of the OUTPUT_TYPE.
RawSqlData(uint? nSeed, bool bOutputTargetHistorical, IXTemporalDatabaseBase db, Log log)
The constructor.
override int GetTotalSize()
Return the total number of blocks available in the current phase.
override bool LoadData(Phase phase, string strDataset, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
override int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection< T > col, bool bEnableDebug=false, string strDebugPath=null)
Load a batch of data to feed into the network.
override void Add(LossArgs e, int[,] rgIdx)
Add the loss data for the batch into the performance data later used to select the worst cases.
Specifies the base parameter for all layers.
DataTemporalParameter data_temporal_param
Returns the parameter set when initialized with LayerType.DATA_TEMPORAL
LayerType
Specifies the layer type.
Specifies the parameters for the DataTemporalLayer (used in TFT models).
bool shuffle_data
Specifies to randomly select from the data (default = true).
SOURCE_TYPE
Defines the type of source data.
bool enable_debug_output
Optionally, specifies to output debug information (slower) on each pass.
string debug_output_path
Specifies the debug output path where debug images are placed when enable_debug_output = true.
virtual uint batch_size
Specifies the batch size of the data.
int drip_refresh_rate_in_sec
Specifies rate the drip refresh occurs in seconds (default = 0, disabled).
uint num_historical_steps
Specifies the number of historical steps
uint num_future_steps
Specifies the number of future steps
double max_load_percent
Specifies the maximum percent of data rows to load (default = 1.0 = 100%).
Phase? forced_phase
Optionally, specifies the phase to use when loading data.
string source
Specifies the data source.
uint chunk_count
Specifies the number of items to load per cycle when background loading (default = 1024).
bool output_target_historical
Optionally, specifies to output a top containing the target historical data.
uint? seed
Specifies the random seed used to shuffle the data. When not specified, the default seed is used.
SOURCE_TYPE source_type
Specifies the type of source data.
The IXDatabaseBase interface defines the general interface to the in-memory database.
Definition: Interfaces.cs:444
The IXTemporalDatabaseBase interface defines the general interface to the in-memory temporal database...
Definition: Interfaces.cs:827
The descriptors namespace contains all descriptor used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
DB_ITEM_SELECTION_METHOD
Defines the item (e.g., image or temporal item) selection method.
Definition: Interfaces.cs:278
DB_LOAD_METHOD
Defines how to load the items into the in-memory database.
Definition: Interfaces.cs:154
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
DB_LABEL_SELECTION_METHOD
Defines the label selection method.
Definition: Interfaces.cs:333
DATA_TYPE
Defines the gym data type.
Definition: Interfaces.cs:135
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
BLOB_TYPE
Defines the type of data held by a given Blob.
Definition: Interfaces.cs:62
@ TARGET
The blob contains target data.
The MyCaffe.db.temporal namespace contains all classes used to create the MyCaffeTemporalDatabase in-...
The MyCaffe.layers.tft namespace contains all TFT related layers.
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12