MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
BucketCollection.cs
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.IO;
6using System.Linq;
7using System.Text;
8using System.Threading.Tasks;
9
10namespace MyCaffe.basecode
11{
/// <summary>
/// The Bucket class contains the information describing a single range of values within a BucketCollection.
/// </summary>
/// <remarks>
/// The range is half-open: a value belongs to the bucket when Minimum &lt;= value &lt; Maximum.
/// </remarks>
public class Bucket
{
    double m_fMin;
    double m_fMax;
    double m_fSum;
    int m_nCount;
    object m_tag = null;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="fMin">Specifies the inclusive lower bound of the bucket.</param>
    /// <param name="fMax">Specifies the exclusive upper bound of the bucket.</param>
    public Bucket(double fMin, double fMax)
    {
        m_fMin = fMin;
        m_fMax = fMax;
    }

    /// <summary>
    /// Tests to see if the Bucket range contains the value.
    /// </summary>
    /// <param name="fVal">Specifies the value to test.</param>
    /// <returns>Returns -1 when the value is below the minimum, 1 when the value is at or above the maximum, and 0 when the value falls within the bucket.</returns>
    public int Contains(double fVal)
    {
        // The lower bound is tested first, exactly as before, so the result is unchanged even for degenerate ranges.
        return (fVal < m_fMin) ? -1 : ((fVal >= m_fMax) ? 1 : 0);
    }

    /// <summary>
    /// Attempts to add a new value to the Bucket.
    /// </summary>
    /// <param name="fVal">Specifies the value to add.</param>
    /// <param name="bForce">When true, the range test is skipped and the value is always added.</param>
    /// <returns>Returns 0 when the value was added, otherwise the non-zero result of Contains (-1 or 1) and nothing is added.</returns>
    public int Add(double fVal, bool bForce = false)
    {
        int nStatus = bForce ? 0 : Contains(fVal);

        if (nStatus == 0)
        {
            m_nCount += 1;
            m_fSum += fVal;
        }

        return nStatus;
    }

    /// <summary>
    /// Returns the number of items added to the Bucket.
    /// </summary>
    public int Count
    {
        get { return m_nCount; }
    }

    /// <summary>
    /// Returns the average value of all values added to the Bucket.
    /// </summary>
    /// <remarks>
    /// The value is the running sum divided by the count, so it is NaN while the bucket is empty.
    /// </remarks>
    public double Average
    {
        get { return m_fSum / m_nCount; }
    }

    /// <summary>
    /// Returns the bucket minimum value.
    /// </summary>
    public double Minimum
    {
        get { return m_fMin; }
    }

    /// <summary>
    /// Returns the bucket maximum value.
    /// </summary>
    public double Maximum
    {
        get { return m_fMax; }
    }

    /// <summary>
    /// Returns the bucket midpoint.
    /// </summary>
    public double MidPoint
    {
        get
        {
            double fHalfWidth = (m_fMax - m_fMin) / 2.0;
            return m_fMin + fHalfWidth;
        }
    }

    /// <summary>
    /// Get/set a user specified tag.
    /// </summary>
    public object Tag
    {
        get { return m_tag; }
        set { m_tag = value; }
    }

    /// <summary>
    /// Returns a string representation of the Bucket.
    /// </summary>
    /// <returns>A string showing the bucket range followed by its item count.</returns>
    public override string ToString()
    {
        string strLower = m_fMin.ToString("N10");
        string strUpper = m_fMax.ToString("N10");
        string strItems = m_nCount.ToString("N0");

        return string.Format("[{0},{1}]-> {2}", strLower, strUpper, strItems);
    }

    /// <summary>
    /// Save the Bucket to a BinaryWriter.
    /// </summary>
    /// <remarks>
    /// Writes the count, sum, minimum and maximum, in that order. The Tag is not written.
    /// </remarks>
    /// <param name="bw">Specifies the binary writer that receives the data.</param>
    public void Save(BinaryWriter bw)
    {
        bw.Write(m_nCount);
        bw.Write(m_fSum);
        bw.Write(m_fMin);
        bw.Write(m_fMax);
    }

    /// <summary>
    /// Load a Bucket from a BinaryReader.
    /// </summary>
    /// <param name="br">Specifies the binary reader positioned at data written by Save.</param>
    /// <returns>The newly loaded Bucket is returned.</returns>
    public static Bucket Load(BinaryReader br)
    {
        // The read order must mirror Save: count, sum, minimum, maximum.
        int nItems = br.ReadInt32();
        double dfTotal = br.ReadDouble();
        double dfLower = br.ReadDouble();
        double dfUpper = br.ReadDouble();

        return new Bucket(dfLower, dfUpper) { m_nCount = nItems, m_fSum = dfTotal };
    }
}
160
/// <summary>
/// The BucketCollection contains a set of Buckets.
/// </summary>
public class BucketCollection : IEnumerable<Bucket>
{
    List<Bucket> m_rgBuckets = new List<Bucket>();
    bool m_bIsDataReal = false;

    /// <summary>
    /// Specifies the output format used when creating a distribution string.
    /// </summary>
    public enum OUTPUT_FMT
    {
        /// <summary>
        /// Output a single line of comma separated percentages wrapped in braces.
        /// </summary>
        NONE,
        /// <summary>
        /// Output one text line per bucket with a bar of '*' characters.
        /// </summary>
        TXT,
        /// <summary>
        /// Output one CSV line per bucket preceded by a header line.
        /// </summary>
        CSV
    }

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <remarks>
    /// Creates nCount equally sized buckets spanning fMin to fMax and marks the data as real.
    /// No buckets are created when nCount is less than 1.
    /// </remarks>
    /// <param name="fMin">Specifies the minimum of all values.</param>
    /// <param name="fMax">Specifies the maximum of all values.</param>
    /// <param name="nCount">Specifies the number of buckets.</param>
    public BucketCollection(double fMin, double fMax, int nCount)
    {
        double fRange = fMax - fMin;
        double fStep = fRange / (double)nCount;
        double fVal = fMin;

        for (int i = 0; i < nCount; i++)
        {
            // The last bucket is pinned to fMax so that rounding of the intermediate
            // boundaries never leaves the top of the range uncovered.
            double dfMax = (i == nCount - 1) ? fMax : Math.Round(fVal + fStep, 9);

            m_rgBuckets.Add(new Bucket(fVal, dfMax));
            fVal = dfMax;
        }

        m_bIsDataReal = true;
    }

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <remarks>
    /// Creates one bucket [n, n+1) per vocabulary entry, each seeded with its own value,
    /// and marks the data as not real. NOTE: the list passed in is sorted in place.
    /// </remarks>
    /// <param name="rgVocab">Specifies the vocabulary values.</param>
    public BucketCollection(List<int> rgVocab)
    {
        rgVocab.Sort();

        for (int i = 0; i < rgVocab.Count; i++)
        {
            int nVal = rgVocab[i];
            Bucket b = new Bucket(nVal, nVal + 1);
            b.Add(nVal);
            m_rgBuckets.Add(b);
        }

        m_bIsDataReal = false;
    }

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <remarks>
    /// Creates an empty collection; used by FromByteStream.
    /// </remarks>
    /// <param name="bIsReal">Specifies whether or not the Buckets hold Real values.</param>
    public BucketCollection(bool bIsReal)
    {
        m_bIsDataReal = bIsReal;
    }

    /// <summary>
    /// Get/set whether or not the Buckets hold Real values.
    /// </summary>
    public bool IsDataReal
    {
        get { return m_bIsDataReal; }
        set { m_bIsDataReal = value; }
    }

    /// <summary>
    /// Returns the number of Buckets.
    /// </summary>
    public int Count
    {
        get { return m_rgBuckets.Count; }
    }

    /// <summary>
    /// Returns the Bucket at a given index.
    /// </summary>
    /// <param name="nIdx">Specifies the index.</param>
    /// <returns>The Bucket at the index is returned.</returns>
    public Bucket this[int nIdx]
    {
        get { return m_rgBuckets[nIdx]; }
    }

    /// <summary>
    /// Returns the bucket with the highest count.
    /// </summary>
    /// <returns>The first bucket having the highest count is returned, or null when no bucket has a count above zero.</returns>
    public Bucket GetBucketWithMaxCount()
    {
        int nMax = 0;
        int nMaxIdx = -1;

        for (int i = 0; i < m_rgBuckets.Count; i++)
        {
            if (nMax < m_rgBuckets[i].Count)
            {
                nMax = m_rgBuckets[i].Count;
                nMaxIdx = i;
            }
        }

        if (nMaxIdx == -1)
            return null;

        return m_rgBuckets[nMaxIdx];
    }

    /// <summary>
    /// Returns the numeric range that all buckets fall into.
    /// </summary>
    /// <returns>A tuple of the first bucket's minimum and the last bucket's maximum is returned, or (0, 0) when there are no buckets.</returns>
    public Tuple<double,double> GetRange()
    {
        if (m_rgBuckets.Count == 0)
            return new Tuple<double, double>(0, 0);

        return new Tuple<double, double>(m_rgBuckets[0].Minimum, m_rgBuckets[m_rgBuckets.Count - 1].Maximum);
    }

    /// <summary>
    /// Finds the correct Bucket and adds the value to it.
    /// </summary>
    /// <remarks>
    /// Values below the first bucket are forced into the first bucket and values at or above
    /// the last bucket's maximum are forced into the last bucket.
    /// </remarks>
    /// <param name="fVal">Specifies the value to add.</param>
    /// <returns>The index of the bucket that received the value is returned.</returns>
    /// <exception cref="Exception">Thrown when no bucket accepts the value (e.g. the collection is empty or the value falls in a gap between buckets).</exception>
    public int Add(double fVal)
    {
        for (int i = 0; i < m_rgBuckets.Count; i++)
        {
            int nVal = m_rgBuckets[i].Add(fVal);
            if (nVal == 0)
                return i;

            // Clamp values below the overall range into the first bucket.
            if (nVal < 0 && i == 0)
            {
                m_rgBuckets[i].Add(fVal, true);
                return i;
            }

            // Clamp values above the overall range into the last bucket.
            if (nVal == 1 && i == m_rgBuckets.Count - 1)
            {
                m_rgBuckets[i].Add(fVal, true);
                return i;
            }
        }

        throw new Exception("Failed to find a bucket!");
    }

    /// <summary>
    /// Reduces the buckets to only include those that have a count that is within 1.0 - dfPct of the maximum bucket count.
    /// </summary>
    /// <remarks>
    /// Buckets whose count is greater than (int)(maxCount * (1.0 - dfPct)) are kept.
    /// When no bucket has any items the collection is left unchanged.
    /// </remarks>
    /// <param name="dfPct">Specifies the percentage used to compute the count threshold.</param>
    public void Reduce(double dfPct)
    {
        Bucket b = GetBucketWithMaxCount();

        // No bucket has been hit yet, so there is no maximum to measure against.
        if (b == null)
            return;

        int nThreshold = (int)(b.Count * (1.0 - dfPct));

        List<Bucket> rgBuckets = new List<Bucket>();
        foreach (Bucket b1 in m_rgBuckets)
        {
            if (b1.Count > nThreshold)
                rgBuckets.Add(b1);
        }

        m_rgBuckets = rgBuckets;
    }

    /// <summary>
    /// Finds the Bucket associated with the value and returns the Bucket's average value.
    /// </summary>
    /// <remarks>
    /// The bucket is selected with FindIndex, so out of range values use the first or last bucket.
    /// </remarks>
    /// <param name="fVal">Specifies the value to translate.</param>
    /// <returns>The average of the selected bucket is returned.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when the collection has no buckets.</exception>
    public double Translate(double fVal)
    {
        return m_rgBuckets[FindIndex(fVal)].Average;
    }

    /// <summary>
    /// Finds the index of the Bucket containing the value.
    /// </summary>
    /// <remarks>
    /// When no bucket contains the value, values below the first bucket map to the first bucket
    /// (matching how Add clamps them) and all other values map to the last bucket.
    /// </remarks>
    /// <param name="dfVal">Specifies the value to look for.</param>
    /// <returns>The bucket index is returned, or -1 when the collection has no buckets.</returns>
    public int FindIndex(double dfVal)
    {
        for (int i = 0; i < m_rgBuckets.Count; i++)
        {
            if (m_rgBuckets[i].Contains(dfVal) == 0)
                return i;
        }

        // Previously a value below the whole range fell through to the LAST bucket.
        if (m_rgBuckets.Count > 0 && dfVal < m_rgBuckets[0].Minimum)
            return 0;

        return m_rgBuckets.Count - 1;
    }

    /// <summary>
    /// Returns the average of the Bucket at a given index, or optionally its midpoint.
    /// </summary>
    /// <param name="nIdx">Specifies the bucket index.</param>
    /// <param name="bUseMidPoint">When true, the bucket midpoint is returned instead of the average.</param>
    /// <returns>The bucket average (or midpoint) is returned.</returns>
    public double GetValueAt(int nIdx, bool bUseMidPoint = false)
    {
        if (bUseMidPoint)
            return m_rgBuckets[nIdx].MidPoint;
        else
            return m_rgBuckets[nIdx].Average;
    }

    /// <summary>
    /// The Bucketize method adds all values within a SimpleDatum to a new BucketCollection.
    /// </summary>
    /// <remarks>
    /// When either dfMin or dfMax is not supplied, BOTH are recalculated from the data.
    /// Progress is written to the log roughly once per second, which is also when cancellation is checked.
    /// </remarks>
    /// <param name="strName">Specifies the name used in the progress output.</param>
    /// <param name="nBucketCount">Specifies the number of buckets to create.</param>
    /// <param name="sd">Specifies the SimpleDatum holding the values to add.</param>
    /// <param name="log">Specifies the output log (may be null, in which case no progress is written).</param>
    /// <param name="evtCancel">Specifies the cancel event (may be null).</param>
    /// <param name="dfMin">Optionally, specifies the minimum of the data.</param>
    /// <param name="dfMax">Optionally, specifies the maximum of the data.</param>
    /// <returns>The new BucketCollection is returned, or null when cancelled.</returns>
    public static BucketCollection Bucketize(string strName, int nBucketCount, SimpleDatum sd, Log log, CancelEvent evtCancel, double? dfMin = null, double? dfMax = null)
    {
        int nIdx = 0;
        int nChannels = sd.Channels;
        int nCount = sd.ItemCount / nChannels;
        int nItemCount = sd.ItemCount;
        int nOffset = 0;
        Stopwatch sw = new Stopwatch();

        sw.Start();

        // Calculate the min/max values if not already specified.
        if (!dfMin.HasValue || !dfMax.HasValue)
        {
            dfMin = double.MaxValue;
            dfMax = -double.MaxValue;

            for (int i = 0; i < nChannels; i++)
            {
                for (int j = 0; j < nCount; j++)
                {
                    double dfVal = sd.GetDataAtD(nOffset + j);
                    dfMin = Math.Min(dfMin.Value, dfVal);
                    dfMax = Math.Max(dfMax.Value, dfVal);
                    nIdx++;

                    if (sw.Elapsed.TotalMilliseconds > 1000)
                    {
                        if (evtCancel != null && evtCancel.WaitOne(0))
                            return null;

                        double dfPct = (double)nIdx / (double)nItemCount;
                        if (log != null)
                            log.WriteLine("Calculating min/max at " + dfPct.ToString("P") + "...");
                        sw.Restart();
                    }
                }

                nOffset += nCount;
            }
        }

        BucketCollection col = new BucketCollection(dfMin.Value, dfMax.Value, nBucketCount);

        nIdx = 0;
        nOffset = 0;
        for (int i = 0; i < nChannels; i++)
        {
            for (int j = 0; j < nCount; j++)
            {
                double dfVal = sd.GetDataAtD(nOffset + j);
                col.Add(dfVal);
                nIdx++;

                if (sw.Elapsed.TotalMilliseconds > 1000)
                {
                    if (evtCancel != null && evtCancel.WaitOne(0))
                        return null;

                    double dfPct = (double)nIdx / (double)nItemCount;
                    if (log != null)
                        log.WriteLine(strName + " at " + dfPct.ToString("P") + "...");
                    sw.Restart();
                }
            }

            nOffset += nCount;
        }

        return col;
    }

    /// <summary>
    /// The UnBucketize method converts all Data received into their respective Bucket average values.
    /// </summary>
    /// <remarks>
    /// The data is modified in place. Cancellation is checked after every item.
    /// </remarks>
    /// <param name="strName">Specifies the name used in the progress output.</param>
    /// <param name="rgrgData">Specifies the data to translate in place.</param>
    /// <param name="log">Specifies the output log (may be null, in which case no progress is written).</param>
    /// <param name="evtCancel">Specifies the cancel event (may be null).</param>
    /// <returns>True is returned on completion, false when cancelled.</returns>
    public bool UnBucketize(string strName, List<double[]> rgrgData, Log log, CancelEvent evtCancel)
    {
        // Sum the actual row lengths so that an empty list or ragged rows are handled.
        int nItemCount = 0;
        for (int i = 0; i < rgrgData.Count; i++)
        {
            nItemCount += rgrgData[i].Length;
        }

        if (nItemCount == 0)
            return true;

        int nIdx = 0;
        Stopwatch sw = new Stopwatch();

        sw.Start();

        for (int i = 0; i < rgrgData.Count; i++)
        {
            for (int j = 0; j < rgrgData[i].Length; j++)
            {
                double dfVal = rgrgData[i][j];
                double dfNewVal = Translate(dfVal);
                rgrgData[i][j] = dfNewVal;
                nIdx++;

                if (evtCancel != null && evtCancel.WaitOne(0))
                    return false;

                if (sw.Elapsed.TotalMilliseconds > 1000)
                {
                    double dfPct = (double)nIdx / (double)nItemCount;
                    if (log != null)
                        log.WriteLine(strName + " at " + dfPct.ToString("P") + "...");
                    sw.Restart();
                }
            }
        }

        return true;
    }

    /// <summary>
    /// Converts the BucketCollection into a byte stream.
    /// </summary>
    /// <remarks>
    /// The layout is: IsDataReal flag, bucket count, then each bucket as written by Bucket.Save.
    /// </remarks>
    /// <returns>The byte stream is returned.</returns>
    public byte[] ToByteStream()
    {
        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter bw = new BinaryWriter(ms))
        {
            bw.Write(m_bIsDataReal);
            bw.Write(m_rgBuckets.Count);

            for (int i = 0; i < m_rgBuckets.Count; i++)
            {
                m_rgBuckets[i].Save(bw);
            }

            bw.Flush();

            return ms.ToArray();
        }
    }

    /// <summary>
    /// Converts a byte stream into a BucketCollection.
    /// </summary>
    /// <param name="rg">Specifies the byte stream created by ToByteStream.</param>
    /// <returns>The new BucketCollection is returned.</returns>
    public static BucketCollection FromByteStream(byte[] rg)
    {
        // The stream must wrap 'rg'; an empty MemoryStream made every load fail on the first read.
        using (MemoryStream ms = new MemoryStream(rg))
        using (BinaryReader br = new BinaryReader(ms))
        {
            bool bIsReal = br.ReadBoolean();
            BucketCollection col = new BucketCollection(bIsReal);
            int nCount = br.ReadInt32();

            for (int i = 0; i < nCount; i++)
            {
                Bucket b = Bucket.Load(br);
                col.m_rgBuckets.Add(b);
            }

            return col;
        }
    }

    /// <summary>
    /// Returns the total count across all buckets.
    /// </summary>
    public int TotalCount
    {
        get
        {
            int nCount = 0;

            foreach (Bucket b in m_rgBuckets)
            {
                nCount += b.Count;
            }

            return nCount;
        }
    }

    /// <summary>
    /// Returns the distribution of buckets as a percentage for each time a bucket was hit.
    /// </summary>
    /// <param name="fmt">Optionally, specifies the output format (default = NONE).</param>
    /// <param name="nMaxDots">Optionally, specifies the number of '*' characters representing 100% in the TXT format.</param>
    /// <param name="strFmt">Optionally, specifies the numeric format of the bucket bounds in the TXT format.</param>
    /// <param name="bIncludePercents">Optionally, specifies to append the percentage to each line in the TXT format.</param>
    /// <returns>The distribution string is returned.</returns>
    public string ToDistributionString(OUTPUT_FMT fmt = OUTPUT_FMT.NONE, int nMaxDots = 30, string strFmt = "0.00000", bool bIncludePercents = false)
    {
        double dfTotalCount = TotalCount;
        StringBuilder sb = new StringBuilder();

        if (fmt == OUTPUT_FMT.NONE)
            sb.Append("{");
        else if (fmt == OUTPUT_FMT.CSV)
            sb.Append("MINIMUM, MAXIMUM, COUNT" + Environment.NewLine);

        foreach (Bucket b in m_rgBuckets)
        {
            // Guard the division so that an unused collection reports 0% everywhere.
            double dfPct = (dfTotalCount == 0) ? 0 : (double)b.Count / dfTotalCount;

            if (fmt == OUTPUT_FMT.TXT)
            {
                string strDots = "";
                strDots = strDots.PadRight((int)(nMaxDots * dfPct), '*');
                sb.Append("[" + b.Minimum.ToString(strFmt) + ", " + b.Maximum.ToString(strFmt) + "] " + strDots + " (" + b.Count.ToString("N0") + ")");

                if (bIncludePercents)
                    sb.Append(" " + (dfPct * 100).ToString("N4") + "%");

                sb.Append(Environment.NewLine);
            }
            else if (fmt == OUTPUT_FMT.CSV)
            {
                sb.Append(b.Minimum.ToString() + "," + b.Maximum.ToString() + "," + b.Count.ToString() + Environment.NewLine);
            }
            else
            {
                sb.Append(dfPct.ToString("P"));
                sb.Append(",");
            }
        }

        if (fmt == OUTPUT_FMT.NONE)
            return sb.ToString().TrimEnd(',') + "}";

        return sb.ToString();
    }

    /// <summary>
    /// Returns the enumerator used in foreach loops.
    /// </summary>
    /// <returns>The enumerator is returned.</returns>
    public IEnumerator<Bucket> GetEnumerator()
    {
        return m_rgBuckets.GetEnumerator();
    }

    /// <summary>
    /// Returns the enumerator used in foreach loops.
    /// </summary>
    /// <returns>The enumerator is returned.</returns>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return m_rgBuckets.GetEnumerator();
    }
}
652}
The BucketCollection contains a set of Buckets.
int FindIndex(double dfVal)
Finds the index of the Bucket containing the value.
BucketCollection(List< int > rgVocab)
The constructor.
double Translate(double fVal)
Finds the Bucket associated with the value and returns the Bucket's average value.
BucketCollection(bool bIsReal)
The constructor.
Tuple< double, double > GetRange()
Returns the numeric range that all buckets fall into.
static BucketCollection Bucketize(string strName, int nBucketCount, SimpleDatum sd, Log log, CancelEvent evtCancel, double? dfMin=null, double? dfMax=null)
The Bucketize method adds all values within a SimpleDatum to a new BucketCollection.
int TotalCount
Returns the total count across all buckets.
int Count
Returns the number of Buckets.
static BucketCollection FromByteStream(byte[] rg)
Converts a byte stream into a BucketCollection.
bool UnBucketize(string strName, List< double[]> rgrgData, Log log, CancelEvent evtCancel)
The UnBucketize method converts all Data received into their respective Bucket average values.
OUTPUT_FMT
Specifies the output format used when creating a distribution string.
byte[] ToByteStream()
Converts the BucketCollection into a byte stream.
int Add(double fVal)
Finds the correct Bucket and adds the value to it.
bool IsDataReal
Get/set whether or not the Buckets hold Real values.
void Reduce(double dfPct)
Reduces the buckets to only include those that have a count that is within 1.0 - dfPct of the maximum bucket count.
BucketCollection(double fMin, double fMax, int nCount)
The constructor.
string ToDistributionString(OUTPUT_FMT fmt=OUTPUT_FMT.NONE, int nMaxDots=30, string strFmt="0.00000", bool bIncludePercents=false)
Returns the distribution of buckets as a percentage for each time a bucket was hit.
Bucket GetBucketWithMaxCount()
Returns the bucket with the highest count.
IEnumerator< Bucket > GetEnumerator()
Returns the enumerator used in foreach loops.
double GetValueAt(int nIdx, bool bUseMidPoint=false)
Returns the average of the Bucket at a given index, or its midpoint when bUseMidPoint is true.
The Bucket class contains the information describing a single range of values within a BucketCollection.
static Bucket Load(BinaryReader br)
Load a Bucket from a BinaryReader.
object Tag
Get/set a user specified tag.
Bucket(double fMin, double fMax)
The constructor.
int Contains(double fVal)
Tests to see if the Bucket range contains the value.
int Count
Returns the number of items added to the Bucket.
override string ToString()
Returns a string representation of the Bucket.
int Add(double fVal, bool bForce=false)
Attempts to add a new value to the Bucket.
double Maximum
Returns the bucket maximum value.
double Average
Returns the average value of all values added to the Bucket.
void Save(BinaryWriter bw)
Save the Bucket to a BinaryWriter.
double Minimum
Returns the bucket minimum value.
double MidPoint
Returns the bucket midpoint.
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
int ItemCount
Returns the number of data items.
int Channels
Return the number of channels of the data.
double GetDataAtD(int nIdx)
Returns the item at a specified index in the double type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
@ NONE
No training category specified.