MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
BBoxUtility.cs
1using MyCaffe.basecode;
2using MyCaffe.param;
3using MyCaffe.param.ssd;
4using System;
5using System.Collections.Generic;
6using System.Diagnostics;
7using System.Drawing;
8using System.Linq;
9using System.Text;
10using System.Threading.Tasks;
11
12namespace MyCaffe.common
13{
21 public class BBoxUtility<T> : IDisposable
22 {
23 Blob<T> m_blobDiff;
24 CudaDnn<T> m_cuda;
25 Log m_log;
26
27
/// <summary>
/// The constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection stored by this utility and passed to the internal work blob.</param>
/// <param name="log">Specifies the output log used for all checks and failures reported by this utility.</param>
public BBoxUtility(CudaDnn<T> cuda, Log log)
{
    m_log = log;
    m_cuda = cuda;

    // Internal blob owned by this instance; it is released in Dispose().
    m_blobDiff = new Blob<T>(cuda, log);
}
39
/// <summary>
/// Release the internal blob owned by this utility. Calling this more than once is harmless
/// because the blob reference is cleared after the first release.
/// </summary>
public void Dispose()
{
    if (m_blobDiff == null)
        return;

    m_blobDiff.Dispose();
    m_blobDiff = null;
}
51
/// <summary>
/// Compute the average precision from the true-positive and false-positive lists.
/// </summary>
/// <param name="rgTp">Specifies the (score, flag) pairs for the true positives; the flag is expected to be the complement of the matching flag in <paramref name="rgFp"/>.</param>
/// <param name="nNumPos">Specifies the number of positives used as the recall denominator.</param>
/// <param name="rgFp">Specifies the (score, flag) pairs for the false positives; must have the same number of items as <paramref name="rgTp"/>.</param>
/// <param name="apVersion">Specifies how the average precision is computed: ELEVENPOINT, MAXINTEGRAL or INTEGRAL.</param>
/// <param name="rgPrec">Returns the precision at each position of the score-descending ordering.</param>
/// <param name="rgRec">Returns the recall at each position of the score-descending ordering.</param>
/// <returns>The average precision, or 0 when there are no items or <paramref name="nNumPos"/> is 0.</returns>
public float ComputeAP(List<Tuple<float, int>> rgTp, int nNumPos, List<Tuple<float, int>> rgFp, ApVersion apVersion, out List<float> rgPrec, out List<float> rgRec)
{
    const float fEps = 1e-6f;
    int nNum = rgTp.Count;

    rgPrec = new List<float>(nNum);
    rgRec = new List<float>(nNum);

    // Both lists are walked in lock-step below, so they must have the same length.
    m_log.CHECK_EQ(rgTp.Count, rgFp.Count, "The Tp and Fp lists must have the same number of items.");

    // Make sure that rgTp and rgFp have complement values.
    for (int i = 0; i < nNum; i++)
    {
        m_log.CHECK_LE(Math.Abs(rgTp[i].Item1 - rgFp[i].Item1), fEps, "The Tp[i] - Fp[i] is less than the threshold " + fEps.ToString());
        m_log.CHECK_EQ(rgTp[i].Item2, 1 - rgFp[i].Item2, "The Tp[i].second should equal 1 - Fp[i].second!");
    }

    float fAp = 0;

    if (nNum == 0 || nNumPos == 0)
        return fAp;

    // Cumulative sums taken over the score-descending ordering of each list.
    List<int> rgTpCumSum = CumSum(rgTp);
    m_log.CHECK_EQ(rgTpCumSum.Count, nNum, "The tp cumulative sum should equal the number of rgTp items (" + nNum.ToString() + ")");

    List<int> rgFpCumSum = CumSum(rgFp);
    m_log.CHECK_EQ(rgFpCumSum.Count, nNum, "The fp cumulative sum should equal the number of rgFp items (" + nNum.ToString() + ")");

    // Precision = tp / (tp + fp); recall = tp / number of positives.
    for (int i = 0; i < nNum; i++)
    {
        rgPrec.Add((float)rgTpCumSum[i] / (float)(rgTpCumSum[i] + rgFpCumSum[i]));

        m_log.CHECK_LE(rgTpCumSum[i], nNumPos, "The Tp cumulative sum must be less than the num pos of " + nNumPos.ToString());
        rgRec.Add((float)rgTpCumSum[i] / nNumPos);
    }

    switch (apVersion)
    {
        // VOC2007 style for computing AP
        case ApVersion.ELEVENPOINT:
            {
                List<float> rgMaxPrec = Utility.Create<float>(11, 0);
                int nStartIdx = nNum - 1;

                // Walk the recall thresholds 1.0, 0.9, ... 0.0 from the tail of the curve,
                // tracking the best precision seen at or above each threshold.
                for (int j = 10; j >= 0; j--)
                {
                    for (int i = nStartIdx; i >= 0; i--)
                    {
                        if (rgRec[i] < j / 10.0f)
                        {
                            nStartIdx = i;
                            // The next lower threshold starts from the best precision found so far.
                            if (j > 0)
                                rgMaxPrec[j - 1] = rgMaxPrec[j];
                            break;
                        }
                        else
                        {
                            if (rgMaxPrec[j] < rgPrec[i])
                                rgMaxPrec[j] = rgPrec[i];
                        }
                    }
                }

                for (int j = 10; j >= 0; j--)
                {
                    fAp += rgMaxPrec[j] / 11.0f;
                }
            }
            break;

        // VOC2012 or ILSVRC style of computing AP.
        case ApVersion.MAXINTEGRAL:
            {
                float fCurRec = rgRec.Last();
                float fCurPrec = rgPrec.Last();

                // Integrate from the tail using the running maximum precision.
                for (int i = nNum - 2; i >= 0; i--)
                {
                    fCurPrec = Math.Max(rgPrec[i], fCurPrec);
                    float fAbsRec = Math.Abs(fCurRec - rgRec[i]);
                    if (fAbsRec > fEps)
                        fAp += fCurPrec * fAbsRec;
                    fCurRec = rgRec[i];
                }

                fAp += fCurRec * fCurPrec;
            }
            break;

        // Natural integral.
        case ApVersion.INTEGRAL:
            {
                float fPrevRec = 0.0f;
                for (int i = 0; i < nNum; i++)
                {
                    float fAbsRec = Math.Abs(rgRec[i] - fPrevRec);
                    if (fAbsRec > fEps)
                        fAp += rgPrec[i] * fAbsRec;
                    fPrevRec = rgRec[i];
                }
            }
            break;

        default:
            m_log.FAIL("Unknown ap version '" + apVersion.ToString() + "'!");
            break;
    }

    return fAp;
}
181
/// <summary>
/// Compute the running total of the second item of each pair, taken in descending order
/// of the first item (the ordering is stable for equal first items).
/// </summary>
/// <param name="rgPairs">Specifies the (score, value) pairs to accumulate; the list itself is not modified.</param>
/// <returns>A list with one running total per input pair, in score-descending order.</returns>
public List<int> CumSum(List<Tuple<float, int>> rgPairs)
{
    List<int> rgTotals = new List<int>(rgPairs.Count);
    int nRunning = 0;

    foreach (Tuple<float, int> pair in rgPairs.OrderByDescending(item => item.Item1))
    {
        nRunning += pair.Item2;
        rgTotals.Add(nRunning);
    }

    return rgTotals;
}
203
/// <summary>
/// Pair each score with its index, order the pairs by descending score and optionally keep only the top k.
/// </summary>
/// <param name="rgScores">Specifies the scores.</param>
/// <param name="rgIdx">Specifies the index paired with the score at the same position; must have at least as many items as <paramref name="rgScores"/>.</param>
/// <param name="nTopK">Specifies the number of pairs to keep; a negative value, or a value not smaller than the number of scores, keeps them all.</param>
/// <returns>The (score, index) pairs in descending score order.</returns>
List<Tuple<float, int>> GetTopKScoreIndex(List<float> rgScores, List<int> rgIdx, int nTopK)
{
    IEnumerable<Tuple<float, int>> ranked = rgScores
        .Select((fScore, i) => new Tuple<float, int>(fScore, rgIdx[i]))
        .OrderByDescending(pair => pair.Item1);

    bool bTruncate = (nTopK > -1 && nTopK < rgScores.Count);
    if (bTruncate)
        ranked = ranked.Take(nTopK);

    return ranked.ToList();
}
227
/// <summary>
/// Collect the scores strictly above a threshold together with their positions, order them
/// by descending score and optionally keep only the top k.
/// </summary>
/// <param name="rgScores">Specifies the scores.</param>
/// <param name="fThreshold">Specifies the threshold; only scores strictly greater than it are kept.</param>
/// <param name="nTopK">Specifies the number of pairs to keep; a negative value, or a value not smaller than the number of surviving scores, keeps them all.</param>
/// <returns>The (score, position) pairs in descending score order.</returns>
List<Tuple<float, int>> GetMaxScoreIndex(List<float> rgScores, float fThreshold, int nTopK)
{
    List<Tuple<float, int>> rgRanked = rgScores
        .Select((fScore, nPos) => new Tuple<float, int>(fScore, nPos))
        .Where(pair => pair.Item1 > fThreshold)
        .OrderByDescending(pair => pair.Item1)
        .ToList();

    if (nTopK > -1 && nTopK < rgRanked.Count)
        return rgRanked.GetRange(0, nTopK);

    return rgRanked;
}
252
/// <summary>
/// Run non-maximum suppression over the boxes, visiting candidates in descending score order and
/// keeping a candidate only when its Jaccard overlap with every already kept box does not exceed
/// the (possibly adaptive) threshold.
/// </summary>
/// <param name="rgBBoxes">Specifies the candidate bounding boxes.</param>
/// <param name="rgScores">Specifies the score of each box; must have the same count as <paramref name="rgBBoxes"/>.</param>
/// <param name="fScoreThreshold">Specifies the score threshold; only boxes scoring strictly above it are considered.</param>
/// <param name="fNmsThreshold">Specifies the initial overlap threshold.</param>
/// <param name="fEta">Specifies the factor applied to the threshold after each kept box, while fEta is below 1 and the threshold is above 0.5.</param>
/// <param name="nTopK">Specifies the maximum number of candidates considered (negative considers all).</param>
/// <param name="rgIndices">Returns the indices of the kept boxes, in the order they were kept.</param>
public void ApplyNMSFast(List<NormalizedBBox> rgBBoxes, List<float> rgScores, float fScoreThreshold, float fNmsThreshold, float fEta, int nTopK, out List<int> rgIndices)
{
    rgIndices = new List<int>();

    // Sanity check.
    m_log.CHECK_EQ(rgBBoxes.Count, rgScores.Count, "The number of BBoxes and scores must be the same.");

    float fAdaptiveThreshold = fNmsThreshold;

    foreach (Tuple<float, int> candidate in GetMaxScoreIndex(rgScores, fScoreThreshold, nTopK))
    {
        int nIdx = candidate.Item2;
        bool bKeep = true;

        // Reject the candidate at the first kept box it overlaps too much.
        foreach (int nKeptIdx in rgIndices)
        {
            float fOverlap = JaccardOverlap(rgBBoxes[nIdx], rgBBoxes[nKeptIdx]);

            if (!(fOverlap <= fAdaptiveThreshold))
            {
                bKeep = false;
                break;
            }
        }

        if (!bKeep)
            continue;

        rgIndices.Add(nIdx);

        // Tighten the threshold after each accepted box when adaptive NMS is enabled.
        if (fEta < 1 && fAdaptiveThreshold > 0.5f)
            fAdaptiveThreshold *= fEta;
    }
}
303
/// <summary>
/// Run non-maximum suppression without overlap reuse, discarding the overlap table.
/// </summary>
/// <param name="rgBBoxes">Specifies the candidate bounding boxes.</param>
/// <param name="rgScores">Specifies the score of each box.</param>
/// <param name="fThreshold">Specifies the overlap threshold above which a box is suppressed.</param>
/// <param name="nTopK">Specifies the maximum number of boxes to keep (negative keeps all).</param>
/// <returns>The indices of the kept boxes.</returns>
public List<int> ApplyNMS(List<NormalizedBBox> rgBBoxes, List<float> rgScores, float fThreshold, int nTopK)
{
    Dictionary<int, Dictionary<int, float>> rgUnusedOverlaps;
    List<int> rgKept = ApplyNMS(rgBBoxes, rgScores, fThreshold, nTopK, false, out rgUnusedOverlaps);

    return rgKept;
}
317
/// <summary>
/// Run non-maximum suppression: repeatedly keep the highest scoring remaining box and drop every
/// remaining box whose Jaccard overlap with it exceeds the threshold. Boxes whose size is below
/// 1e-5 are dropped without being kept.
/// </summary>
/// <param name="rgBBoxes">Specifies the candidate bounding boxes.</param>
/// <param name="rgScores">Specifies the score of each box; must have the same count as <paramref name="rgBBoxes"/>.</param>
/// <param name="fThreshold">Specifies the overlap threshold above which a box is suppressed.</param>
/// <param name="nTopK">Specifies the maximum number of candidates considered and of boxes kept (negative means no limit).</param>
/// <param name="bReuseOverlaps">Specifies whether computed overlaps are looked up in, and stored to, <paramref name="rgOverlaps"/>.</param>
/// <param name="rgOverlaps">Returns the overlap table (best index, then other index, then overlap); it is created empty by this call and only filled when <paramref name="bReuseOverlaps"/> is true.</param>
/// <returns>The indices of the kept boxes, in descending score order.</returns>
public List<int> ApplyNMS(List<NormalizedBBox> rgBBoxes, List<float> rgScores, float fThreshold, int nTopK, bool bReuseOverlaps, out Dictionary<int, Dictionary<int, float>> rgOverlaps)
{
    const float fMinBoxSize = 1e-5f;

    List<int> rgKept = new List<int>();
    rgOverlaps = new Dictionary<int, Dictionary<int, float>>();

    // Sanity check.
    m_log.CHECK_EQ(rgBBoxes.Count, rgScores.Count, "The number of BBoxes and scores must be the same.");

    // Get top_k scores (with corresponding indices).
    List<int> rgIdx = Enumerable.Range(0, rgScores.Count).ToList();
    List<Tuple<float, int>> rgCandidates = GetTopKScoreIndex(rgScores, rgIdx, nTopK);

    while (rgCandidates.Count > 0)
    {
        // Take the current highest scoring box off the candidate list.
        int nBestIdx = rgCandidates[0].Item2;
        NormalizedBBox best_bbox = rgBBoxes[nBestIdx];
        float fBestSize = Size(best_bbox);
        rgCandidates.RemoveAt(0);

        // Degenerate boxes are discarded, never kept.
        if (fBestSize < fMinBoxSize)
            continue;

        rgKept.Add(nBestIdx);

        // Stop if finding enough boxes for nms.
        if (nTopK > -1 && rgKept.Count >= nTopK)
            break;

        // Rebuild the candidate list from the boxes that survive comparison against the best box.
        List<Tuple<float, int>> rgSurvivors = new List<Tuple<float, int>>(rgCandidates.Count);

        foreach (Tuple<float, int> candidate in rgCandidates)
        {
            int nCurIdx = candidate.Item2;
            NormalizedBBox cur_bbox = rgBBoxes[nCurIdx];
            float fCurSize = Size(cur_bbox);

            if (fCurSize < fMinBoxSize)
                continue;

            float fCurOverlap = 0.0f;
            bool bHaveOverlap = false;

            if (bReuseOverlaps)
            {
                // The table may hold the pair under either ordering of the two indices.
                Dictionary<int, float> rgRow;

                if (rgOverlaps.TryGetValue(nBestIdx, out rgRow) && rgRow.TryGetValue(nCurIdx, out fCurOverlap))
                    bHaveOverlap = true;
                else if (rgOverlaps.TryGetValue(nCurIdx, out rgRow) && rgRow.TryGetValue(nBestIdx, out fCurOverlap))
                    bHaveOverlap = true;
            }

            if (!bHaveOverlap)
            {
                fCurOverlap = JaccardOverlap(best_bbox, cur_bbox);

                if (bReuseOverlaps)
                {
                    // Store the overlap for future use.
                    Dictionary<int, float> rgBestRow;

                    if (!rgOverlaps.TryGetValue(nBestIdx, out rgBestRow))
                    {
                        rgBestRow = new Dictionary<int, float>();
                        rgOverlaps.Add(nBestIdx, rgBestRow);
                    }

                    rgBestRow[nCurIdx] = fCurOverlap;
                }
            }

            // Suppress the candidate when it overlaps the best box too much.
            if (fCurOverlap > fThreshold)
                continue;

            rgSurvivors.Add(candidate);
        }

        rgCandidates = rgSurvivors;
    }

    return rgKept;
}
426
/// <summary>
/// Unpack flat detection data into boxes grouped by item id and then by label.
/// </summary>
/// <remarks>
/// Each detection occupies 7 consecutive values read as
/// [item id, label, score, xmin, ymin, xmax, ymax]; entries whose item id is -1 are skipped.
/// </remarks>
/// <param name="rgData">Specifies the flat detection data (at least 7 * nNumDet values).</param>
/// <param name="nNumDet">Specifies the number of detections to read.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, which is checked not to appear in the detections.</param>
/// <returns>The detections keyed by item id, then by label.</returns>
public Dictionary<int, Dictionary<int, List<NormalizedBBox>>> GetDetectionResults(float[] rgData, int nNumDet, int nBackgroundLabelId)
{
    const int nStride = 7;
    Dictionary<int, Dictionary<int, List<NormalizedBBox>>> rgResults = new Dictionary<int, Dictionary<int, List<NormalizedBBox>>>();

    for (int nDet = 0, nBase = 0; nDet < nNumDet; nDet++, nBase += nStride)
    {
        int nItemId = (int)rgData[nBase];
        if (nItemId == -1)
            continue;

        int nLabel = (int)rgData[nBase + 1];
        m_log.CHECK_NE(nBackgroundLabelId, nLabel, "Found background label in the detection results.");

        float fScore = rgData[nBase + 2];
        NormalizedBBox bbox = new NormalizedBBox(rgData[nBase + 3], rgData[nBase + 4], rgData[nBase + 5], rgData[nBase + 6], nLabel, false, fScore);
        bbox.size = Size(bbox);

        Dictionary<int, List<NormalizedBBox>> rgByLabel;
        if (!rgResults.TryGetValue(nItemId, out rgByLabel))
        {
            rgByLabel = new Dictionary<int, List<NormalizedBBox>>();
            rgResults.Add(nItemId, rgByLabel);
        }

        List<NormalizedBBox> rgBoxes;
        if (!rgByLabel.TryGetValue(nLabel, out rgBoxes))
        {
            rgBoxes = new List<NormalizedBBox>();
            rgByLabel.Add(nLabel, rgBoxes);
        }

        rgBoxes.Add(bbox);
    }

    return rgResults;
}
469
/// <summary>
/// Unpack flat prior data into prior boxes and their variances.
/// </summary>
/// <remarks>
/// The first 4 * nNumPriors values are read as the box coordinates (4 per prior, passed to the
/// NormalizedBBox constructor in order) and the following 4 * nNumPriors values as the variances.
/// </remarks>
/// <param name="rgPriorData">Specifies the flat prior data (at least 8 * nNumPriors values).</param>
/// <param name="nNumPriors">Specifies the number of priors.</param>
/// <param name="rgPriorVariances">Returns one list of 4 variances per prior.</param>
/// <returns>The prior boxes with their size filled in.</returns>
public List<NormalizedBBox> GetPrior(float[] rgPriorData, int nNumPriors, out List<List<float>> rgPriorVariances)
{
    const int nCoords = 4;
    List<NormalizedBBox> rgPriors = new List<NormalizedBBox>();
    rgPriorVariances = new List<List<float>>();

    // First pass: the box coordinates.
    for (int nPrior = 0; nPrior < nNumPriors; nPrior++)
    {
        int nBase = nPrior * nCoords;
        NormalizedBBox bbox = new NormalizedBBox(rgPriorData[nBase], rgPriorData[nBase + 1], rgPriorData[nBase + 2], rgPriorData[nBase + 3]);
        bbox.size = Size(bbox);
        rgPriors.Add(bbox);
    }

    // Second pass: the variances, stored after all of the box coordinates.
    for (int nPrior = 0; nPrior < nNumPriors; nPrior++)
    {
        int nBase = (nNumPriors + nPrior) * nCoords;
        List<float> rgVariance = new List<float>();

        for (int k = 0; k < nCoords; k++)
        {
            rgVariance.Add(rgPriorData[nBase + k]);
        }

        rgPriorVariances.Add(rgVariance);
    }

    return rgPriors;
}
508
/// <summary>
/// Look up the label of the ground-truth box matched to a prediction, falling back to the background label.
/// </summary>
/// <param name="nPredIdx">Specifies the index of the prediction.</param>
/// <param name="nNumPredsPerClass">Specifies the expected length of every match index list.</param>
/// <param name="nNumClasses">Specifies the number of classes; a matched label is checked to be below it.</param>
/// <param name="nBackgroundLabel">Specifies the background label returned when nothing matches.</param>
/// <param name="rgMatchIndices">Specifies the match index lists; an entry above -1 at nPredIdx is an index into <paramref name="rgGtBoxes"/>.</param>
/// <param name="rgGtBoxes">Specifies the ground-truth boxes.</param>
/// <returns>The label of the first matched ground-truth box, or the background label.</returns>
private int getLabel(int nPredIdx, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabel, DictionaryMap<List<int>> rgMatchIndices, List<NormalizedBBox> rgGtBoxes)
{
    // Nothing to match against: the prediction is background.
    if (rgMatchIndices == null || rgMatchIndices.Count <= 0 || rgGtBoxes == null || rgGtBoxes.Count <= 0)
        return nBackgroundLabel;

    foreach (KeyValuePair<int, List<int>> match in rgMatchIndices.Map.ToList())
    {
        List<int> rgMatchIdx = match.Value;
        m_log.CHECK_EQ(rgMatchIdx.Count, nNumPredsPerClass, "The match count should equal the number of predictions per class.");

        int nGtIdx = rgMatchIdx[nPredIdx];
        if (nGtIdx <= -1)
            continue;

        m_log.CHECK_LT(nGtIdx, rgGtBoxes.Count, "The match index should be less than the number of ground truth boxes.");
        int nMatchedLabel = rgGtBoxes[nGtIdx].label;

        m_log.CHECK_GE(nMatchedLabel, 0, "The label must be >= 0.");
        m_log.CHECK_NE(nMatchedLabel, nBackgroundLabel, "The label cannot equal the background label.");
        m_log.CHECK_LT(nMatchedLabel, nNumClasses, "The label must be less than the number of classes.");

        // A prior can only be matched to one ground-truth bbox.
        return nMatchedLabel;
    }

    return nBackgroundLabel;
}
540
/// <summary>
/// Compute the confidence loss of every prediction, treating every prediction as background.
/// </summary>
/// <remarks>
/// The confidence data is read as nNum consecutive images, each holding
/// nNumPredsPerClass predictions of nNumClasses values.
/// </remarks>
/// <param name="rgConfData">Specifies the flat confidence data (at least nNum * nNumPredsPerClass * nNumClasses values).</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, used as the target label of every prediction.</param>
/// <param name="loss_type">Specifies the confidence loss type (SOFTMAX or LOGISTIC).</param>
/// <returns>One list of per-prediction losses for each image.</returns>
public List<List<float>> ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type)
{
    // Smallest positive normalized float (C's FLT_MIN). Note that float.MinValue is the most
    // negative float, so clamping with it would not keep Math.Log away from zero.
    const float fMinProb = 1.17549435E-38f;

    List<List<float>> rgrgAllConfLoss = new List<List<float>>();
    int nOffset = 0;

    for (int i = 0; i < nNum; i++)
    {
        List<float> rgConfLoss = new List<float>();

        for (int p = 0; p < nNumPredsPerClass; p++)
        {
            int nStartIdx = nOffset + p * nNumClasses;
            // No match information is available here, so the target is always the background label.
            int nLabel = nBackgroundLabelId;
            float fLoss = 0;

            switch (loss_type)
            {
                case MultiBoxLossParameter.ConfLossType.SOFTMAX:
                    {
                        m_log.CHECK_GE(nLabel, 0, "The label must be >= 0 for the SOFTMAX loss type.");
                        m_log.CHECK_LT(nLabel, nNumClasses, "The label must be < NumClasses for the SOFTMAX loss type.");

                        // Compute softmax probability.
                        // We need to subtract the max to avoid numerical issues.
                        float fMaxVal = -float.MaxValue;
                        for (int c = 0; c < nNumClasses; c++)
                        {
                            fMaxVal = Math.Max(fMaxVal, rgConfData[nStartIdx + c]);
                        }

                        float fSum = 0;
                        for (int c = 0; c < nNumClasses; c++)
                        {
                            fSum += (float)Math.Exp(rgConfData[nStartIdx + c] - fMaxVal);
                        }

                        float fValAtLabel = rgConfData[nStartIdx + nLabel];
                        float fProb = (float)Math.Exp(fValAtLabel - fMaxVal) / fSum;
                        // Clamp the probability so that a zero probability gives a large finite loss.
                        fLoss = (float)-Math.Log(Math.Max(fProb, fMinProb));
                    }
                    break;

                case MultiBoxLossParameter.ConfLossType.LOGISTIC:
                    {
                        for (int c = 0; c < nNumClasses; c++)
                        {
                            int nTarget = (c == nLabel) ? 1 : 0;
                            float fInput = rgConfData[nStartIdx + c];
                            float fIsPositive = (fInput >= 0) ? 1.0f : 0.0f;

                            // Cross-entropy on the logit, arranged so that Math.Exp never sees a positive argument.
                            fLoss -= fInput * (nTarget - fIsPositive) - (float)Math.Log(1 + Math.Exp(fInput - 2 * fInput * fIsPositive));
                        }
                    }
                    break;

                default:
                    m_log.FAIL("Unknown loss type '" + loss_type.ToString() + "'!");
                    break;
            }

            rgConfLoss.Add(fLoss);
        }

        rgrgAllConfLoss.Add(rgConfLoss);
        nOffset += nNumPredsPerClass * nNumClasses;
    }

    return rgrgAllConfLoss;
}
621
/// <summary>
/// Compute the confidence loss of every prediction, taking the match indices and ground-truth
/// boxes as DictionaryMap containers and forwarding to the Dictionary based overload.
/// </summary>
/// <param name="rgConfData">Specifies the flat confidence data.</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label.</param>
/// <param name="loss_type">Specifies the confidence loss type.</param>
/// <param name="rgAllMatchIndices">Specifies the match indices of each image.</param>
/// <param name="rgAllGtBoxes">Specifies the ground-truth boxes.</param>
/// <returns>One list of per-prediction losses for each image.</returns>
public List<List<float>> ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List<DictionaryMap<List<int>>> rgAllMatchIndices, DictionaryMap<List<NormalizedBBox>> rgAllGtBoxes)
{
    // Unwrap the underlying dictionary of each per-image map.
    List<Dictionary<int, List<int>>> rgMatchMaps = new List<Dictionary<int, List<int>>>(rgAllMatchIndices.Count);

    for (int i = 0; i < rgAllMatchIndices.Count; i++)
    {
        rgMatchMaps.Add(rgAllMatchIndices[i].Map);
    }

    return ComputeConfLoss(rgConfData, nNum, nNumPredsPerClass, nNumClasses, nBackgroundLabelId, loss_type, rgMatchMaps, rgAllGtBoxes.Map);
}
644
/// <summary>
/// Compute the confidence loss of every prediction, using the label of the matched
/// ground-truth box as the target and the background label for unmatched predictions.
/// </summary>
/// <remarks>
/// The confidence data is read as nNum consecutive images, each holding
/// nNumPredsPerClass predictions of nNumClasses values.
/// </remarks>
/// <param name="rgConfData">Specifies the flat confidence data (at least nNum * nNumPredsPerClass * nNumClasses values).</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label; must be less than <paramref name="nNumClasses"/>.</param>
/// <param name="loss_type">Specifies the confidence loss type (SOFTMAX or LOGISTIC).</param>
/// <param name="rgAllMatchIndices">Specifies, for each image, the match index lists; an entry above -1 at position p is an index into that image's ground-truth boxes.</param>
/// <param name="rgAllGtBoxes">Specifies the ground-truth boxes keyed by image index.</param>
/// <returns>One list of per-prediction losses for each image.</returns>
public List<List<float>> ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List<Dictionary<int, List<int>>> rgAllMatchIndices, Dictionary<int, List<NormalizedBBox>> rgAllGtBoxes)
{
    // Smallest positive normalized float (C's FLT_MIN). Note that float.MinValue is the most
    // negative float, so clamping with it would not keep Math.Log away from zero.
    const float fMinProb = 1.17549435E-38f;

    m_log.CHECK_LT(nBackgroundLabelId, nNumClasses, "The background id must be less than the number of classes!");
    List<List<float>> rgrgAllConfLoss = new List<List<float>>();
    int nOffset = 0;

    for (int i = 0; i < nNum; i++)
    {
        List<float> rgConfLoss = new List<float>();
        Dictionary<int, List<int>> rgMatchIndices = rgAllMatchIndices[i];

        for (int p = 0; p < nNumPredsPerClass; p++)
        {
            // Absolute position of this prediction's first class value within the current image.
            int nStartIdx = nOffset + p * nNumClasses;
            // Get the label index.
            int nLabel = nBackgroundLabelId;

            foreach (KeyValuePair<int, List<int>> kv in rgMatchIndices)
            {
                List<int> rgMatchIndex = kv.Value;
                m_log.CHECK_EQ(rgMatchIndex.Count, nNumPredsPerClass, "The number of match indexes must be equal to the NumPredsPerClass!");

                if (rgMatchIndex[p] > -1)
                {
                    m_log.CHECK(rgAllGtBoxes.ContainsKey(i), "The AllGtBoxes does not have the label '" + i.ToString() + "'!");
                    List<NormalizedBBox> rgGtBboxes = rgAllGtBoxes[i];

                    m_log.CHECK_LT(rgMatchIndex[p], rgGtBboxes.Count, "The match index at '" + p.ToString() + "' must be less than the number of Gt bboxes at label " + i.ToString() + " (" + rgGtBboxes.Count.ToString() + ")!");

                    nLabel = rgGtBboxes[rgMatchIndex[p]].label;
                    m_log.CHECK_GE(nLabel, 0, "The label must be >= 0.");
                    m_log.CHECK_NE(nLabel, nBackgroundLabelId, "The label cannot be the background label of '" + nBackgroundLabelId.ToString() + "'!");
                    m_log.CHECK_LT(nLabel, nNumClasses, "The label must be < NumClasses (" + nNumClasses.ToString() + ")!");

                    // A prior can only be matched to one gt bbox.
                    break;
                }
            }

            float fLoss = 0;
            switch (loss_type)
            {
                case MultiBoxLossParameter.ConfLossType.SOFTMAX:
                    {
                        m_log.CHECK_GE(nLabel, 0, "The label must be >= 0 for the SOFTMAX loss type.");
                        m_log.CHECK_LT(nLabel, nNumClasses, "The label must be < NumClasses for the SOFTMAX loss type.");

                        // Compute softmax probability.
                        // We need to subtract the max to avoid numerical issues. The max is seeded
                        // from this image's own data (nStartIdx already includes the image offset).
                        float fMaxVal = rgConfData[nStartIdx];
                        for (int c = 1; c < nNumClasses; c++)
                        {
                            fMaxVal = Math.Max(fMaxVal, rgConfData[nStartIdx + c]);
                        }

                        float fSum = 0;
                        for (int c = 0; c < nNumClasses; c++)
                        {
                            fSum += (float)Math.Exp(rgConfData[nStartIdx + c] - fMaxVal);
                        }

                        float fValAtLabel = rgConfData[nStartIdx + nLabel];
                        float fProb = (float)Math.Exp(fValAtLabel - fMaxVal) / fSum;
                        // Clamp the probability so that a zero probability gives a large finite loss.
                        fLoss = (float)-Math.Log(Math.Max(fProb, fMinProb));
                    }
                    break;

                case MultiBoxLossParameter.ConfLossType.LOGISTIC:
                    {
                        for (int c = 0; c < nNumClasses; c++)
                        {
                            int nTarget = (c == nLabel) ? 1 : 0;
                            float fInput = rgConfData[nStartIdx + c];
                            float fIsPositive = (fInput >= 0) ? 1.0f : 0.0f;

                            // Cross-entropy on the logit, arranged so that Math.Exp never sees a positive argument.
                            fLoss -= fInput * (nTarget - fIsPositive) - (float)Math.Log(1 + Math.Exp(fInput - 2 * fInput * fIsPositive));
                        }
                    }
                    break;

                default:
                    m_log.FAIL("Unknown loss type '" + loss_type.ToString() + "'!");
                    break;
            }

            rgConfLoss.Add(fLoss);
        }

        rgrgAllConfLoss.Add(rgConfLoss);
        nOffset += nNumPredsPerClass * nNumClasses;
    }

    return rgrgAllConfLoss;
}
751
/// <summary>
/// Regroup flat confidence data into per-image dictionaries that map each class to the list of
/// its scores, one score per prediction.
/// </summary>
/// <remarks>
/// The data is read as nNum consecutive images, each holding nNumPredsPerClass predictions
/// of nNumClasses values.
/// </remarks>
/// <param name="rgConfData">Specifies the flat confidence data (at least nNum * nNumPredsPerClass * nNumClasses values).</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <returns>One class-to-scores dictionary per image.</returns>
public List<Dictionary<int, List<float>>> GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
{
    List<Dictionary<int, List<float>>> rgAllScores = new List<Dictionary<int, List<float>>>();
    int nImageStride = nNumPredsPerClass * nNumClasses;

    for (int nImage = 0; nImage < nNum; nImage++)
    {
        Dictionary<int, List<float>> rgScoresByClass = new Dictionary<int, List<float>>();
        int nImageBase = nImage * nImageStride;

        for (int nPred = 0; nPred < nNumPredsPerClass; nPred++)
        {
            int nPredBase = nImageBase + nPred * nNumClasses;

            for (int nClass = 0; nClass < nNumClasses; nClass++)
            {
                float fConf = rgConfData[nPredBase + nClass];

                // The per-class list is created the first time the class is seen.
                List<float> rgClassScores;
                if (!rgScoresByClass.TryGetValue(nClass, out rgClassScores))
                {
                    rgClassScores = new List<float>();
                    rgScoresByClass.Add(nClass, rgClassScores);
                }

                rgClassScores.Add(fConf);
            }
        }

        rgAllScores.Add(rgScoresByClass);
    }

    return rgAllScores;
}
790
/// <summary>
/// Regroup flat location data into one LabelBBox per image.
/// </summary>
/// <remarks>
/// The data is read as nNum consecutive images, each holding nNumPredsPerClass predictions of
/// nNumLocClasses boxes with 4 values per box. When locations are shared, every box is stored
/// under the label -1; otherwise it is stored under its class index.
/// </remarks>
/// <param name="rgLocData">Specifies the flat location data (at least nNum * nNumPredsPerClass * nNumLocClasses * 4 values).</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumLocClasses">Specifies the number of location classes; checked to be 1 when locations are shared.</param>
/// <param name="bShareLocation">Specifies whether the locations are shared across classes.</param>
/// <returns>One LabelBBox per image.</returns>
public List<LabelBBox> GetLocPredictions(float[] rgLocData, int nNum, int nNumPredsPerClass, int nNumLocClasses, bool bShareLocation)
{
    const int nCoords = 4;
    List<LabelBBox> rgAllPreds = new List<LabelBBox>();

    if (bShareLocation)
        m_log.CHECK_EQ(nNumLocClasses, 1, "When shareing locations, the nNumLocClasses must be 1.");

    int nImageStride = nNumPredsPerClass * nNumLocClasses * nCoords;

    for (int nImage = 0; nImage < nNum; nImage++)
    {
        LabelBBox imagePreds = new LabelBBox();
        int nImageBase = nImage * nImageStride;

        for (int nPred = 0; nPred < nNumPredsPerClass; nPred++)
        {
            for (int nClass = 0; nClass < nNumLocClasses; nClass++)
            {
                int nBase = nImageBase + (nPred * nNumLocClasses + nClass) * nCoords;
                int nLabel = bShareLocation ? -1 : nClass;

                NormalizedBBox bbox = new NormalizedBBox(rgLocData[nBase], rgLocData[nBase + 1], rgLocData[nBase + 2], rgLocData[nBase + 3]);
                imagePreds[nLabel].Add(bbox);
            }
        }

        rgAllPreds.Add(imagePreds);
    }

    return rgAllPreds;
}
835
/// <summary>
/// Unpack flat ground-truth data into boxes grouped by item id.
/// </summary>
/// <remarks>
/// Each ground-truth entry occupies 8 consecutive values read as
/// [item id, label, (unused here), xmin, ymin, xmax, ymax, difficult]; entries whose item id is -1 are skipped.
/// </remarks>
/// <param name="rgGtData">Specifies the flat ground-truth data (at least 8 * nNumGt values).</param>
/// <param name="nNumGt">Specifies the number of ground-truth entries to read.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, which is checked not to appear in the data.</param>
/// <param name="bUseDifficultGt">Specifies whether entries flagged as difficult are kept.</param>
/// <returns>The ground-truth boxes keyed by item id.</returns>
public DictionaryMap<List<NormalizedBBox>> GetGroundTruth(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
{
    const int nStride = 8;
    DictionaryMap<List<NormalizedBBox>> rgGtByItem = new DictionaryMap<List<NormalizedBBox>>(null);

    for (int nGt = 0, nBase = 0; nGt < nNumGt; nGt++, nBase += nStride)
    {
        int nItemId = (int)rgGtData[nBase];
        if (nItemId == -1)
            continue;

        int nLabel = (int)rgGtData[nBase + 1];
        m_log.CHECK_NE(nBackgroundLabelId, nLabel, "Found the background label in the dataset!");

        // Any non-zero flag marks the entry as difficult.
        bool bDifficult = (rgGtData[nBase + 7] != 0);
        if (bDifficult && !bUseDifficultGt)
            continue;

        NormalizedBBox bbox = new NormalizedBBox(rgGtData[nBase + 3], rgGtData[nBase + 4], rgGtData[nBase + 5], rgGtData[nBase + 6], nLabel, bDifficult);
        bbox.size = Size(bbox);

        // The map was created with a null default, so a null entry means no list exists for this item yet.
        if (rgGtByItem[nItemId] == null)
            rgGtByItem[nItemId] = new List<NormalizedBBox>();

        rgGtByItem[nItemId].Add(bbox);
    }

    return rgGtByItem;
}
879
/// <summary>
/// Unpack flat ground-truth data into a LabelBBox per item id.
/// </summary>
/// <remarks>
/// Each ground-truth entry occupies 8 consecutive values read as
/// [item id, label, (unused here), xmin, ymin, xmax, ymax, difficult]. Reading stops at the first
/// entry whose item id is -1.
/// </remarks>
/// <param name="rgGtData">Specifies the flat ground-truth data (at least 8 * nNumGt values).</param>
/// <param name="nNumGt">Specifies the number of ground-truth entries to read.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, which is checked not to appear in the data.</param>
/// <param name="bUseDifficultGt">Specifies whether entries flagged as difficult are kept.</param>
/// <returns>The ground-truth boxes keyed by item id and grouped by label within each LabelBBox.</returns>
public Dictionary<int, LabelBBox> GetGroundTruthEx(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
{
    const int nStride = 8;
    Dictionary<int, LabelBBox> rgGtByItem = new Dictionary<int, LabelBBox>();

    for (int nGt = 0, nBase = 0; nGt < nNumGt; nGt++, nBase += nStride)
    {
        int nItemId = (int)rgGtData[nBase];

        // An item id of -1 ends the list of entries.
        if (nItemId == -1)
            break;

        int nLabel = (int)rgGtData[nBase + 1];
        m_log.CHECK_NE(nBackgroundLabelId, nLabel, "Found the background label in the dataset!");

        // Any non-zero flag marks the entry as difficult.
        bool bDifficult = (rgGtData[nBase + 7] != 0);
        if (bDifficult && !bUseDifficultGt)
            continue;

        NormalizedBBox bbox = new NormalizedBBox(rgGtData[nBase + 3], rgGtData[nBase + 4], rgGtData[nBase + 5], rgGtData[nBase + 6], nLabel, bDifficult);
        bbox.size = Size(bbox);

        LabelBBox itemBoxes;
        if (!rgGtByItem.TryGetValue(nItemId, out itemBoxes))
        {
            itemBoxes = new LabelBBox();
            rgGtByItem.Add(nItemId, itemBoxes);
        }

        itemBoxes.Add(nLabel, bbox);
    }

    return rgGtByItem;
}
923
/// <summary>
/// Match prediction boxes to ground-truth boxes.
/// </summary>
/// <remarks>
/// A bipartite pass first assigns each ground-truth box to the still unmatched prediction with
/// the highest overlap. For the PER_PREDICTION match type, every remaining unmatched prediction
/// is then assigned to its most overlapping ground-truth box, provided that overlap reaches the
/// threshold.
/// </remarks>
/// <param name="rgGtBboxes">Specifies the ground-truth boxes.</param>
/// <param name="rgPredBboxes">Specifies the prediction boxes.</param>
/// <param name="nLabel">Specifies the label of the ground-truth boxes to match against, or -1 to match against all of them.</param>
/// <param name="match_type">Specifies the matching type (BIPARTITE or PER_PREDICTION).</param>
/// <param name="fOverlapThreshold">Specifies the minimum overlap for a PER_PREDICTION match.</param>
/// <param name="bIgnoreCrossBoundaryBbox">Specifies whether predictions that cross the boundary are ignored (marked -2).</param>
/// <param name="rgMatchIndices">Returns, for each prediction, the index into <paramref name="rgGtBboxes"/> of its match, -1 when unmatched, or -2 when ignored.</param>
/// <param name="rgMatchOverlaps">Returns, for each prediction, the overlap with its match (or its largest overlap with any considered ground-truth box when unmatched).</param>
public void Match(List<NormalizedBBox> rgGtBboxes, List<NormalizedBBox> rgPredBboxes, int nLabel, MultiBoxLossParameter.MatchType match_type, float fOverlapThreshold, bool bIgnoreCrossBoundaryBbox, out List<int> rgMatchIndices, out List<float> rgMatchOverlaps)
{
    int nNumPred = rgPredBboxes.Count;
    rgMatchIndices = Utility.Create<int>(nNumPred, -1);
    rgMatchOverlaps = Utility.Create<float>(nNumPred, 0);

    // Collect the ground-truth boxes to match against: all of them when the label is -1,
    // otherwise only those carrying the specified label.
    List<int> rgGtIndices = new List<int>();
    for (int i = 0; i < rgGtBboxes.Count; i++)
    {
        if (nLabel == -1 || rgGtBboxes[i].label == nLabel)
            rgGtIndices.Add(i);
    }

    int nNumGt = rgGtIndices.Count;
    if (nNumGt == 0)
        return;

    // Store the positive overlap between predictions and ground truth.
    Dictionary<int, Dictionary<int, float>> rgOverlaps = new Dictionary<int, Dictionary<int, float>>();
    for (int i = 0; i < nNumPred; i++)
    {
        Dictionary<int, float> rgPredOverlaps = new Dictionary<int, float>();
        rgOverlaps.Add(i, rgPredOverlaps);

        if (bIgnoreCrossBoundaryBbox && IsCrossBoundary(rgPredBboxes[i]))
        {
            // Mark this prediction as ignored in place; appending here would leave the
            // prediction unmarked and grow the list beyond one entry per prediction.
            rgMatchIndices[i] = -2;
            continue;
        }

        for (int j = 0; j < nNumGt; j++)
        {
            float fOverlap = JaccardOverlap(rgPredBboxes[i], rgGtBboxes[rgGtIndices[j]]);
            if (fOverlap > 1e-6f)
            {
                rgMatchOverlaps[i] = Math.Max(rgMatchOverlaps[i], fOverlap);
                rgPredOverlaps.Add(j, fOverlap);
            }
        }
    }

    // Bipartite matching: the pool holds the positions (into rgGtIndices) of the
    // ground-truth boxes that have not been assigned yet.
    List<int> rgGtPool = new List<int>();
    for (int i = 0; i < nNumGt; i++)
    {
        rgGtPool.Add(i);
    }

    // Find the most overlapped gt and corresponding predictions.
    while (rgGtPool.Count > 0)
    {
        int nMaxIdx = -1;
        int nMaxGtIdx = -1;
        float fMaxOverlap = -1;

        foreach (KeyValuePair<int, Dictionary<int, float>> kv in rgOverlaps)
        {
            int i = kv.Key;

            // The prediction already has match ground truth or is ignored.
            if (rgMatchIndices[i] != -1)
                continue;

            foreach (int j in rgGtPool)
            {
                // No overlap between the i'th prediction and j'th ground truth.
                float fOverlap;
                if (!kv.Value.TryGetValue(j, out fOverlap))
                    continue;

                // Find the maximum overlap pair.
                if (fOverlap > fMaxOverlap)
                {
                    nMaxIdx = i;
                    nMaxGtIdx = j;
                    fMaxOverlap = fOverlap;
                }
            }
        }

        // Cannot find a good match.
        if (nMaxIdx == -1)
            break;

        m_log.CHECK_EQ(rgMatchIndices[nMaxIdx], -1, "The match index at index=" + nMaxIdx.ToString() + " should be -1.");
        rgMatchIndices[nMaxIdx] = rgGtIndices[nMaxGtIdx];
        rgMatchOverlaps[nMaxIdx] = fMaxOverlap;

        // Remove the ground truth (by value) so that it is not assigned a second time.
        rgGtPool.Remove(nMaxGtIdx);
    }

    // Do the matching
    switch (match_type)
    {
        case MultiBoxLossParameter.MatchType.BIPARTITE:
            // Already done.
            break;

        case MultiBoxLossParameter.MatchType.PER_PREDICTION:
            // Get most overlapped for the rest of the prediction bboxes.
            foreach (KeyValuePair<int, Dictionary<int, float>> kv in rgOverlaps)
            {
                int i = kv.Key;

                // The prediction already has matched ground truth or is ignored.
                if (rgMatchIndices[i] != -1)
                    continue;

                int nMaxGtIdx = -1;
                float fMaxOverlap = -1;

                for (int j = 0; j < nNumGt; j++)
                {
                    // No overlap between the i'th prediction and j'th ground truth.
                    float fOverlap;
                    if (!kv.Value.TryGetValue(j, out fOverlap))
                        continue;

                    // Keep the largest overlap that also reaches the threshold.
                    if (fOverlap >= fOverlapThreshold && fOverlap > fMaxOverlap)
                    {
                        nMaxGtIdx = j;
                        fMaxOverlap = fOverlap;
                    }
                }

                // Found a matched ground truth.
                if (nMaxGtIdx != -1)
                {
                    m_log.CHECK_EQ(rgMatchIndices[i], -1, "The match index at index=" + i.ToString() + " should be -1.");
                    rgMatchIndices[i] = rgGtIndices[nMaxGtIdx];
                    rgMatchOverlaps[i] = fMaxOverlap;
                }
            }
            break;

        default:
            m_log.FAIL("Unknown matching type '" + match_type.ToString() + "'!");
            break;
    }
}
1105
// NOTE(review): the declaration line of this method is missing from this listing. The call site
// in Match() invokes it as IsCrossBoundary(rgPredBboxes[i]) and uses the result as a bool, so it
// presumably takes a single NormalizedBBox named 'bbox' - confirm against the full source.
//
// Reports whether the box crosses the [0, 1] boundary: returns true as soon as any of xmin,
// ymin, xmax or ymax is below 0 or above 1, and false when all four lie within [0, 1].
1112 {
1113 if (bbox.xmin < 0 || bbox.xmin > 1)
1114 return true;
1115
1116 if (bbox.ymin < 0 || bbox.ymin > 1)
1117 return true;
1118
1119 if (bbox.xmax < 0 || bbox.xmax > 1)
1120 return true;
1121
1122 if (bbox.ymax < 0 || bbox.ymax > 1)
1123 return true;
1124
1125 return false;
1126 }
1127
/// <summary>
/// Decode the location predictions of every image against the prior boxes.
/// </summary>
/// <param name="rgAllLocPreds">Specifies the location predictions, one LabelBBox per image; its count is checked against <paramref name="nNum"/>.</param>
/// <param name="rgPriorBboxes">Specifies the prior boxes.</param>
/// <param name="rgrgfPrioVariances">Specifies the variances of the prior boxes.</param>
/// <param name="nNum">Specifies the number of images.</param>
/// <param name="bShareLocation">Specifies whether locations are shared across classes, in which case the label -1 is used.</param>
/// <param name="nNumLocClasses">Specifies the number of location classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, which is not decoded.</param>
/// <param name="codeType">Specifies the box coding type passed on to Decode.</param>
/// <param name="bVarianceEncodedInTarget">Specifies whether the variance is encoded in the target, passed on to Decode.</param>
/// <param name="bClip">Specifies the clip flag passed on to Decode.</param>
/// <returns>The decoded boxes, one LabelBBox per image.</returns>
public List<LabelBBox> DecodeAll(List<LabelBBox> rgAllLocPreds, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgfPrioVariances, int nNum, bool bShareLocation, int nNumLocClasses, int nBackgroundLabelId, PriorBoxParameter.CodeType codeType, bool bVarianceEncodedInTarget, bool bClip)
{
    m_log.CHECK_EQ(rgAllLocPreds.Count, nNum, "The number of Loc Preds does not equal the expected Num!");

    List<LabelBBox> rgDecodedPerImage = new List<LabelBBox>();

    for (int nImage = 0; nImage < nNum; nImage++)
    {
        LabelBBox decoded = new LabelBBox();

        for (int nClass = 0; nClass < nNumLocClasses; nClass++)
        {
            int nLabel = bShareLocation ? -1 : nClass;

            // The background class is never decoded.
            if (nLabel == nBackgroundLabelId)
                continue;

            // Something bad happened if there are no predictions for the current label.
            if (!rgAllLocPreds[nImage].Contains(nLabel))
                m_log.FAIL("Could not find the location predictions for label '" + nLabel.ToString() + "'!");

            List<NormalizedBBox> rgLabelLocPreds = rgAllLocPreds[nImage][nLabel];
            decoded[nLabel] = Decode(rgPriorBboxes, rgrgfPrioVariances, codeType, bVarianceEncodedInTarget, bClip, rgLabelLocPreds);
        }

        rgDecodedPerImage.Add(decoded);
    }

    return rgDecodedPerImage;
}
1174
/// <summary>
/// Decode a list of boxes, pairing each box with the prior box and variance list at the same position.
/// </summary>
/// <param name="rgPriorBbox">Specifies the prior boxes.</param>
/// <param name="rgrgfPriorVariance">Specifies one variance list per prior box; the first list is checked to hold 4 items.</param>
/// <param name="code_type">Specifies the box coding type.</param>
/// <param name="bEncodeVarianceInTarget">Specifies whether the variance is encoded in the target.</param>
/// <param name="bClip">Specifies the clip flag passed on to the single-box Decode.</param>
/// <param name="rgBbox">Specifies the boxes to decode; must have the same count as <paramref name="rgPriorBbox"/>.</param>
/// <returns>The decoded boxes, in input order.</returns>
public List<NormalizedBBox> Decode(List<NormalizedBBox> rgPriorBbox, List<List<float>> rgrgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, List<NormalizedBBox> rgBbox)
{
    int nCount = rgPriorBbox.Count;

    m_log.CHECK_EQ(nCount, rgrgfPriorVariance.Count, "The number of prior boxes must match the number of variance lists.");
    m_log.CHECK_EQ(nCount, rgBbox.Count, "The number of prior boxes must match the number of boxes.");

    if (nCount >= 1)
        m_log.CHECK_EQ(rgrgfPriorVariance[0].Count, 4, "The variance lists must have 4 items.");

    List<NormalizedBBox> rgDecoded = new List<NormalizedBBox>();

    for (int nBox = 0; nBox < nCount; nBox++)
    {
        rgDecoded.Add(Decode(rgPriorBbox[nBox], rgrgfPriorVariance[nBox], code_type, bEncodeVarianceInTarget, bClip, rgBbox[nBox]));
    }

    return rgDecoded;
}
1204
/// <summary>
/// Decode a single encoded bbox prediction into a bbox, relative to its prior bbox.
/// </summary>
/// <param name="prior_bbox">Specifies the prior bbox the encoded values are relative to.</param>
/// <param name="rgfPriorVariance">Specifies the four variances; only read when <i>bEncodeVarianceInTarget</i> is false.</param>
/// <param name="code_type">Specifies the coding scheme: CORNER, CENTER_SIZE or CORNER_SIZE.</param>
/// <param name="bEncodeVarianceInTarget">When true, the encoded values are used without variance scaling.</param>
/// <param name="bClip">When true, the decoded bbox is passed through Clip (default limits) before it is returned.</param>
/// <param name="bbox">Specifies the encoded values, carried in the xmin/ymin/xmax/ymax fields.</param>
/// <returns>The decoded bbox with its size set.  For an unknown code type m_log.FAIL is invoked and, should it return, null is returned.</returns>
public NormalizedBBox Decode(NormalizedBBox prior_bbox, List<float> rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, NormalizedBBox bbox)
{
    NormalizedBBox decode_bbox;

    switch (code_type)
    {
        case PriorBoxParameter.CodeType.CORNER:
            if (bEncodeVarianceInTarget)
            {
                // Variance is encoded in target, we simply need to add the offset predictions.
                decode_bbox = new NormalizedBBox(prior_bbox.xmin + bbox.xmin,
                                                 prior_bbox.ymin + bbox.ymin,
                                                 prior_bbox.xmax + bbox.xmax,
                                                 prior_bbox.ymax + bbox.ymax);
            }
            else
            {
                // Variance is encoded in the bbox, we need to scale the offset accordingly.
                m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");
                foreach (float fVar in rgfPriorVariance)
                {
                    m_log.CHECK_GT(fVar, 0, "Each variance must be greater than 0.");
                }

                decode_bbox = new NormalizedBBox(prior_bbox.xmin + rgfPriorVariance[0] * bbox.xmin,
                                                 prior_bbox.ymin + rgfPriorVariance[1] * bbox.ymin,
                                                 prior_bbox.xmax + rgfPriorVariance[2] * bbox.xmax,
                                                 prior_bbox.ymax + rgfPriorVariance[3] * bbox.ymax);
            }
            break;

        case PriorBoxParameter.CodeType.CENTER_SIZE:
            {
                float fPriorWidth = prior_bbox.xmax - prior_bbox.xmin;
                m_log.CHECK_GT(fPriorWidth, 0, "The prior width must be greater than zero.");
                float fPriorHeight = prior_bbox.ymax - prior_bbox.ymin;
                m_log.CHECK_GT(fPriorHeight, 0, "The prior height must be greater than zero.");
                float fPriorCenterX = (prior_bbox.xmin + prior_bbox.xmax) / 2;
                float fPriorCenterY = (prior_bbox.ymin + prior_bbox.ymax) / 2;

                float fDecodeBboxCenterX;
                float fDecodeBboxCenterY;
                float fDecodeBboxWidth;
                float fDecodeBboxHeight;

                if (bEncodeVarianceInTarget)
                {
                    // Variance is encoded in target, we simply need to restore the offset predictions.
                    fDecodeBboxCenterX = bbox.xmin * fPriorWidth + fPriorCenterX;
                    fDecodeBboxCenterY = bbox.ymin * fPriorHeight + fPriorCenterY;
                    fDecodeBboxWidth = (float)Math.Exp(bbox.xmax) * fPriorWidth;
                    fDecodeBboxHeight = (float)Math.Exp(bbox.ymax) * fPriorHeight;
                }
                else
                {
                    // Variance is encoded in the bbox, we need to scale the offset accordingly.
                    // Validate the count first so the indexing below cannot run past the end,
                    // as the CORNER and CORNER_SIZE branches already do.
                    m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");

                    fDecodeBboxCenterX = rgfPriorVariance[0] * bbox.xmin * fPriorWidth + fPriorCenterX;
                    fDecodeBboxCenterY = rgfPriorVariance[1] * bbox.ymin * fPriorHeight + fPriorCenterY;
                    fDecodeBboxWidth = (float)Math.Exp(rgfPriorVariance[2] * bbox.xmax) * fPriorWidth;
                    fDecodeBboxHeight = (float)Math.Exp(rgfPriorVariance[3] * bbox.ymax) * fPriorHeight;
                }

                // Convert center/size back into corner form.
                decode_bbox = new NormalizedBBox(fDecodeBboxCenterX - fDecodeBboxWidth / 2,
                                                 fDecodeBboxCenterY - fDecodeBboxHeight / 2,
                                                 fDecodeBboxCenterX + fDecodeBboxWidth / 2,
                                                 fDecodeBboxCenterY + fDecodeBboxHeight / 2);
            }
            break;

        case PriorBoxParameter.CodeType.CORNER_SIZE:
            {
                float fPriorWidth = prior_bbox.xmax - prior_bbox.xmin;
                m_log.CHECK_GT(fPriorWidth, 0, "The prior width must be greater than zero.");
                float fPriorHeight = prior_bbox.ymax - prior_bbox.ymin;
                m_log.CHECK_GT(fPriorHeight, 0, "The prior height must be greater than zero.");

                if (bEncodeVarianceInTarget)
                {
                    // Variance is encoded in target, we simply need to add the offset predictions.
                    decode_bbox = new NormalizedBBox(prior_bbox.xmin + bbox.xmin * fPriorWidth,
                                                     prior_bbox.ymin + bbox.ymin * fPriorHeight,
                                                     prior_bbox.xmax + bbox.xmax * fPriorWidth,
                                                     prior_bbox.ymax + bbox.ymax * fPriorHeight);
                }
                else
                {
                    // Variance is encoded in the bbox, we need to scale the offset accordingly.
                    m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");
                    foreach (float fVar in rgfPriorVariance)
                    {
                        m_log.CHECK_GT(fVar, 0, "Each variance must be greater than 0.");
                    }

                    decode_bbox = new NormalizedBBox(prior_bbox.xmin + rgfPriorVariance[0] * bbox.xmin * fPriorWidth,
                                                     prior_bbox.ymin + rgfPriorVariance[1] * bbox.ymin * fPriorHeight,
                                                     prior_bbox.xmax + rgfPriorVariance[2] * bbox.xmax * fPriorWidth,
                                                     prior_bbox.ymax + rgfPriorVariance[3] * bbox.ymax * fPriorHeight);
                }
            }
            break;

        default:
            m_log.FAIL("Unknown code type '" + code_type.ToString() + "'!");
            return null;
    }

    // The size is computed before the optional clip; Clip recomputes it for the clipped copy.
    decode_bbox.size = Size(decode_bbox);
    if (bClip)
        decode_bbox = Clip(decode_bbox);

    return decode_bbox;
}
1327
/// <summary>
/// Encode a bbox relative to a prior bbox using the given coding scheme.
/// </summary>
/// <param name="prior_bbox">Specifies the prior bbox the encoding is relative to.</param>
/// <param name="rgfPriorVariance">Specifies the four variances; only read when <i>bEncodeVarianceInTarget</i> is false.</param>
/// <param name="code_type">Specifies the coding scheme: CORNER, CENTER_SIZE or CORNER_SIZE.</param>
/// <param name="bEncodeVarianceInTarget">When false, each encoded value is additionally divided by its variance.</param>
/// <param name="bbox">Specifies the bbox to encode.</param>
/// <returns>A bbox whose xmin/ymin/xmax/ymax fields carry the four encoded values.  For an unknown code type m_log.FAIL is invoked and, should it return, null is returned.</returns>
public NormalizedBBox Encode(NormalizedBBox prior_bbox, List<float> rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, NormalizedBBox bbox)
{
    NormalizedBBox encode_bbox;

    switch (code_type)
    {
        case PriorBoxParameter.CodeType.CORNER:
            if (bEncodeVarianceInTarget)
            {
                // Plain corner differences.
                encode_bbox = new NormalizedBBox(bbox.xmin - prior_bbox.xmin,
                                                 bbox.ymin - prior_bbox.ymin,
                                                 bbox.xmax - prior_bbox.xmax,
                                                 bbox.ymax - prior_bbox.ymax);
            }
            else
            {
                // Encode variance in bbox.
                m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");
                foreach (float fVar in rgfPriorVariance)
                {
                    m_log.CHECK_GT(fVar, 0, "Each variance must be greater than 0.");
                }

                encode_bbox = new NormalizedBBox((bbox.xmin - prior_bbox.xmin) / rgfPriorVariance[0],
                                                 (bbox.ymin - prior_bbox.ymin) / rgfPriorVariance[1],
                                                 (bbox.xmax - prior_bbox.xmax) / rgfPriorVariance[2],
                                                 (bbox.ymax - prior_bbox.ymax) / rgfPriorVariance[3]);
            }
            break;

        case PriorBoxParameter.CodeType.CENTER_SIZE:
            {
                float fPriorWidth = prior_bbox.xmax - prior_bbox.xmin;
                m_log.CHECK_GT(fPriorWidth, 0, "The prior width must be greater than zero.");
                float fPriorHeight = prior_bbox.ymax - prior_bbox.ymin;
                m_log.CHECK_GT(fPriorHeight, 0, "The prior height must be greater than zero.");
                float fPriorCenterX = (prior_bbox.xmin + prior_bbox.xmax) / 2;
                float fPriorCenterY = (prior_bbox.ymin + prior_bbox.ymax) / 2;

                // The bbox extents feed Math.Log below, so they must be strictly positive.
                float fBboxWidth = bbox.xmax - bbox.xmin;
                m_log.CHECK_GT(fBboxWidth, 0, "The bbox width must be greater than zero.");
                float fBboxHeight = bbox.ymax - bbox.ymin;
                m_log.CHECK_GT(fBboxHeight, 0, "The bbox height must be greater than zero.");
                float fBboxCenterX = (bbox.xmin + bbox.xmax) / 2;
                float fBboxCenterY = (bbox.ymin + bbox.ymax) / 2;

                if (bEncodeVarianceInTarget)
                {
                    encode_bbox = new NormalizedBBox((fBboxCenterX - fPriorCenterX) / fPriorWidth,
                                                     (fBboxCenterY - fPriorCenterY) / fPriorHeight,
                                                     (float)Math.Log(fBboxWidth / fPriorWidth),
                                                     (float)Math.Log(fBboxHeight / fPriorHeight));
                }
                else
                {
                    // Encode variance in bbox.
                    m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");
                    foreach (float fVar in rgfPriorVariance)
                    {
                        m_log.CHECK_GT(fVar, 0, "Each variance must be greater than 0.");
                    }

                    encode_bbox = new NormalizedBBox((fBboxCenterX - fPriorCenterX) / fPriorWidth / rgfPriorVariance[0],
                                                     (fBboxCenterY - fPriorCenterY) / fPriorHeight / rgfPriorVariance[1],
                                                     (float)Math.Log(fBboxWidth / fPriorWidth) / rgfPriorVariance[2],
                                                     (float)Math.Log(fBboxHeight / fPriorHeight) / rgfPriorVariance[3]);
                }
            }
            break;

        case PriorBoxParameter.CodeType.CORNER_SIZE:
            {
                // Corner differences normalized by the prior extents (the prior center is not needed here).
                float fPriorWidth = prior_bbox.xmax - prior_bbox.xmin;
                m_log.CHECK_GT(fPriorWidth, 0, "The prior width must be greater than zero.");
                float fPriorHeight = prior_bbox.ymax - prior_bbox.ymin;
                m_log.CHECK_GT(fPriorHeight, 0, "The prior height must be greater than zero.");

                if (bEncodeVarianceInTarget)
                {
                    encode_bbox = new NormalizedBBox((bbox.xmin - prior_bbox.xmin) / fPriorWidth,
                                                     (bbox.ymin - prior_bbox.ymin) / fPriorHeight,
                                                     (bbox.xmax - prior_bbox.xmax) / fPriorWidth,
                                                     (bbox.ymax - prior_bbox.ymax) / fPriorHeight);
                }
                else
                {
                    // Encode variance in bbox.
                    m_log.CHECK_EQ(rgfPriorVariance.Count, 4, "The variance must have 4 values!");
                    foreach (float fVar in rgfPriorVariance)
                    {
                        m_log.CHECK_GT(fVar, 0, "Each variance must be greater than 0.");
                    }

                    encode_bbox = new NormalizedBBox((bbox.xmin - prior_bbox.xmin) / fPriorWidth / rgfPriorVariance[0],
                                                     (bbox.ymin - prior_bbox.ymin) / fPriorHeight / rgfPriorVariance[1],
                                                     (bbox.xmax - prior_bbox.xmax) / fPriorWidth / rgfPriorVariance[2],
                                                     (bbox.ymax - prior_bbox.ymax) / fPriorHeight / rgfPriorVariance[3]);
                }
            }
            break;

        default:
            m_log.FAIL("Unknown code type '" + code_type.ToString() + "'!");
            return null;
    }

    return encode_bbox;
}
1447
/// <summary>
/// Test whether a bbox satisfies the emit constraint with respect to a source bbox.
/// </summary>
/// <param name="src_bbox">Specifies the source bbox the constraint is evaluated against.</param>
/// <param name="bbox">Specifies the bbox being tested.</param>
/// <param name="emit_constraint">Specifies the constraint type and, for MIN_OVERLAP, the overlap that must be exceeded.</param>
/// <returns>True when the constraint is met; false otherwise (including after m_log.FAIL for an unknown type).</returns>
public bool MeetEmitConstraint(NormalizedBBox src_bbox, NormalizedBBox bbox, EmitConstraint emit_constraint)
{
    switch (emit_constraint.emit_type)
    {
        case EmitConstraint.EmitType.CENTER:
            {
                // The center of bbox must lie inside src_bbox (edges inclusive).
                float fCenterX = (bbox.xmin + bbox.xmax) / 2;
                float fCenterY = (bbox.ymin + bbox.ymax) / 2;

                bool bInsideX = (fCenterX >= src_bbox.xmin && fCenterX <= src_bbox.xmax);
                bool bInsideY = (fCenterY >= src_bbox.ymin && fCenterY <= src_bbox.ymax);

                return bInsideX && bInsideY;
            }

        case EmitConstraint.EmitType.MIN_OVERLAP:
            // The coverage of bbox by src_bbox must strictly exceed the configured overlap.
            return Coverage(bbox, src_bbox) > emit_constraint.emit_overlap;

        default:
            m_log.FAIL("Unknown emit type!");
            return false;
    }
}
1482
/// <summary>
/// Compute the fraction of bbox1 that is covered by bbox2.
/// </summary>
/// <param name="bbox1">Specifies the bbox whose covered fraction is measured.</param>
/// <param name="bbox2">Specifies the bbox doing the covering.</param>
/// <returns>The intersection area divided by the area of bbox1, or 0 when the intersection has no area.</returns>
public float Coverage(NormalizedBBox bbox1, NormalizedBBox bbox2)
{
    NormalizedBBox intersectBBox = Intersect(bbox1, bbox2);
    float fIntersectSize = Size(intersectBBox);

    if (fIntersectSize > 0)
    {
        // A positive intersection implies bbox1 itself has positive area, so the
        // division is safe.  The ratio is intersection / bbox1 (a value in (0, 1]);
        // the inverse would always be >= 1 and defeat any overlap threshold.
        float fBbox1Size = Size(bbox1);
        return fIntersectSize / fBbox1Size;
    }

    return 0;
}
1502
{
    // Maps bbox, expressed as fractions of srcBbox, into the coordinate system
    // that srcBbox itself is expressed in.
    float fSrcWidth = srcBbox.xmax - srcBbox.xmin;
    float fSrcHeight = srcBbox.ymax - srcBbox.ymin;

    // Every coordinate is offset from the source's MIN corner; offsetting the max
    // coordinates from the source's max corner would push them outside the source
    // (a unit bbox [0,0,1,1] must map exactly onto srcBbox).
    return new NormalizedBBox(srcBbox.xmin + bbox.xmin * fSrcWidth,
                              srcBbox.ymin + bbox.ymin * fSrcHeight,
                              srcBbox.xmin + bbox.xmax * fSrcWidth,
                              srcBbox.ymin + bbox.ymax * fSrcHeight, 0, bbox.difficult);
}
1519
/// <summary>
/// Compute the Jaccard overlap (intersection over union) of two bboxes.
/// </summary>
/// <param name="bbox1">Specifies the first bbox.</param>
/// <param name="bbox2">Specifies the second bbox.</param>
/// <param name="bNormalized">When false, extents are measured with the +1 convention that Size uses for bboxes outside the [0,1] range.</param>
/// <returns>The intersection area divided by the union area, or 0 when the bboxes do not overlap.</returns>
public float JaccardOverlap(NormalizedBBox bbox1, NormalizedBBox bbox2, bool bNormalized = true)
{
    // Disjoint bboxes never overlap.  Testing this up front matters when
    // bNormalized is false: the +1 applied below would otherwise turn a
    // zero-extent intersection result into a 1x1 overlap.
    if (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin ||
        bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin)
        return 0;

    NormalizedBBox intersect_bbox = Intersect(bbox1, bbox2);
    float fIntersectWidth = intersect_bbox.xmax - intersect_bbox.xmin;
    float fIntersectHeight = intersect_bbox.ymax - intersect_bbox.ymin;

    if (!bNormalized)
    {
        fIntersectWidth += 1;
        fIntersectHeight += 1;
    }

    if (fIntersectWidth > 0 && fIntersectHeight > 0)
    {
        float fIntersectSize = fIntersectWidth * fIntersectHeight;
        // Measure both bboxes with the same convention as the intersection so the
        // ratio cannot exceed 1 when bNormalized is false.
        float fBbox1Size = Size(bbox1, bNormalized);
        float fBbox2Size = Size(bbox2, bNormalized);
        return fIntersectSize / (fBbox1Size + fBbox2Size - fIntersectSize);
    }

    return 0;
}
1549
{
    // Converts a normalized bbox into the coordinates of an image of size szImg,
    // undoing the resize described by p (when one is given).
    int nImgHeight = (int)szImg.Height;
    int nImgWidth = (int)szImg.Width;
    NormalizedBBox work_bbox = bbox.Clone();

    // No resize information: clip the normalized bbox, then scale it to the image size.
    if (p == null)
    {
        work_bbox = Clip(work_bbox);
        return Scale(work_bbox, nImgHeight, nImgWidth);
    }

    float fTargetHeight = p.height;
    float fTargetWidth = p.width;
    float fTargetAspect = fTargetWidth / fTargetHeight;
    int nScaleHeight = (int)p.height_scale;
    int nScaleWidth = (int)p.width_scale;
    float fImgAspect = (float)nImgWidth / (float)nImgHeight;
    ResizeParameter.ResizeMode mode = p.resize_mode;

    if (mode == ResizeParameter.ResizeMode.WARP)
    {
        work_bbox = Clip(work_bbox);
        return Scale(work_bbox, nImgHeight, nImgWidth);
    }

    if (mode == ResizeParameter.ResizeMode.FIT_LARGE_SIZE_AND_PAD)
    {
        // Region of the resized frame that holds actual image content; the rest is padding.
        float fContentXMin = 0.0f;
        float fContentYMin = 0.0f;
        float fContentXMax = 1.0f;
        float fContentYMax = 1.0f;
        float fPad;

        if (fImgAspect > fTargetAspect)
        {
            // Image is relatively wider than the target: padding is added above and below.
            fPad = (fTargetHeight - fTargetWidth / fImgAspect) / 2;
            fContentYMin = fPad / fTargetHeight;
            fContentYMax = 1.0f - fPad / fTargetHeight;
        }
        else
        {
            // Otherwise the padding is added left and right.
            fPad = (fTargetWidth - fTargetHeight * fImgAspect) / 2;
            fContentXMin = fPad / fTargetWidth;
            fContentXMax = 1.0f - fPad / fTargetWidth;
        }

        NormalizedBBox content_bbox = new NormalizedBBox(fContentXMin, fContentYMin, fContentXMax, fContentYMax);
        Project(content_bbox, bbox, out work_bbox);
        work_bbox = Clip(work_bbox);
        return Scale(work_bbox, nImgHeight, nImgWidth);
    }

    if (mode == ResizeParameter.ResizeMode.FIT_SMALL_SIZE)
    {
        if (nScaleHeight != 0 && nScaleWidth != 0)
        {
            // Explicit scales given: scale by them, then clip to the image bounds.
            work_bbox = Scale(work_bbox, nScaleHeight, nScaleWidth);
            return Clip(work_bbox, nImgHeight, nImgWidth);
        }

        work_bbox = Clip(work_bbox);
        return Scale(work_bbox, nImgHeight, nImgWidth);
    }

    m_log.FAIL("Unknown resize mode '" + mode.ToString() + "'!");
    return null;
}
1627
/// <summary>
/// Project a bbox onto the coordinate system defined by a source bbox.
/// </summary>
/// <param name="src">Specifies the source bbox that defines the new coordinate system.</param>
/// <param name="bbox">Specifies the bbox to project.</param>
/// <param name="proj_bbox">Receives the projected and clipped bbox, or a clone of <i>bbox</i> when it does not overlap <i>src</i>.</param>
/// <returns>True when the projected bbox has a positive size; false otherwise.</returns>
public bool Project(NormalizedBBox src, NormalizedBBox bbox, out NormalizedBBox proj_bbox)
{
    bool bOutsideX = (bbox.xmin >= src.xmax || bbox.xmax <= src.xmin);
    bool bOutsideY = (bbox.ymin >= src.ymax || bbox.ymax <= src.ymin);

    // Nothing to project when the bbox lies entirely outside (or only touches) the source.
    if (bOutsideX || bOutsideY)
    {
        proj_bbox = bbox.Clone();
        return false;
    }

    float fSrcW = src.xmax - src.xmin;
    float fSrcH = src.ymax - src.ymin;

    // Express each coordinate as a fraction of the source extents, keeping label and difficult flag.
    NormalizedBBox projected = new NormalizedBBox((bbox.xmin - src.xmin) / fSrcW,
                                                  (bbox.ymin - src.ymin) / fSrcH,
                                                  (bbox.xmax - src.xmin) / fSrcW,
                                                  (bbox.ymax - src.ymin) / fSrcH,
                                                  bbox.label, bbox.difficult);
    proj_bbox = Clip(projected);

    return Size(proj_bbox) > 0;
}
1658
/// <summary>
/// Rescale a bbox in place when the FIT_SMALL_SIZE resize mode is used with positive height and width scales.
/// </summary>
/// <param name="param">Specifies the resize parameter (mode, target height/width and height/width scales).</param>
/// <param name="nHeight">Specifies the original image height.</param>
/// <param name="nWidth">Specifies the original image width.</param>
/// <param name="crop_bbox">Specifies the crop region; its extents are multiplied with the resized dimensions.</param>
/// <param name="bbox">Specifies the bbox to update in place; left untouched when the conditions above are not met.</param>
public void Extrapolate(ResizeParameter param, int nHeight, int nWidth, NormalizedBBox crop_bbox, NormalizedBBox bbox)
{
    float fHeightScale = param.height_scale;
    float fWidthScale = param.width_scale;

    if (fHeightScale > 0 && fWidthScale > 0 && param.resize_mode == ResizeParameter.ResizeMode.FIT_SMALL_SIZE)
    {
        float fOrigAspect = (float)nWidth / (float)nHeight;
        float fResizeHeight = param.height;
        float fResizeWidth = param.width;
        float fResizeAspect = fResizeWidth / fResizeHeight;

        // Grow one resized dimension so the resized frame has the original aspect ratio.
        if (fOrigAspect < fResizeAspect)
            fResizeHeight = fResizeWidth / fOrigAspect;
        else
            fResizeWidth = fResizeHeight * fOrigAspect;

        float fCropHeight = fResizeHeight * (crop_bbox.ymax - crop_bbox.ymin);
        float fCropWidth = fResizeWidth * (crop_bbox.xmax - crop_bbox.xmin);
        m_log.CHECK_GE(fCropWidth, fWidthScale, "The crop width must be >= the width scale!");
        m_log.CHECK_GE(fCropHeight, fHeightScale, "The crop height must be >= the height scale!");

        // Assign each coordinate directly: x values scale with the width ratio and
        // y values with the height ratio.  Passing them positionally in
        // (xmin, xmax, ymin, ymax) order to a setter that follows the constructor's
        // (xmin, ymin, xmax, ymax) order would swap ymin and xmax.
        bbox.xmin = bbox.xmin * fCropWidth / fWidthScale;
        bbox.xmax = bbox.xmax * fCropWidth / fWidthScale;
        bbox.ymin = bbox.ymin * fCropHeight / fHeightScale;
        bbox.ymax = bbox.ymax * fCropHeight / fHeightScale;
    }
}
1696
{
    // The bboxes are separated when one lies strictly beyond the other on either axis.
    bool bSeparatedX = (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin);
    bool bSeparatedY = (bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin);

    // Return [0,0,0,0] if there is no intersection.
    if (bSeparatedX || bSeparatedY)
        return new NormalizedBBox(0.0f, 0.0f, 0.0f, 0.0f);

    // Otherwise the intersection spans the larger of the mins to the smaller of the maxes.
    float fLeft = Math.Max(bbox1.xmin, bbox2.xmin);
    float fTop = Math.Max(bbox1.ymin, bbox2.ymin);
    float fRight = Math.Min(bbox1.xmax, bbox2.xmax);
    float fBottom = Math.Min(bbox1.ymax, bbox2.ymax);

    return new NormalizedBBox(fLeft, fTop, fRight, fBottom);
}
1715
/// <summary>
/// Return a copy of a bbox with its coordinates clamped to [0, fWidth] horizontally and [0, fHeight] vertically.
/// </summary>
/// <param name="bbox">Specifies the bbox to clip; it is not modified.</param>
/// <param name="fHeight">Specifies the upper limit for the y coordinates (default 1).</param>
/// <param name="fWidth">Specifies the upper limit for the x coordinates (default 1).</param>
/// <returns>The clipped clone, with its size recomputed.</returns>
public NormalizedBBox Clip(NormalizedBBox bbox, float fHeight = 1.0f, float fWidth = 1.0f)
{
    // Apply the upper limits first ...
    float fCappedXMin = Math.Min(bbox.xmin, fWidth);
    float fCappedYMin = Math.Min(bbox.ymin, fHeight);
    float fCappedXMax = Math.Min(bbox.xmax, fWidth);
    float fCappedYMax = Math.Min(bbox.ymax, fHeight);

    // ... then the lower limit of zero, writing into a clone so the input stays untouched.
    NormalizedBBox result = bbox.Clone();
    result.xmin = Math.Max(fCappedXMin, 0.0f);
    result.ymin = Math.Max(fCappedYMin, 0.0f);
    result.xmax = Math.Max(fCappedXMax, 0.0f);
    result.ymax = Math.Max(fCappedYMax, 0.0f);
    result.size = Size(result);

    return result;
}
1733
/// <summary>
/// Return a copy of a bbox with x coordinates multiplied by nWidth and y coordinates multiplied by nHeight.
/// </summary>
/// <param name="bbox">Specifies the bbox to scale; it is not modified.</param>
/// <param name="nHeight">Specifies the factor applied to ymin and ymax.</param>
/// <param name="nWidth">Specifies the factor applied to xmin and xmax.</param>
/// <returns>The scaled clone, with its size recomputed.</returns>
public NormalizedBBox Scale(NormalizedBBox bbox, int nHeight, int nWidth)
{
    NormalizedBBox result = bbox.Clone();

    result.xmin = bbox.xmin * nWidth;
    result.xmax = bbox.xmax * nWidth;
    result.ymin = bbox.ymin * nHeight;
    result.ymax = bbox.ymax * nHeight;

    // The result is only treated as normalized when neither factor exceeds 1.
    bool bStillNormalized = (nWidth <= 1 && nHeight <= 1);
    result.size = Size(result, bStillNormalized);

    return result;
}
1752
/// <summary>
/// Compute the area of a bbox.
/// </summary>
/// <param name="bbox">Specifies the bbox to measure.</param>
/// <param name="bNormalized">When true the area is width * height; when false it is (width + 1) * (height + 1).</param>
/// <returns>The area, or 0 when xmax is less than xmin or ymax is less than ymin.</returns>
public float Size(NormalizedBBox bbox, bool bNormalized = true)
{
    // An inverted bbox (xmax < xmin or ymax < ymin) is invalid and has no area.
    bool bInverted = (bbox.xmax < bbox.xmin || bbox.ymax < bbox.ymin);
    if (bInverted)
        return 0f;

    float fExtentX = bbox.xmax - bbox.xmin;
    float fExtentY = bbox.ymax - bbox.ymin;

    // A bbox that is not in the [0,1] range gets one added to each extent.
    return (bNormalized) ? fExtentX * fExtentY : (fExtentX + 1) * (fExtentY + 1);
}
1775
/// <summary>
/// Find the matches between predictions (or priors) and the ground truth bboxes of every image.
/// </summary>
/// <param name="rgAllLocPreds">Specifies the location predictions, one entry per image; the image count is taken from this list.</param>
/// <param name="rgAllGtBboxes">Specifies the ground truth bboxes keyed by image index; images without an entry get empty match maps.</param>
/// <param name="rgPriorBboxes">Specifies the prior bboxes.</param>
/// <param name="rgrgPriorVariances">Specifies the prior variances, one list per prior.</param>
/// <param name="p">Specifies the MultiBoxLoss parameter that supplies the matching settings.</param>
/// <param name="rgAllMatchOverlaps">Receives, per image, the match overlaps keyed by label (-1 when the location is shared).</param>
/// <param name="rgAllMatchIndices">Receives, per image, the matched ground truth index per prior (-1 = unmatched) keyed by label.</param>
public void FindMatches(List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBboxes, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances, MultiBoxLossParameter p, out List<DictionaryMap<List<float>>> rgAllMatchOverlaps, out List<DictionaryMap<List<int>>> rgAllMatchIndices)
{
    rgAllMatchOverlaps = new List<DictionaryMap<List<float>>>();
    rgAllMatchIndices = new List<DictionaryMap<List<int>>>();

    int nNumClasses = (int)p.num_classes;
    m_log.CHECK_GE(nNumClasses, 1, "The num_classes should not be less than 1.");

    bool bShareLocation = p.share_location;
    int nLocClasses = (bShareLocation) ? 1 : nNumClasses;
    // NOTE(review): the declarations of matchType and codeType were absent from this
    // listing; they are read from the corresponding MultiBoxLossParameter settings - confirm.
    MultiBoxLossParameter.MatchType matchType = p.match_type;
    float fOverlapThreshold = p.overlap_threshold;
    bool bUsePriorForMatching = p.use_prior_for_matching;
    int nBackgroundLabelId = (int)p.background_label_id;
    PriorBoxParameter.CodeType codeType = p.code_type;
    bool bEncodeVarianceInTarget = p.encode_variance_in_target;
    bool bIgnoreCrossBoundaryBbox = p.ignore_cross_boundary_bbox;

    // Find the matches.
    int nNum = rgAllLocPreds.Count;
    for (int i = 0; i < nNum; i++)
    {
        DictionaryMap<List<int>> rgMatchIndices = new DictionaryMap<List<int>>(null);
        DictionaryMap<List<float>> rgMatchOverlaps = new DictionaryMap<List<float>>(null);

        // Check if there is a ground truth for the current image.
        if (!rgAllGtBboxes.Map.ContainsKey(i))
        {
            // There is no gt for current image. All predictions are negative.
            rgAllMatchIndices.Add(rgMatchIndices);
            rgAllMatchOverlaps.Add(rgMatchOverlaps);
            continue;
        }

        // Find match between predictions and ground truth.
        List<NormalizedBBox> rgGtBboxes = rgAllGtBboxes[i];
        if (!bUsePriorForMatching)
        {
            for (int c = 0; c < nLocClasses; c++)
            {
                int nLabel = (bShareLocation) ? -1 : c;

                // Ignore background loc predictions.
                if (!bShareLocation && nLabel == nBackgroundLabelId)
                    continue;

                // Decode the prediction into bbox first.
                bool bClipBbox = false;
                List<NormalizedBBox> rgLocBBoxes = Decode(rgPriorBboxes, rgrgPriorVariances, codeType, bEncodeVarianceInTarget, bClipBbox, rgAllLocPreds[i][nLabel]);

                List<int> rgMatchIndices1;
                List<float> rgMatchOverlaps1;
                Match(rgGtBboxes, rgLocBBoxes, nLabel, matchType, fOverlapThreshold, bIgnoreCrossBoundaryBbox, out rgMatchIndices1, out rgMatchOverlaps1);

                rgMatchIndices[nLabel] = rgMatchIndices1;
                rgMatchOverlaps[nLabel] = rgMatchOverlaps1;
            }
        }
        else
        {
            // Use prior bboxes to match against all ground truth.
            List<int> rgTempMatchIndices;
            List<float> rgTempMatchOverlaps;
            int nLabel = -1;

            Match(rgGtBboxes, rgPriorBboxes, nLabel, matchType, fOverlapThreshold, bIgnoreCrossBoundaryBbox, out rgTempMatchIndices, out rgTempMatchOverlaps);

            if (bShareLocation)
            {
                rgMatchIndices[nLabel] = rgTempMatchIndices;
                rgMatchOverlaps[nLabel] = rgTempMatchOverlaps;
            }
            else
            {
                // Get ground truth label for each ground truth bbox.
                List<int> rgGtLabels = new List<int>();
                for (int g = 0; g < rgGtBboxes.Count; g++)
                {
                    rgGtLabels.Add(rgGtBboxes[g].label);
                }

                // Distribute the matching results to different loc_class.
                for (int c = 0; c < nLocClasses; c++)
                {
                    // Ignore background loc predictions.
                    if (c == nBackgroundLabelId)
                        continue;

                    // Each class gets its OWN list, initialized to 'unmatched' (-1), and only
                    // the matches whose ground truth label equals the class are copied in.
                    // Sharing the temp list across classes would hand every class all matches
                    // and let later edits to one class's list leak into the others.
                    List<int> rgClassMatchIndices = Utility.Create<int>(rgTempMatchIndices.Count, -1);

                    for (int m = 0; m < rgTempMatchIndices.Count; m++)
                    {
                        int nGtIdx = rgTempMatchIndices[m];
                        if (nGtIdx > -1)
                        {
                            m_log.CHECK_LT(nGtIdx, rgGtLabels.Count, "The gt index is larger than the number of gt labels.");
                            if (c == rgGtLabels[nGtIdx])
                                rgClassMatchIndices[m] = nGtIdx;
                        }
                    }

                    rgMatchIndices[c] = rgClassMatchIndices;
                    rgMatchOverlaps[c] = new List<float>(rgTempMatchOverlaps);
                }
            }
        }

        rgAllMatchIndices.Add(rgMatchIndices);
        rgAllMatchOverlaps.Add(rgMatchOverlaps);
    }
}
1895
/// <summary>
/// Count the matched entries (index greater than -1) across the first nNum images.
/// </summary>
/// <param name="rgAllMatchIndices">Specifies the per-image match indices keyed by label.</param>
/// <param name="nNum">Specifies how many leading entries of <i>rgAllMatchIndices</i> to visit.</param>
/// <returns>The total number of matched entries.</returns>
public int CountNumMatches(List<DictionaryMap<List<int>>> rgAllMatchIndices, int nNum)
{
    int nTotal = 0;

    for (int nImage = 0; nImage < nNum; nImage++)
    {
        // Every label's list contributes the number of entries that hold a match.
        foreach (List<int> rgIndices in rgAllMatchIndices[nImage].Map.Values)
        {
            nTotal += rgIndices.Count(nIdx => nIdx > -1);
        }
    }

    return nTotal;
}
1924
/// <summary>
/// Decide whether a prior is a candidate for mining under the given mining type.
/// </summary>
/// <param name="miningType">Specifies the mining type.</param>
/// <param name="nMatchIdx">Specifies the prior's match index (-1 = unmatched).</param>
/// <param name="fMatchOverlap">Specifies the prior's match overlap.</param>
/// <param name="fNegOverlap">Specifies the overlap a MAX_NEGATIVE candidate must stay below.</param>
/// <returns>MAX_NEGATIVE: true only for unmatched priors whose overlap is below <i>fNegOverlap</i>; HARD_EXAMPLE: always true; any other type: false.</returns>
public bool IsEligibleMining(MultiBoxLossParameter.MiningType miningType, int nMatchIdx, float fMatchOverlap, float fNegOverlap)
{
    switch (miningType)
    {
        case MultiBoxLossParameter.MiningType.MAX_NEGATIVE:
            return (nMatchIdx == -1 && fMatchOverlap < fNegOverlap);

        case MultiBoxLossParameter.MiningType.HARD_EXAMPLE:
            return true;

        default:
            return false;
    }
}
1951
/// <summary>
/// Mine the hard examples (or hardest negatives) for every image based on the confidence and localization losses.
/// </summary>
/// <param name="blobConf">Specifies the confidence predictions.</param>
/// <param name="rgAllLocPreds">Specifies the location predictions, one entry per image.</param>
/// <param name="rgAllGtBBoxes">Specifies the ground truth bboxes keyed by image index.</param>
/// <param name="rgPriorBboxes">Specifies the prior bboxes.</param>
/// <param name="rgrgPriorVariances">Specifies the prior variances; must have one entry per prior.</param>
/// <param name="rgAllMatchOverlaps">Specifies the per-image match overlaps keyed by label.</param>
/// <param name="p">Specifies the MultiBoxLoss parameter that supplies the mining settings.</param>
/// <param name="rgAllMatchIndices">Specifies the per-image match indices keyed by label; with HARD_EXAMPLE mining, matches that are not selected are reset to -1 in place.</param>
/// <param name="rgAllNegIndices">Receives one list of selected negative prior indices per image (appended).</param>
/// <param name="nNumNegs">Receives the total number of selected negatives.</param>
/// <returns>The number of matches that remain after mining.</returns>
public int MineHardExamples(Blob<T> blobConf, List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBBoxes, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances, List<DictionaryMap<List<float>>> rgAllMatchOverlaps, MultiBoxLossParameter p, List<DictionaryMap<List<int>>> rgAllMatchIndices, List<List<int>> rgAllNegIndices, out int nNumNegs)
{
    int nNum = rgAllLocPreds.Count;
    int nNumMatches = CountNumMatches(rgAllMatchIndices, nNum);

    nNumNegs = 0;

    if (nNumMatches == 0)
        return nNumMatches;

    int nNumPriors = rgPriorBboxes.Count;
    m_log.CHECK_EQ(nNumPriors, rgrgPriorVariances.Count, "The number of priors must be the same as the number of prior variances.");

    // Get parameters.
    int nNumClasses = (int)p.num_classes;
    m_log.CHECK_GE(nNumClasses, 1, "num_classes should be at least 1.");

    int nBackgroundLabelId = (int)p.background_label_id;
    bool bUsePriorForNms = p.use_prior_for_nms;
    // NOTE(review): the declarations of confLossType, miningType, locLossType and codeType
    // were absent from this listing; they are read from the corresponding
    // MultiBoxLossParameter settings - confirm.
    MultiBoxLossParameter.ConfLossType confLossType = p.conf_loss_type;
    MultiBoxLossParameter.MiningType miningType = p.mining_type;

    if (miningType == MultiBoxLossParameter.MiningType.NONE)
        return nNumMatches;

    MultiBoxLossParameter.LocLossType locLossType = p.loc_loss_type;
    float fNegPosRatio = p.neg_pos_ratio;
    float fNegOverlap = p.neg_overlap;
    PriorBoxParameter.CodeType codeType = p.code_type;
    bool bEncodeVarianceInTarget = p.encode_variance_in_target;
    float fNmsThreshold = 0;
    int nTopK = -1;
    bool bNmsActive = (p.nms_param != null && p.nms_param.Active);

    if (bNmsActive)
    {
        fNmsThreshold = p.nms_param.nms_threshold;
        nTopK = p.nms_param.top_k.GetValueOrDefault(-1);
    }

    int nSampleSize = p.sample_size;

    // Compute confidence losses based on matching results.
    float[] rgConfData = Utility.ConvertVecF<T>(blobConf.mutable_cpu_data);
    List<List<float>> rgAllConfLoss = ComputeConfLoss(rgConfData, nNum, nNumPriors, nNumClasses, nBackgroundLabelId, confLossType, rgAllMatchIndices, rgAllGtBBoxes);
    List<List<float>> rgAllLocLoss = new List<List<float>>();

    // Compute localization losses based on matching results.
    if (miningType == MultiBoxLossParameter.MiningType.HARD_EXAMPLE)
    {
        Blob<T> blobLocPred = null;
        Blob<T> blobLocGt = null;

        try
        {
            blobLocPred = new Blob<T>(m_cuda, m_log);
            blobLocGt = new Blob<T>(m_cuda, m_log);

            List<int> rgLocShape = Utility.Create<int>(2, 1);
            rgLocShape[1] = nNumMatches * 4;
            blobLocPred.Reshape(rgLocShape);
            blobLocGt.Reshape(rgLocShape);
            EncodeLocPrediction(rgAllLocPreds, rgAllGtBBoxes, rgAllMatchIndices, rgPriorBboxes, rgrgPriorVariances, p, blobLocPred, blobLocGt);

            rgAllLocLoss = ComputeLocLoss(blobLocPred, blobLocGt, rgAllMatchIndices, nNum, nNumPriors, locLossType);
        }
        finally
        {
            // The blobs are scratch space for this call only; release them even on failure.
            if (blobLocGt != null)
                blobLocGt.Dispose();

            if (blobLocPred != null)
                blobLocPred.Dispose();
        }
    }
    // No localization loss.
    else
    {
        for (int i = 0; i < nNum; i++)
        {
            List<float> rgLocLoss = Utility.Create<float>(nNumPriors, 0.0f);
            rgAllLocLoss.Add(rgLocLoss);
        }
    }

    // Used only to throttle the progress messages to roughly one per second.
    Stopwatch sw = new Stopwatch();
    sw.Start();

    for (int i = 0; i < nNum; i++)
    {
        DictionaryMap<List<int>> rgMatchIndices = rgAllMatchIndices[i];
        DictionaryMap<List<float>> rgMatchOverlaps = rgAllMatchOverlaps[i];

        // loc + conf loss.
        List<float> rgConfLoss = rgAllConfLoss[i];
        List<float> rgLocLoss = rgAllLocLoss[i];
        List<float> rgLoss = new List<float>(rgConfLoss.Count);

        for (int j = 0; j < rgConfLoss.Count; j++)
        {
            rgLoss.Add(rgConfLoss[j] + rgLocLoss[j]);
        }

        // Pick negatives or hard examples based on loss.  Only membership is ever
        // queried on the selection, so a hash set replaces the linear list lookups.
        HashSet<int> rgSelIndices = new HashSet<int>();
        List<int> rgNegIndices = new List<int>();

        foreach (KeyValuePair<int, List<int>> kv in rgMatchIndices.Map)
        {
            int nLabel = kv.Key;
            List<int> rgMatchIndex = kv.Value;

            // Get potential indices and loss pairs (key = loss, value = prior index).
            List<KeyValuePair<float, int>> rgLossIndices = new List<KeyValuePair<float, int>>();

            for (int m = 0; m < rgMatchIndex.Count; m++)
            {
                if (IsEligibleMining(miningType, rgMatchIndex[m], rgMatchOverlaps[nLabel][m], fNegOverlap))
                    rgLossIndices.Add(new KeyValuePair<float, int>(rgLoss[m], m));
            }

            int nNumSel = rgLossIndices.Count;

            if (miningType == MultiBoxLossParameter.MiningType.MAX_NEGATIVE)
            {
                // Limit the negatives to neg_pos_ratio times the number of positives.
                int nNumPos = 0;

                for (int m = 0; m < rgMatchIndex.Count; m++)
                {
                    if (rgMatchIndex[m] > -1)
                        nNumPos++;
                }

                nNumSel = Math.Min((int)(nNumPos * fNegPosRatio), nNumSel);
            }
            else if (miningType == MultiBoxLossParameter.MiningType.HARD_EXAMPLE)
            {
                m_log.CHECK_GT(nSampleSize, 0, "The sample size must be greater than 0 for HARD_EXAMPLE mining.");
                nNumSel = Math.Min(nSampleSize, nNumSel);
            }

            // Select samples.
            if (bNmsActive && fNmsThreshold > 0)
            {
                // Do nms before selecting samples.  The candidates are exactly the eligible
                // priors gathered above, in the same order, so entry k of the NMS input
                // corresponds to rgLossIndices[k].
                List<NormalizedBBox> rgCandidateBoxes;

                if (bUsePriorForNms)
                {
                    rgCandidateBoxes = rgPriorBboxes;
                }
                else
                {
                    // Decode the prediction into bbox first.
                    bool bClipBbox = false;
                    rgCandidateBoxes = Decode(rgPriorBboxes, rgrgPriorVariances, codeType, bEncodeVarianceInTarget, bClipBbox, rgAllLocPreds[i][nLabel]);
                }

                List<float> rgSelLoss = new List<float>(rgLossIndices.Count);
                List<NormalizedBBox> rgSelBoxes = new List<NormalizedBBox>(rgLossIndices.Count);

                foreach (KeyValuePair<float, int> item in rgLossIndices)
                {
                    rgSelLoss.Add(item.Key);
                    rgSelBoxes.Add(rgCandidateBoxes[item.Value]);
                }

                // Do non-maximum suppression based on the loss.
                List<int> rgNmsIndices = ApplyNMS(rgSelBoxes, rgSelLoss, fNmsThreshold, nTopK);
                if (rgNmsIndices.Count < nNumSel)
                    m_log.WriteLine("WARNING: Not enough samples after NMS: " + rgNmsIndices.Count.ToString());

                // Pick top example indices after nms.
                nNumSel = Math.Min(rgNmsIndices.Count, nNumSel);
                for (int n = 0; n < nNumSel; n++)
                {
                    rgSelIndices.Add(rgLossIndices[rgNmsIndices[n]].Value);
                }
            }
            else
            {
                // Pick top example indices based on loss.
                rgLossIndices = rgLossIndices.OrderByDescending(p1 => p1.Key).ToList();
                for (int n = 0; n < nNumSel; n++)
                {
                    rgSelIndices.Add(rgLossIndices[n].Value);
                }
            }

            // Update the match_indices and select neg_indices.
            for (int m = 0; m < rgMatchIndex.Count; m++)
            {
                if (rgMatchIndex[m] > -1)
                {
                    // With HARD_EXAMPLE mining a positive that was not selected is dropped.
                    if (miningType == MultiBoxLossParameter.MiningType.HARD_EXAMPLE && !rgSelIndices.Contains(m))
                    {
                        rgMatchIndex[m] = -1;
                        nNumMatches -= 1;
                    }
                }
                else if (rgMatchIndex[m] == -1)
                {
                    if (rgSelIndices.Contains(m))
                    {
                        rgNegIndices.Add(m);
                        nNumNegs += 1;
                    }
                }
            }
        }

        rgAllNegIndices.Add(rgNegIndices);

        if (sw.Elapsed.TotalMilliseconds > 1000)
        {
            double dfPct = (double)(i + 1) / nNum;
            m_log.WriteLine("Mining at " + dfPct.ToString("P") + ", " + (i + 1).ToString("N0") + " of " + nNum.ToString("N0") + "...");
            sw.Restart();
        }
    }

    return nNumMatches;
}
2183
/// <summary>
/// Fill the location prediction and ground truth blobs with one 4-value entry per matched prior.
/// </summary>
/// <param name="rgAllLocPreds">Specifies the location predictions, one entry per image; must contain every label found in the match indices.</param>
/// <param name="rgAllGtBboxes">Specifies the ground truth bboxes keyed by image index.</param>
/// <param name="rgAllMatchIndices">Specifies the per-image match indices keyed by label; entries of -1 or less are skipped.</param>
/// <param name="rgPriorBboxes">Specifies the prior bboxes.</param>
/// <param name="rgrgPriorVariances">Specifies the prior variances, one list per prior.</param>
/// <param name="p">Specifies the MultiBoxLoss parameter that supplies the encoding settings.</param>
/// <param name="blobLocPred">Specifies the blob that receives the location predictions; its count fixes the capacity.</param>
/// <param name="blobLocGt">Specifies the blob that receives the encoded ground truth; its count fixes the capacity.</param>
public void EncodeLocPrediction(List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBboxes, List<DictionaryMap<List<int>>> rgAllMatchIndices, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances, MultiBoxLossParameter p, Blob<T> blobLocPred, Blob<T> blobLocGt)
{
    int nLocPredData = blobLocPred.count();
    float[] rgLocPredData = new float[nLocPredData];
    int nLocGtData = blobLocGt.count();
    float[] rgLocGtData = new float[nLocGtData];
    int nCapacity = Math.Min(nLocPredData, nLocGtData);

    int nNum = rgAllLocPreds.Count;
    // Get parameters.
    // NOTE(review): the declaration of codeType was absent from this listing; it is
    // read from the corresponding MultiBoxLossParameter setting - confirm.
    PriorBoxParameter.CodeType codeType = p.code_type;
    bool bEncodeVarianceInTarget = p.encode_variance_in_target;
    bool bBpInside = p.bp_inside;
    bool bUsePriorForMatching = p.use_prior_for_matching;
    int nCount = 0;

    for (int i = 0; i < nNum; i++)
    {
        foreach (KeyValuePair<int, List<int>> kv in rgAllMatchIndices[i].Map)
        {
            int nLabel = kv.Key;
            List<int> rgMatchIndex = kv.Value;

            m_log.CHECK(rgAllLocPreds[i].Contains(nLabel), "The all local pred must contain the label '" + nLabel.ToString() + "'!");
            List<NormalizedBBox> rgLocPred = rgAllLocPreds[i][nLabel];

            for (int j = 0; j < rgMatchIndex.Count; j++)
            {
                if (rgMatchIndex[j] <= -1)
                    continue;

                // Each match occupies four consecutive values; make sure they fit before writing.
                int nOffset = nCount * 4;
                m_log.CHECK_LT(nOffset + 3, nCapacity, "The loc pred/gt blobs are too small for the number of matches!");

                // Store encoded ground truth.
                int nGtIdx = rgMatchIndex[j];
                m_log.CHECK(rgAllGtBboxes.Map.ContainsKey(i), "All gt bboxes should contain '" + i.ToString() + "'!");
                m_log.CHECK_LT(nGtIdx, rgAllGtBboxes[i].Count, "The ground truth index should be less than the number of ground truths at '" + i.ToString() + "'!");
                NormalizedBBox gtBbox = rgAllGtBboxes[i][nGtIdx];
                m_log.CHECK_LT(j, rgPriorBboxes.Count, "The prior bbox count is too small!");
                NormalizedBBox gtEncode = Encode(rgPriorBboxes[j], rgrgPriorVariances[j], codeType, bEncodeVarianceInTarget, gtBbox);

                rgLocGtData[nOffset + 0] = gtEncode.xmin;
                rgLocGtData[nOffset + 1] = gtEncode.ymin;
                rgLocGtData[nOffset + 2] = gtEncode.xmax;
                rgLocGtData[nOffset + 3] = gtEncode.ymax;

                // Store location prediction.
                m_log.CHECK_LT(j, rgLocPred.Count, "The loc pred count is too small!");
                NormalizedBBox locPred = rgLocPred[j];

                if (bBpInside)
                {
                    NormalizedBBox matchBbox = rgPriorBboxes[j];

                    if (!bUsePriorForMatching)
                    {
                        bool bClipBbox = false;
                        matchBbox = Decode(rgPriorBboxes[j], rgrgPriorVariances[j], codeType, bEncodeVarianceInTarget, bClipBbox, locPred);
                    }

                    // When a dimension of match_bbox is outside of image region, use
                    // gt_encode to simulate zero gradient.
                    rgLocPredData[nOffset + 0] = (matchBbox.xmin < 0 || matchBbox.xmin > 1) ? gtEncode.xmin : locPred.xmin;
                    rgLocPredData[nOffset + 1] = (matchBbox.ymin < 0 || matchBbox.ymin > 1) ? gtEncode.ymin : locPred.ymin;
                    rgLocPredData[nOffset + 2] = (matchBbox.xmax < 0 || matchBbox.xmax > 1) ? gtEncode.xmax : locPred.xmax;
                    rgLocPredData[nOffset + 3] = (matchBbox.ymax < 0 || matchBbox.ymax > 1) ? gtEncode.ymax : locPred.ymax;
                }
                else
                {
                    rgLocPredData[nOffset + 0] = locPred.xmin;
                    rgLocPredData[nOffset + 1] = locPred.ymin;
                    rgLocPredData[nOffset + 2] = locPred.xmax;
                    rgLocPredData[nOffset + 3] = locPred.ymax;
                }

                if (bEncodeVarianceInTarget)
                {
                    // Divide both the prediction and the ground truth by the prior variance.
                    for (int k = 0; k < 4; k++)
                    {
                        m_log.CHECK_GT(rgrgPriorVariances[j][k], 0, "The variance at " + j.ToString() + ", " + k.ToString() + " must be greater than zero.");
                        rgLocPredData[nOffset + k] /= rgrgPriorVariances[j][k];
                        rgLocGtData[nOffset + k] /= rgrgPriorVariances[j][k];
                    }
                }

                nCount++;
            }
        }
    }

    blobLocPred.mutable_cpu_data = Utility.ConvertVec<T>(rgLocPredData);
    blobLocGt.mutable_cpu_data = Utility.ConvertVec<T>(rgLocGtData);
}
2284
/// <summary>
/// Encode the confidence predictions and ground truth for each matched prior.
/// </summary>
/// <param name="rgfConfData">Specifies the confidence scores, read as nNumClasses consecutive values per prior, image after image.</param>
/// <param name="nNum">Specifies the number of images to process.</param>
/// <param name="nNumPriors">Specifies the number of priors per image.</param>
/// <param name="p">Specifies the MultiBoxLossParameter; num_classes, background_label_id, map_object_to_agnostic, do_neg_mining, mining_type and conf_loss_type are read.</param>
/// <param name="rgAllMatchIndices">Specifies, per image, the match index of every prior (values of -1 or less mark unmatched priors).</param>
/// <param name="rgAllNegIndices">Specifies, per image, the prior indices selected as negatives (only used when mining is enabled).</param>
/// <param name="rgAllGtBBoxes">Specifies the ground truth bboxes keyed by image index; images without an entry are skipped.</param>
/// <param name="blobConfPred">Receives the encoded confidence predictions; its current count sizes the output buffer.</param>
/// <param name="blobConfGt">Receives the encoded confidence ground truth; its current count sizes the output buffer.</param>
public void EncodeConfPrediction(float[] rgfConfData, int nNum, int nNumPriors, MultiBoxLossParameter p, List<DictionaryMap<List<int>>> rgAllMatchIndices, List<List<int>> rgAllNegIndices, DictionaryMap<List<NormalizedBBox>> rgAllGtBBoxes, Blob<T> blobConfPred, Blob<T> blobConfGt)
{
    float[] rgConfPredData = new float[blobConfPred.count()];
    float[] rgConfGtData = new float[blobConfGt.count()];
    int nConfDataOffset = 0;
    int nConfGtDataOffset = 0;

    // Get parameters.
    int nNumClasses = (int)p.num_classes;
    m_log.CHECK_GE(nNumClasses, 1, "The the num_classes should not be less than 1.");
    int nBackgroundLabelId = (int)p.background_label_id;
    bool bMapObjectToAgnostic = p.map_object_to_agnostic;

    if (bMapObjectToAgnostic)
    {
        if (nBackgroundLabelId >= 0)
            m_log.CHECK_EQ(nNumClasses, 2, "There should be 2 classes when mapping obect to agnostic with a background label.");
        else
            m_log.CHECK_EQ(nNumClasses, 1, "There should only b 1 class when mapping object to agnostic with no background label.");
    }

    // The mining type is the authoritative setting; do_neg_mining is only
    // validated against it when it has been specified.
    MultiBoxLossParameter.MiningType miningType = p.mining_type;
    bool bDoNegMining = (miningType != MultiBoxLossParameter.MiningType.NONE);

    if (p.do_neg_mining.HasValue)
    {
        m_log.WriteLine("WARNING: do_neg_mining is depreciated, using mining_type instead.");
        m_log.CHECK(p.do_neg_mining.Value == bDoNegMining, "The mining_type and do_neg_mining settings are inconsistent.");
    }

    MultiBoxLossParameter.ConfLossType confLossType = p.conf_loss_type;

    // Running number of (positive + negative) samples written so far; only
    // advanced when mining is enabled, where the outputs are densely packed.
    int nCount = 0;

    for (int i = 0; i < nNum; i++)
    {
        if (rgAllGtBBoxes.Map.ContainsKey(i))
        {
            // Save matched (positive) bboxes scores and labels.
            foreach (KeyValuePair<int, List<int>> kv in rgAllMatchIndices[i].Map)
            {
                List<int> rgMatchIndex = kv.Value;
                m_log.CHECK_EQ(rgMatchIndex.Count, nNumPriors, "The match index count should equal the number of priors '" + nNumPriors.ToString() + "'!");

                for (int j = 0; j < nNumPriors; j++)
                {
                    if (rgMatchIndex[j] <= -1)
                        continue;

                    int nGtLabel = (bMapObjectToAgnostic) ? nBackgroundLabelId + 1 : rgAllGtBBoxes[i][rgMatchIndex[j]].label;

                    // With mining the ground truth is packed by sample, otherwise it is laid out by prior.
                    int nIdx = (bDoNegMining) ? nCount : j;

                    switch (confLossType)
                    {
                        case MultiBoxLossParameter.ConfLossType.SOFTMAX:
                            rgConfGtData[nConfGtDataOffset + nIdx] = nGtLabel;
                            break;

                        case MultiBoxLossParameter.ConfLossType.LOGISTIC:
                            rgConfGtData[nConfGtDataOffset + nIdx * nNumClasses + nGtLabel] = 1;
                            break;

                        default:
                            m_log.FAIL("Unknown conf loss type.");
                            break;
                    }

                    if (bDoNegMining)
                    {
                        // Copy the scores of the matched prior into the next packed slot.
                        Array.Copy(rgfConfData, nConfDataOffset + j * nNumClasses, rgConfPredData, nCount * nNumClasses, nNumClasses);
                        nCount++;
                    }
                }
            }

            if (bDoNegMining)
            {
                // Save negative bboxes scores and labels.
                for (int n = 0; n < rgAllNegIndices[i].Count; n++)
                {
                    int j = rgAllNegIndices[i][n];
                    m_log.CHECK_LT(j, nNumPriors, "The number of priors is too small!");

                    Array.Copy(rgfConfData, nConfDataOffset + j * nNumClasses, rgConfPredData, nCount * nNumClasses, nNumClasses);

                    switch (confLossType)
                    {
                        case MultiBoxLossParameter.ConfLossType.SOFTMAX:
                            rgConfGtData[nConfGtDataOffset + nCount] = nBackgroundLabelId;
                            break;

                        case MultiBoxLossParameter.ConfLossType.LOGISTIC:
                            if (nBackgroundLabelId >= 0 && nBackgroundLabelId < nNumClasses)
                                rgConfGtData[nConfGtDataOffset + nCount * nNumClasses + nBackgroundLabelId] = 1;
                            break;

                        default:
                            m_log.FAIL("Unknown conf loss type.");
                            break;
                    }

                    nCount++;
                }
            }
        }

        // Go to next image: with mining only the input offset moves (outputs are
        // indexed by nCount); without mining the ground truth offset moves instead.
        if (bDoNegMining)
            nConfDataOffset += nNumPriors * nNumClasses;
        else
            nConfGtDataOffset += nNumPriors;
    }

    // NOTE(review): when mining is disabled nothing is written into rgConfPredData,
    // so blobConfPred receives all zeros here - confirm that callers expect this.
    blobConfPred.mutable_cpu_data = Utility.ConvertVec<T>(rgConfPredData);
    blobConfGt.mutable_cpu_data = Utility.ConvertVec<T>(rgConfGtData);
}
2418
/// <summary>
/// Compute the localization loss per matched prior.
/// </summary>
/// <param name="blobLocPred">Specifies the encoded location predictions, read as 4 values per matched prior.</param>
/// <param name="blobLocGt">Specifies the encoded location ground truth; must have the same count as blobLocPred.</param>
/// <param name="rgAllMatchIndices">Specifies, per image, the match index of every prior (values of -1 or less mark unmatched priors).</param>
/// <param name="nNum">Specifies the number of images to process.</param>
/// <param name="nNumPriors">Specifies the number of priors per image.</param>
/// <param name="lossType">Specifies the localization loss type (SMOOTH_L1 or L2).</param>
/// <returns>One list per image holding nNumPriors loss values; priors without a match keep a loss of 0.</returns>
public List<List<float>> ComputeLocLoss(Blob<T> blobLocPred, Blob<T> blobLocGt, List<DictionaryMap<List<int>>> rgAllMatchIndices, int nNum, int nNumPriors, MultiBoxLossParameter.LocLossType lossType)
{
    int nTotal = blobLocPred.count();
    m_log.CHECK_EQ(nTotal, blobLocGt.count(), "The loc pred and loc gt must have the same count!");

    // delta = pred - gt, subtracted through the cuda connection and then brought to the host.
    float[] rgfDelta = null;

    if (nTotal != 0)
    {
        m_blobDiff.ReshapeLike(blobLocPred);
        m_cuda.sub(nTotal, blobLocPred.gpu_data, blobLocGt.gpu_data, m_blobDiff.mutable_gpu_data);
        rgfDelta = Utility.ConvertVecF<T>(m_blobDiff.mutable_cpu_data);
    }

    List<List<float>> rgPerImageLoss = new List<List<float>>();

    // Index of the current match across the whole batch; each match owns 4 consecutive deltas.
    int nMatch = 0;

    for (int nImage = 0; nImage < nNum; nImage++)
    {
        List<float> rgImageLoss = Utility.Create<float>(nNumPriors, 0.0f);

        foreach (KeyValuePair<int, List<int>> entry in rgAllMatchIndices[nImage].Map)
        {
            List<int> rgMatch = entry.Value;
            m_log.CHECK_EQ(nNumPriors, rgMatch.Count, "The match index count at " + nImage.ToString() + " is too small.");

            for (int nPrior = 0; nPrior < rgMatch.Count; nPrior++)
            {
                // Only matched priors contribute a loss.
                if (rgMatch[nPrior] > -1)
                {
                    int nBase = nMatch * 4;
                    double dfSum = 0;

                    for (int nCoord = 0; nCoord < 4; nCoord++)
                    {
                        float fDelta = rgfDelta[nBase + nCoord];

                        switch (lossType)
                        {
                            case MultiBoxLossParameter.LocLossType.SMOOTH_L1:
                                {
                                    // Quadratic below a magnitude of 1, linear at or above it.
                                    float fMagnitude = Math.Abs(fDelta);
                                    dfSum += (fMagnitude < 1.0f) ? 0.5 * fDelta * fDelta : fMagnitude - 0.5;
                                }
                                break;

                            case MultiBoxLossParameter.LocLossType.L2:
                                dfSum += 0.5 * fDelta * fDelta;
                                break;

                            default:
                                m_log.FAIL("Unknown loc loss type!");
                                break;
                        }
                    }

                    rgImageLoss[nPrior] = (float)dfSum;
                    nMatch++;
                }
            }
        }

        rgPerImageLoss.Add(rgImageLoss);
    }

    return rgPerImageLoss;
}
2494 }
2495}
The LabelBBox manages a bounding box used in SSD.
Definition: LabelBBox.cs:17
void Add(int nLabel, NormalizedBBox bbox)
Add a new bbox to the label.
Definition: LabelBBox.cs:41
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_NE(double df1, double df2, string str)
Test whether one number is not-equal to another.
Definition: Log.cs:251
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
Definition: Log.cs:263
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
void CHECK_LT(double df1, double df2, string str)
Test whether one number is less than another.
Definition: Log.cs:275
The NormalizedBBox manages a bounding box used in SSD.
float ymax
Get/set the y maximum.
float xmax
Get/set the x maximum.
NormalizedBBox Clone()
Return a copy of the object.
float xmin
Get/set the x minimum.
bool difficult
Get/set the difficulty.
float size
Get/set the size.
float ymin
Get/set the y minimum.
void Set(float fxmin, float fymin, float fxmax, float fymax, int? nLabel=null, bool? bDifficult=null, float? fScore=null, float? fSize=null)
Set the values of the NormalizedBbox.
int label
Get/set the label.
The Utility class provides general utility functions.
Definition: Utility.cs:35
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
Definition: Utility.cs:721
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
The BBox class processes the NormalizedBBox data used with SSD.
Definition: BBoxUtility.cs:22
List< NormalizedBBox > GetPrior(float[] rgPriorData, int nNumPriors, out List< List< float > > rgPriorVariances)
Get the prior boundary boxes from the rgPriorData.
Definition: BBoxUtility.cs:477
float Coverage(NormalizedBBox bbox1, NormalizedBBox bbox2)
Compute the coverage of bbox1 by bbox2.
void FindMatches(List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBboxes, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, MultiBoxLossParameter p, out List< DictionaryMap< List< float > > > rgAllMatchOverlaps, out List< DictionaryMap< List< int > > > rgAllMatchIndices)
Find matches between prediction bboxes and ground truth bboxes.
DictionaryMap< List< NormalizedBBox > > GetGroundTruth(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
Create a set of ground truth bounding boxes from the rgGtData.
Definition: BBoxUtility.cs:844
NormalizedBBox Clip(NormalizedBBox bbox, float fHeight=1.0f, float fWidth=1.0f)
Clip the BBox to a set range.
bool MeetEmitConstraint(NormalizedBBox src_bbox, NormalizedBBox bbox, EmitConstraint emit_constraint)
Check if a bbox meets the emit constraint w.r.t the src_bbox.
float Size(NormalizedBBox bbox, bool bNormalized=true)
Calculate the size of a BBox.
List< int > ApplyNMS(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fThreshold, int nTopK)
Do non maximum suppression given bboxes and scores.
Definition: BBoxUtility.cs:312
List< int > CumSum(List< Tuple< float, int > > rgPairs)
Calculate the cumulative sum of a set of pairs.
Definition: BBoxUtility.cs:187
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type)
Compute the confidence loss for each prior from rgConfData.
Definition: BBoxUtility.cs:551
BBoxUtility(CudaDnn< T > cuda, Log log)
The constructor.
Definition: BBoxUtility.cs:33
List< LabelBBox > GetLocPredictions(float[] rgLocData, int nNum, int nNumPredsPerClass, int nNumLocClasses, bool bShareLocation)
Create a set of local predictions from the rgLocData.
Definition: BBoxUtility.cs:802
bool Project(NormalizedBBox src, NormalizedBBox bbox, out NormalizedBBox proj_bbox)
Project one bbox onto another.
void Dispose()
Clean up all resources.
Definition: BBoxUtility.cs:43
List< int > ApplyNMS(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fThreshold, int nTopK, bool bReuseOverlaps, out Dictionary< int, Dictionary< int, float > > rgOverlaps)
Do non maximum suppression given bboxes and scores.
Definition: BBoxUtility.cs:328
List< Dictionary< int, List< float > > > GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
Calculate the confidence predictions from rgConfData.
Definition: BBoxUtility.cs:760
NormalizedBBox Decode(NormalizedBBox prior_bbox, List< float > rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, NormalizedBBox bbox)
Decode a bounding box.
NormalizedBBox Locate(NormalizedBBox srcBbox, NormalizedBBox bbox)
Locate bbox in the coordinate system of the source Bbox.
NormalizedBBox Scale(NormalizedBBox bbox, int nHeight, int nWidth)
Scale the BBox to a set range.
List< LabelBBox > DecodeAll(List< LabelBBox > rgAllLocPreds, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgfPrioVariances, int nNum, bool bShareLocation, int nNumLocClasses, int nBackgroundLabelId, PriorBoxParameter.CodeType codeType, bool bVarianceEncodedInTarget, bool bClip)
Decode all bboxes in a batch.
void ApplyNMSFast(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fScoreThreshold, float fNmsThreshold, float fEta, int nTopK, out List< int > rgIndices)
Do a fast non maximum suppression given bboxes and scores.
Definition: BBoxUtility.cs:263
Dictionary< int, Dictionary< int, List< NormalizedBBox > > > GetDetectionResults(float[] rgData, int nNumDet, int nBackgroundLabelId)
Get detection results from rgData.
Definition: BBoxUtility.cs:435
void Extrapolate(ResizeParameter param, int nHeight, int nWidth, NormalizedBBox crop_bbox, NormalizedBBox bbox)
Extrapolate the transformed bbox if height_scale and width_scale are explicitly provided,...
bool IsEligibleMining(MultiBoxLossParameter.MiningType miningType, int nMatchIdx, float fMatchOverlap, float fNegOverlap)
Returns whether or not mining is eligible given the mining type and match index.
List< NormalizedBBox > Decode(List< NormalizedBBox > rgPriorBbox, List< List< float > > rgrgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, List< NormalizedBBox > rgBbox)
Decode a set of bounding box.
bool IsCrossBoundary(NormalizedBBox bbox)
Returns whether or not the bbox overlaps outside the range [0,1].
List< List< float > > ComputeLocLoss(Blob< T > blobLocPred, Blob< T > blobLocGt, List< DictionaryMap< List< int > > > rgAllMatchIndices, int nNum, int nNumPriors, MultiBoxLossParameter.LocLossType lossType)
Compute the localization loss per matched prior.
Dictionary< int, LabelBBox > GetGroundTruthEx(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
Create a set of ground truth bounding boxes from the rgGtData.
Definition: BBoxUtility.cs:888
float JaccardOverlap(NormalizedBBox bbox1, NormalizedBBox bbox2, bool bNormalized=true)
Calculates the Jaccard overlap between two bounding boxes.
NormalizedBBox Intersect(NormalizedBBox bbox1, NormalizedBBox bbox2)
Create the intersection of two bounding boxes.
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List< DictionaryMap< List< int > > > rgAllMatchIndices, DictionaryMap< List< NormalizedBBox > > rgAllGtBoxes)
Compute the confidence loss for each prior from rgConfData.
Definition: BBoxUtility.cs:634
void EncodeLocPrediction(List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBboxes, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, MultiBoxLossParameter p, Blob< T > blobLocPred, Blob< T > blobLocGt)
Encode the localization prediction and ground truth for each matched prior.
int MineHardExamples(Blob< T > blobConf, List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBBoxes, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, List< DictionaryMap< List< float > > > rgAllMatchOverlaps, MultiBoxLossParameter p, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< List< int > > rgAllNegIndices, out int nNumNegs)
Mine the hard examples from the batch.
void EncodeConfPrediction(float[] rgfConfData, int nNum, int nNumPriors, MultiBoxLossParameter p, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< List< int > > rgAllNegIndices, DictionaryMap< List< NormalizedBBox > > rgAllGtBBoxes, Blob< T > blobConfPred, Blob< T > blobConfGt)
Encode the confidence predictions and ground truth for each matched prior.
float ComputeAP(List< Tuple< float, int > > rgTp, int nNumPos, List< Tuple< float, int > > rgFp, ApVersion apVersion, out List< float > rgPrec, out List< float > rgRec)
Compute the average precision given true positive and false positive vectors.
Definition: BBoxUtility.cs:69
NormalizedBBox Encode(NormalizedBBox prior_bbox, List< float > rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, NormalizedBBox bbox)
Encode a bounding box.
void Match(List< NormalizedBBox > rgGtBboxes, List< NormalizedBBox > rgPredBboxes, int nLabel, MultiBoxLossParameter.MatchType match_type, float fOverlapThreshold, bool bIgnoreCrossBoundaryBbox, out List< int > rgMatchIndices, out List< float > rgMatchOverlaps)
Find matches between a list of two bounding boxes.
Definition: BBoxUtility.cs:935
NormalizedBBox Output(NormalizedBBox bbox, SizeF szImg, ResizeParameter p)
Output the predicted bbox on the actual image.
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List< Dictionary< int, List< int > > > rgAllMatchIndices, Dictionary< int, List< NormalizedBBox > > rgAllGtBoxes)
Compute the confidence loss for each prior from rgConfData.
Definition: BBoxUtility.cs:657
int CountNumMatches(List< DictionaryMap< List< int > > > rgAllMatchIndices, int nNum)
Counts the number of matches in the list of maps.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
Definition: Blob.cs:1461
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
void sub(int n, long hA, long hB, long hY, int nAOff=0, int nBOff=0, int nYOff=0, int nB=0)
Subtracts B from A and places the result in Y.
Definition: CudaDnn.cs:7312
bool Active
When active, the parameter is used, otherwise it is ignored.
Specifies the parameters for the EmitConstraint used with SSD.
EmitType emit_type
Get/set the emit type.
float emit_overlap
Get/set the emit overlap used with MIN_OVERLAP.
EmitType
Specifies the emit type.
Specifies the parameters for the MultiBoxLossParameter.
float overlap_threshold
Get/set the overlap threshold (default = 0.5).
MatchType
Defines the matching method used during training.
PriorBoxParameter.CodeType code_type
Get/set the coding method for the bounding box.
float neg_overlap
Get/set the negative overlap upperbound for the unmatched predictions (default = 0....
LocLossType
Defines the localization loss types.
int sample_size
Get/set the number of samples (default = 64).
float neg_pos_ratio
Get/set the negative/positive ratio (default = 3.0).
bool share_location
Get/sets whether or not the bounding box is shared among different classes (default = true).
MiningType
Defines the mining type used during training.
NonMaximumSuppressionParameter nms_param
Get/set the parameters used for the non maximum suppression during hard example training.
LocLossType loc_loss_type
Get/set the localization loss type (default = SMOOTH_L1).
uint background_label_id
Get/set the background label id.
ConfLossType
Defines the confidence loss types.
bool encode_variance_in_target
Get/set whether or not to encode the variance of the prior box in the loc loss target instead of in t...
bool ignore_cross_boundary_bbox
Get/set whether or not to ignore cross boundary bbox during matching (default = false)....
bool map_object_to_agnostic
Get/set whether or not to map all object classes to an agnostic class (default = false)....
bool? do_neg_mining
DEPRECATED: Get/set whether or not to perform negative mining (default = false).
bool bp_inside
Get/set whether or not to only backpropagate on corners which are inside of the image region when enc...
ConfLossType conf_loss_type
Get/set the confidence loss type (default = SOFTMAX).
bool use_prior_for_matching
Get/set whether or not to use prior for matching.
uint num_classes
Get/set the number of classes to be predicted - required!
MiningType mining_type
Get/set the mining type used during training (default = MAX_NEGATIVE).
bool use_prior_for_nms
Get/set whether or not to use the prior bbox for nms.
MatchType match_type
Get/set the matching method used during training (default = PER_PREDICTION).
int? top_k
Get/set the maximum number of results kept.
float nms_threshold
Get/set the threshold to be used in nms.
Specifies the parameters for the PriorBoxParameter.
CodeType
Defines the encode/decode type.
Specifies the parameters for the ResizeParameter for use with SSD.
uint height
Get/set the resizing height.
uint width
Get/set the resizing width.
ResizeMode
Defines the resizing mode.
ResizeMode resize_mode
Get/set the resizing mode.
uint width_scale
Get/set the resizing width scale used with FIT_SMALL_SIZE mode.
uint height_scale
Get/set the resizing height scale used with FIT_SMALL_SIZE mode.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
ApVersion
Defines the different way of computing average precision.
Definition: Interfaces.cs:234
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12