MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DetectionOutputLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Drawing;
4using System.IO;
5using System.Linq;
6using System.Text;
7using MyCaffe.basecode;
8using MyCaffe.common;
9using MyCaffe.data;
10using MyCaffe.fillers;
11using MyCaffe.param;
12using MyCaffe.param.ssd;
13
14namespace MyCaffe.layers.ssd
15{
24 public class DetectionOutputLayer<T> : Layer<T>
25 {
// Number of classes to predict (read from detection_output_param.num_classes in LayerSetUp).
26 int m_nNumClasses;
// When true, forward looks up the decoded boxes under label -1 for every class and
// m_nNumLocClasses is 1; otherwise boxes are looked up per class.
27 bool m_bShareLocations;
28 int m_nNumLocClasses;
// Label id of the background class; it is skipped during nms and when result files are created.
29 int m_nBackgroundLabelId;
// Box coding type handed to BBoxUtility.DecodeAll.
30 PriorBoxParameter.CodeType m_codeType;
31 bool m_bVarianceEncodedInTarget;
// Maximum number of detections kept per image after nms; values of -1 or less keep everything.
32 int m_nKeepTopK;
// Score threshold handed to ApplyNMSFast (defaults to -float.MaxValue when not configured).
33 float m_fConfidenceThreshold;
// Number of prior boxes, computed in Reshape as bottom[2].height / 4.
34 int m_nNumPriors;
// nms settings handed to ApplyNMSFast; LayerSetUp checks m_fNmsThreshold >= 0 and 0 < m_fEta <= 1.
35 float m_fNmsThreshold;
36 int m_nTopK;
37 float m_fEta;
38
// True while detection results are to be written to m_strOutputDir.
39 bool m_bNeedSave = false;
40 string m_strOutputDir;
// Prefix placed in front of the label/extension part of each output file name (see getFileName).
41 string m_strOutputNamePrefix;
// NOTE(review): original line 42 is missing from this extract; the methods below use a member named
// m_outputFormat (SaveOutputParameter.OUTPUT_FORMAT), presumably declared there -- confirm.
// Label id -> name and label id -> display name, loaded from the label map file in LayerSetUp.
43 Dictionary<int, string> m_rgLabelToName = new Dictionary<int, string>();
44 Dictionary<int, string> m_rgLabelToDisplayName = new Dictionary<int, string>();
// Image names and image sizes read from the name_size_file, kept in the same order.
45 List<string> m_rgstrNames = new List<string>();
46 List<SizeF> m_rgSizes = new List<SizeF>();
// Number of images after which the collected detections are written out and the counter restarts.
47 int m_nNumTestImage;
// Index of the image currently being processed within m_rgstrNames / m_rgSizes.
48 int m_nNameCount;
// Resize settings handed to BBoxUtility.Output when a box is mapped back onto the image.
49 ResizeParameter m_resizeParam = null;
50
// Detections collected since the last save; cleared after each save.
51 PropertyTree m_detections = new PropertyTree();
52
// Visualization settings; the forward pass does not visualize yet (see the #warning in forward).
53 bool m_bVisualize;
54 float m_fVisualizeThreshold;
55 DataTransformer<T> m_transformer;
56 string m_strSaveFile;
// Internal work blobs, reshaped like bottom[0] (bbox preds / bbox permute) and bottom[1] (conf permute).
57 Blob<T> m_blobBboxPreds;
58 Blob<T> m_blobBboxPermute;
59 Blob<T> m_blobConfPermute;
// Helper that performs the prior/location decoding and the nms.
60 BBoxUtility<T> m_bboxUtil;
61
// The DetectionOutputLayer constructor.
// NOTE(review): the signature line is missing from this extract; the member index at the end of this
// page lists it as DetectionOutputLayer(CudaDnn<T> cuda, Log log, LayerParameter p) -- confirm.
70 : base(cuda, log, p)
71 {
72 m_type = LayerParameter.LayerType.DETECTION_OUTPUT;
// Create the internal work blobs and name each one after the layer.
73 m_blobBboxPreds = new Blob<T>(cuda, log);
74 m_blobBboxPreds.Name = m_param.name + " bbox preds";
75 m_blobBboxPermute = new Blob<T>(cuda, log);
76 m_blobBboxPermute.Name = m_param.name + " bbox permute";
77 m_blobConfPermute = new Blob<T>(cuda, log);
78 m_blobConfPermute.Name = m_param.name + " bbox conf";
// Helper used by forward for decoding the boxes and running nms.
79 m_bboxUtil = new BBoxUtility<T>(cuda, log);
80 }
81
/// <summary>
/// Releases all GPU and host resources used by the Layer.
/// </summary>
protected override void dispose()
{
    // Internal work blobs first.
    dispose(ref m_blobBboxPreds);
    dispose(ref m_blobBboxPermute);
    dispose(ref m_blobConfPermute);

    // Then the helpers; each field is cleared once its object has been disposed.
    BBoxUtility<T> bboxUtil = m_bboxUtil;
    if (bboxUtil != null)
    {
        bboxUtil.Dispose();
        m_bboxUtil = null;
    }

    DataTransformer<T> transformer = m_transformer;
    if (transformer != null)
    {
        transformer.Dispose();
        m_transformer = null;
    }

    base.dispose();
}
103
/// <summary>
/// Adds the internal blobs of this layer to the collection provided.
/// </summary>
/// <param name="col">Specifies the collection to fill; it is left untouched when it already holds blobs.</param>
protected override void setup_internal_blobs(BlobCollection<T> col)
{
    if (col.Count <= 0)
    {
        col.Add(m_blobBboxPreds);
        col.Add(m_blobBboxPermute);
        col.Add(m_blobConfPermute);
    }
}
114
/// <summary>
/// Returns the minimum number of bottom (input) Blobs: loc pred, conf pred, prior bbox
/// </summary>
public override int MinBottomBlobs
{
    get
    {
        const int nMinBottomBlobs = 3;
        return nMinBottomBlobs;
    }
}
122
/// <summary>
/// Returns the maximum number of bottom (input) Blobs: loc pred, conf pred, prior bbox and one more.
/// </summary>
/// <remarks>
/// NOTE(review): the forward pass only reads bottom[0..2]; the role of the fourth blob is not
/// visible in this layer -- confirm.
/// </remarks>
public override int MaxBottomBlobs
{
    get
    {
        const int nMaxBottomBlobs = 4;
        return nMaxBottomBlobs;
    }
}
130
/// <summary>
/// Returns the exact number of required top (output) Blobs: det
/// </summary>
public override int ExactNumTopBlobs
{
    get
    {
        const int nTopBlobs = 1;
        return nTopBlobs;
    }
}
138
/// <summary>
/// Setup the layer.
/// </summary>
/// <remarks>
/// Reads the detection_output_param settings into member fields, validates the nms settings,
/// optionally loads the label map and name/size files used when saving results, creates the
/// data transformer when visualization is enabled and sizes the internal work blobs.
/// NOTE(review): several statements of this method are missing from this extract (the embedded
/// line numbers skip 149, 151-152, 154, 158, 160, 166, 171-172, 174, 176, 221, 223, 257-258,
/// 270-271, 275 and 281). The notes below mark each gap -- confirm against the original file.
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
144 public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
145 {
146 m_log.CHECK_GT(m_param.detection_output_param.num_classes, 0, "There must be at least one class specified.");
147 m_nNumClasses = (int)m_param.detection_output_param.num_classes;
148
// NOTE(review): line 149 missing -- presumably assigns m_bShareLocations (share_location) before it is read below.
150 m_nNumLocClasses = (m_bShareLocations) ? 1 : m_nNumClasses;
// NOTE(review): lines 151-152 and 154 missing -- presumably assign m_nBackgroundLabelId, m_codeType and m_nKeepTopK.
153 m_bVarianceEncodedInTarget = m_param.detection_output_param.variance_encoded_in_target;
// With no confidence_threshold configured, the threshold becomes the lowest float so no score is rejected by it.
155 m_fConfidenceThreshold = m_param.detection_output_param.confidence_threshold.GetValueOrDefault(-float.MaxValue);
156
157 // Parameters used in nms.
// NOTE(review): lines 158 and 160 missing -- presumably assign m_fNmsThreshold and m_fEta before they are checked.
159 m_log.CHECK_GE(m_fNmsThreshold, 0, "The nms_threshold must be non negative.");
161 m_log.CHECK_GT(m_fEta, 0, "The nms_param.eta must be > 0.");
// NOTE(review): the check below enforces eta <= 1, but its message says "< 0" -- the message looks wrong.
162 m_log.CHECK_LE(m_fEta, 1, "The nms_param.eta must be < 0.");
163
164 m_nTopK = m_param.detection_output_param.nms_param.top_k.GetValueOrDefault(-1);
165
// NOTE(review): line 166 missing -- presumably assigns m_strOutputDir (save_output_param.output_directory).
// Saving is enabled by a non-empty output directory, which is created when it does not exist yet.
167 m_bNeedSave = !string.IsNullOrEmpty(m_strOutputDir);
168 if (m_bNeedSave && !Directory.Exists(m_strOutputDir))
169 Directory.CreateDirectory(m_strOutputDir);
170
// NOTE(review): lines 171-172 missing -- presumably assign m_strOutputNamePrefix and m_outputFormat.
173
// NOTE(review): lines 174 and 176 missing -- presumably the 'if' testing for a configured label_map_file
// and the declaration of strLabelMapFile used below.
175 {
177 if (!File.Exists(strLabelMapFile))
178 {
179 // Ignore saving if there is no label map file.
180 m_log.WriteLine("WARNING: Could not find the label_map_file '" + strLabelMapFile + "'!");
181 m_bNeedSave = false;
182 }
183 else
184 {
185 LabelMap label_map;
186
// Each loading step is wrapped separately so that the rethrown exception names the step that failed.
187 try
188 {
189 RawProto proto = RawProto.FromFile(strLabelMapFile);
190 label_map = LabelMap.FromProto(proto);
191 }
192 catch (Exception excpt)
193 {
194 throw new Exception("Failed to read label map file!", excpt);
195 }
196
197 try
198 {
199 m_rgLabelToName = label_map.MapToName(m_log, true, false);
200 }
201 catch (Exception excpt)
202 {
203 throw new Exception("Failed to convert the label to name!", excpt);
204 }
205
206 try
207 {
208 m_rgLabelToDisplayName = label_map.MapToName(m_log, true, true);
209 }
210 catch (Exception excpt)
211 {
212 throw new Exception("Failed to convert the label to display name!", excpt);
213 }
214 }
215 }
216 else
217 {
218 m_bNeedSave = false;
219 }
220
// NOTE(review): lines 221 and 223 missing -- presumably the 'if' testing for a configured name_size_file
// and the declaration of strNameSizeFile used below.
222 {
224 if (!File.Exists(strNameSizeFile))
225 {
226 // Ignore saving if there is no name size file.
227 m_log.WriteLine("WARNING: Could not find the name_size_file '" + strNameSizeFile + "'!");
228 m_bNeedSave = false;
229 }
230 else
231 {
232 using (StreamReader sr = new StreamReader(strNameSizeFile))
233 {
234 string strName;
235 int nHeight;
236 int nWidth;
237
// Each line holds 'name height width' separated by single spaces; a line with four fields
// carries one extra leading field that is skipped. Commas around a field are trimmed.
238 string strLine = sr.ReadLine();
239 while (strLine != null)
240 {
241 string[] rgstr = strLine.Split(' ');
242 if (rgstr.Length != 3 && rgstr.Length != 4)
243 throw new Exception("Invalid name_size_file format, expected 'name' 'height' 'width'");
244
245 int nNameIdx = (rgstr.Length == 4) ? 1 : 0;
246 strName = rgstr[nNameIdx].Trim(',');
247 nHeight = int.Parse(rgstr[nNameIdx + 1].Trim(','));
248 nWidth = int.Parse(rgstr[nNameIdx + 2].Trim(','));
249
// Names and sizes are stored at matching indexes; note the size is stored as (width, height).
250 m_rgstrNames.Add(strName);
251 m_rgSizes.Add(new SizeF(nWidth, nHeight));
252
253 strLine = sr.ReadLine();
254 }
255 }
256
// NOTE(review): lines 257-258 missing -- presumably 'if num_test_image is set, use it' which the 'else' below completes.
259 else
260 m_nNumTestImage = m_rgstrNames.Count;
261
262 m_log.CHECK_LE(m_nNumTestImage, m_rgstrNames.Count, "The number of test images cannot exceed the number of names.");
263 }
264 }
265 else
266 {
267 m_bNeedSave = false;
268 }
269
// NOTE(review): lines 270-271 missing -- presumably assign m_resizeParam from save_output_param.resize_param.
272
273 m_nNameCount = 0;
274
// NOTE(review): line 275 missing -- presumably assigns m_bVisualize (detection_output_param.visualize).
276 if (m_bVisualize)
277 {
278 m_fVisualizeThreshold = m_param.detection_output_param.visualize_threshold.GetValueOrDefault(0.6f);
279 m_transformer = new DataTransformer<T>(m_cuda, m_log, m_param.transform_param, m_phase, 0, 0, 0);
280 m_transformer.InitRand();
// NOTE(review): line 281 missing -- presumably assigns m_strSaveFile (detection_output_param.save_file).
282 }
283
// Size the internal work blobs like the inputs they mirror.
284 m_blobBboxPreds.ReshapeLike(colBottom[0]);
285
286 if (!m_bShareLocations)
287 m_blobBboxPermute.ReshapeLike(colBottom[0]);
288
289 m_blobConfPermute.ReshapeLike(colBottom[1]);
290 }
291
/// <summary>
/// Reshape the bottom (input) and top (output) blobs.
/// </summary>
/// <remarks>
/// When results are being saved in the VOC format and the image counter sits at the start of a
/// pass (a multiple of the number of test images), the per-label result files are deleted first.
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs: [0] location predictions,
/// [1] confidence predictions, [2] prior bboxes.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs; top[0] is shaped (1, 1, 1, 7).</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    if (m_bNeedSave)
    {
        m_log.CHECK_LE(m_nNameCount, m_rgstrNames.Count, "The name count must be <= the number of names.");

        if (m_nNameCount % m_nNumTestImage == 0)
        {
            // Clean all outputs.
            if (m_outputFormat == SaveOutputParameter.OUTPUT_FORMAT.VOC)
            {
                foreach (KeyValuePair<int, string> kv in m_rgLabelToName)
                {
                    if (kv.Key == m_nBackgroundLabelId)
                        continue;

                    // Build the path with getFileName, exactly as forward() does when it writes the
                    // VOC results, so that the output name prefix is honoured and the files removed
                    // here are the ones this layer produces.
                    string strFile = getFileName(kv.Value, "txt");
                    if (File.Exists(strFile))
                        File.Delete(strFile);
                }
            }
        }
    }

    m_log.CHECK_EQ(colBottom[0].num, colBottom[1].num, "The bottom[0] and bottom[1] must have the same 'num'.");

    m_blobBboxPreds.ReshapeLike(colBottom[0]);

    if (!m_bShareLocations)
        m_blobBboxPermute.ReshapeLike(colBottom[0]);

    m_blobConfPermute.ReshapeLike(colBottom[1]);

    // Four values are stored per prior in bottom[2].
    m_nNumPriors = colBottom[2].height / 4;
    m_log.CHECK_EQ(m_nNumPriors * m_nNumLocClasses * 4, colBottom[0].channels, "The number of priors must match the number of location predictions (bottom[0]).");
    m_log.CHECK_EQ(m_nNumPriors * m_nNumClasses, colBottom[1].channels, "The number of priors must match the number of confidence predictions (bottom[1]).");

    // num() and channels() are 1.
    List<int> rgTopShape = Utility.Create<int>(2, 1);
    // Since the number of bboxes to be kept is unknown before nms, we manually set it to (fake) 1.
    rgTopShape.Add(1);
    // Each row is a 7 dimension vector, which stores:
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    rgTopShape.Add(7);

    colTop[0].Reshape(rgTopShape);
}
346
/// <summary>
/// Builds the full path of an output file: output directory, a backslash, the output name prefix,
/// the label, a dot and the extension.
/// </summary>
/// <param name="strLabel">Specifies the label part of the file name (may be empty).</param>
/// <param name="strExt">Specifies the file extension without the leading dot.</param>
/// <returns>The full file path is returned.</returns>
private string getFileName(string strLabel, string strExt)
{
    // Trailing backslashes are removed first so that exactly one separates directory and file name.
    string strDir = m_strOutputDir.TrimEnd('\\');
    return strDir + "\\" + m_strOutputNamePrefix + strLabel + "." + strExt;
}
358
/// <summary>
/// Do non-maximum suppression (nms) on prediction results.
/// </summary>
/// <remarks>
/// For every image the location predictions are decoded against the prior boxes, nms is run per
/// class (the background class is skipped) and, when keep_top_k is active, only the best scoring
/// detections are kept. The kept detections are written to top[0]; when saving is enabled they
/// are also collected and written to disk once all test images have been processed.
/// </remarks>
/// <param name="colBottom">bottom input Blob vector: [0] location predictions, [1] confidence
/// predictions, [2] prior bboxes.</param>
/// <param name="colTop">top output Blob vector: top[0] is reshaped to (1, 1, N, 7) where each row holds
/// [image_id, label, confidence, xmin, ymin, xmax, ymax]. When nothing is detected, one row per image
/// is produced with the image_id set and all other values set to -1.</param>
protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    float[] rgfLocData = convertF(colBottom[0].mutable_cpu_data);
    float[] rgfConfData = convertF(colBottom[1].mutable_cpu_data);
    float[] rgfPriorData = convertF(colBottom[2].mutable_cpu_data);
    int nNum = colBottom[0].num;

    // Retrieve all location predictions.
    List<LabelBBox> rgAllLocPreds = m_bboxUtil.GetLocPredictions(rgfLocData, nNum, m_nNumPriors, m_nNumLocClasses, m_bShareLocations);

    // Retrieve all confidence scores.
    List<Dictionary<int, List<float>>> rgAllConfScores = m_bboxUtil.GetConfidenceScores(rgfConfData, nNum, m_nNumPriors, m_nNumClasses);

    // Retrieve all prior bboxes, which is the same within a batch since we assume all
    // images in a batch are of the same dimension.
    List<List<float>> rgrgPriorVariances;
    List<NormalizedBBox> rgPriorBboxes = m_bboxUtil.GetPrior(rgfPriorData, m_nNumPriors, out rgrgPriorVariances);

    // Decode all loc predictions to bboxes.
    bool bClipBbox = false;
    List<LabelBBox> rgAllDecodeBboxes = m_bboxUtil.DecodeAll(rgAllLocPreds, rgPriorBboxes, rgrgPriorVariances, nNum, m_bShareLocations, m_nNumLocClasses, m_nBackgroundLabelId, m_codeType, m_bVarianceEncodedInTarget, bClipBbox);

    int nNumKept = 0;
    List<Dictionary<int, List<int>>> rgAllIndices = new List<Dictionary<int, List<int>>>();

    for (int i = 0; i < nNum; i++)
    {
        LabelBBox decode_bboxes = rgAllDecodeBboxes[i];
        Dictionary<int, List<float>> rgConfScores = rgAllConfScores[i];
        Dictionary<int, List<int>> rgIndices = new Dictionary<int, List<int>>();
        int nNumDet = 0;

        for (int c = 0; c < m_nNumClasses; c++)
        {
            // Ignore background class.
            if (c == m_nBackgroundLabelId)
                continue;

            // Something bad happened if there are no predictions for the current label.
            if (!rgConfScores.ContainsKey(c))
                m_log.FAIL("Could not find confidence predictions for label '" + c.ToString() + "'!");

            List<float> rgfScores = rgConfScores[c];
            int nLabel = (m_bShareLocations) ? -1 : c;

            // Something bad happened if there are no locations for the current label.
            if (!decode_bboxes.Contains(nLabel))
                m_log.FAIL("Could not find location predictions for the label '" + nLabel.ToString() + "'!");

            List<NormalizedBBox> rgBboxes = decode_bboxes[nLabel];
            List<int> rgIndexes;
            m_bboxUtil.ApplyNMSFast(rgBboxes, rgfScores, m_fConfidenceThreshold, m_fNmsThreshold, m_fEta, m_nTopK, out rgIndexes);
            rgIndices[c] = rgIndexes;
            nNumDet += rgIndices[c].Count;
        }

        if (m_nKeepTopK > -1 && nNumDet > m_nKeepTopK)
        {
            List<Tuple<float, Tuple<int, int>>> rgScoreIndexPairs = new List<Tuple<float, Tuple<int, int>>>();

            foreach (KeyValuePair<int, List<int>> kv in rgIndices)
            {
                int nLabel = kv.Key;
                List<int> rgLabelIndices = kv.Value;

                // Something bad happened for the current label.
                if (!rgConfScores.ContainsKey(nLabel))
                    m_log.FAIL("Could not find location predictions for label " + nLabel.ToString() + "!");

                List<float> rgScores = rgConfScores[nLabel];
                for (int j = 0; j < rgLabelIndices.Count; j++)
                {
                    int nIdx = rgLabelIndices[j];
                    m_log.CHECK_LT(nIdx, rgScores.Count, "The current index must be less than the number of scores!");
                    rgScoreIndexPairs.Add(new Tuple<float, Tuple<int, int>>(rgScores[nIdx], new Tuple<int, int>(nLabel, nIdx)));
                }
            }

            // Keep top k results per image.
            rgScoreIndexPairs = rgScoreIndexPairs.OrderByDescending(p => p.Item1).ToList();
            if (rgScoreIndexPairs.Count > m_nKeepTopK)
                rgScoreIndexPairs = rgScoreIndexPairs.Take(m_nKeepTopK).ToList();

            // Store the new indices.
            Dictionary<int, List<int>> rgNewIndices = new Dictionary<int, List<int>>();
            for (int j = 0; j < rgScoreIndexPairs.Count; j++)
            {
                int nLabel = rgScoreIndexPairs[j].Item2.Item1;
                int nIdx = rgScoreIndexPairs[j].Item2.Item2;

                if (!rgNewIndices.ContainsKey(nLabel))
                    rgNewIndices.Add(nLabel, new List<int>());

                rgNewIndices[nLabel].Add(nIdx);
            }

            rgAllIndices.Add(rgNewIndices);
            nNumKept += m_nKeepTopK;
        }
        else
        {
            rgAllIndices.Add(rgIndices);
            nNumKept += nNumDet;
        }
    }

    List<int> rgTopShape = Utility.Create<int>(2, 1);
    rgTopShape.Add(nNumKept);
    rgTopShape.Add(7);
    float[] rgfTopData = null;

    if (nNumKept == 0)
    {
        m_log.WriteLine("WARNING: Could not find any detections.");
        rgTopShape[2] = nNum;
        colTop[0].Reshape(rgTopShape);

        colTop[0].SetData(-1);
        rgfTopData = convertF(colTop[0].mutable_cpu_data);
        int nOffset = 0;

        // Generate fake results per image.
        for (int i = 0; i < nNum; i++)
        {
            rgfTopData[nOffset + 0] = i;
            nOffset += 7;
        }
    }
    else
    {
        colTop[0].Reshape(rgTopShape);
        rgfTopData = convertF(colTop[0].mutable_cpu_data);
    }

    int nCount = 0;

    for (int i = 0; i < nNum; i++)
    {
        Dictionary<int, List<float>> rgConfScores = rgAllConfScores[i];
        LabelBBox decode_bboxes = rgAllDecodeBboxes[i];

        foreach (KeyValuePair<int, List<int>> kv in rgAllIndices[i])
        {
            int nLabel = kv.Key;

            // Something bad happened if there are no predictions for the current label.
            if (!rgConfScores.ContainsKey(nLabel))
                m_log.FAIL("Could not find confidence predictions for label '" + nLabel.ToString() + "'!");

            List<float> rgfScores = rgConfScores[nLabel];
            int nLocLabel = (m_bShareLocations) ? -1 : nLabel;

            // Something bad happened if there are no predictions for the current label.
            if (!decode_bboxes.Contains(nLocLabel))
                m_log.FAIL("COuld not find location predictions for label '" + nLabel.ToString() + "'!");

            List<NormalizedBBox> rgBboxes = decode_bboxes[nLocLabel];
            List<int> rgIndices = kv.Value;

            if (m_bNeedSave)
            {
                m_log.CHECK(m_rgLabelToName.ContainsKey(nLabel), "The label to name mapping does not contain the label '" + nLabel.ToString() + "'!");
                m_log.CHECK_LT(m_nNameCount, m_rgstrNames.Count, "The name count must be less than the number of names.");
            }

            for (int j = 0; j < rgIndices.Count; j++)
            {
                int nIdx = rgIndices[j];
                int nRow = nCount * 7;

                rgfTopData[nRow + 0] = i;
                rgfTopData[nRow + 1] = nLabel;
                rgfTopData[nRow + 2] = rgfScores[nIdx];

                NormalizedBBox bbox = rgBboxes[nIdx];
                rgfTopData[nRow + 3] = bbox.xmin;
                rgfTopData[nRow + 4] = bbox.ymin;
                rgfTopData[nRow + 5] = bbox.xmax;
                rgfTopData[nRow + 6] = bbox.ymax;

                if (m_bNeedSave)
                    m_detections.AddChild("", createDetection(bbox, nLabel, rgfTopData[nRow + 2]));

                nCount++;
            }
        }

        if (m_bNeedSave)
        {
            m_nNameCount++;

            // Once every test image has been seen, write the collected results and start over.
            if (m_nNameCount % m_nNumTestImage == 0)
            {
                saveDetections();

                m_nNameCount = 0;
                m_detections.Clear();
            }
        }

        if (m_bVisualize)
        {
#warning DetectionOutputLayer - does not visualize detections yet.
            // TBD.
        }
    }

    colTop[0].mutable_cpu_data = convert(rgfTopData);
    colTop[0].type = BLOB_TYPE.MULTIBBOX;
}

/// <summary>
/// Builds the property tree node describing a single detection of the current image.
/// </summary>
/// <param name="bbox">Specifies the detected (normalized) bounding box.</param>
/// <param name="nLabel">Specifies the label of the detection.</param>
/// <param name="fScore">Specifies the confidence of the detection.</param>
/// <returns>A node with 'image_id', 'category_id', 'bbox' ([xmin, ymin, width, height], each rounded to two decimals) and 'score'.</returns>
private PropertyTree createDetection(NormalizedBBox bbox, int nLabel, float fScore)
{
    // Map the box onto the actual image using the size recorded for the current image.
    NormalizedBBox out_bbox = m_bboxUtil.Output(bbox, m_rgSizes[m_nNameCount], m_resizeParam);

    float fXmin = out_bbox.xmin;
    float fYmin = out_bbox.ymin;
    float fXmax = out_bbox.xmax;
    float fYmax = out_bbox.ymax;

    double[] rgdfBox = new double[]
    {
        Math.Round(fXmin * 100) / 100,
        Math.Round(fYmin * 100) / 100,
        Math.Round((fXmax - fXmin) * 100) / 100,
        Math.Round((fYmax - fYmin) * 100) / 100
    };

    PropertyTree cur_bbox = new PropertyTree();
    foreach (double dfVal in rgdfBox)
    {
        PropertyTree pt_val = new PropertyTree();
        pt_val.Put("", dfVal);
        cur_bbox.AddChild("", pt_val);
    }

    PropertyTree cur_det = new PropertyTree();
    cur_det.Put("image_id", m_rgstrNames[m_nNameCount]);

    // The ILSVRC output identifies the category by label id, the other formats by label name.
    if (m_outputFormat == SaveOutputParameter.OUTPUT_FORMAT.ILSVRC)
        cur_det.Put("category_id", nLabel);
    else
        cur_det.Put("category_id", m_rgLabelToName[nLabel]);

    cur_det.AddChild("bbox", cur_bbox);
    cur_det.Put("score", fScore);

    return cur_det;
}

/// <summary>
/// Writes all collected detections to disk using the configured output format.
/// </summary>
private void saveDetections()
{
    if (m_outputFormat == SaveOutputParameter.OUTPUT_FORMAT.VOC)
    {
        // One result file per (non background) label.
        Dictionary<string, StreamWriter> rgOutFiles = new Dictionary<string, StreamWriter>();

        try
        {
            for (int c = 0; c < m_nNumClasses; c++)
            {
                if (c == m_nBackgroundLabelId)
                    continue;

                string strLabelName = m_rgLabelToName[c];
                string strFile = getFileName(strLabelName, "txt");
                rgOutFiles.Add(strLabelName, new StreamWriter(strFile));
            }

            foreach (PropertyTree pt in m_detections.Children)
            {
                string strLabel = pt.Get("category_id").Value;
                StreamWriter sw;

                if (!rgOutFiles.TryGetValue(strLabel, out sw))
                {
                    m_log.WriteLine("WARNING! Cannot find '" + strLabel + "' label in the output files!");
                    continue;
                }

                // The label is implied by the file, so it is not repeated on the line.
                sw.WriteLine(formatDetectionLine(pt, null));
            }
        }
        finally
        {
            // Disposing flushes and closes each writer; doing it here also releases the files
            // when an error interrupts the writing above.
            foreach (StreamWriter sw in rgOutFiles.Values)
            {
                sw.Dispose();
            }
        }
    }
    else if (m_outputFormat == SaveOutputParameter.OUTPUT_FORMAT.COCO)
    {
        string strFile = getFileName("", "json");
        using (StreamWriter sw = new StreamWriter(strFile))
        {
            PropertyTree output = new PropertyTree();
            output.AddChild("detections", m_detections);
            string strOut = output.ToJson();
            sw.Write(strOut);
        }
    }
    else if (m_outputFormat == SaveOutputParameter.OUTPUT_FORMAT.ILSVRC)
    {
        string strFile = getFileName("", "txt");
        using (StreamWriter sw = new StreamWriter(strFile))
        {
            foreach (PropertyTree pt in m_detections.Children)
            {
                // All labels share one file in this format, so each line must carry its label id.
                int nLabel = (int)pt.Get("category_id").Numeric;
                sw.WriteLine(formatDetectionLine(pt, nLabel.ToString()));
            }
        }
    }
}

/// <summary>
/// Formats one detection as 'image [label] score xmin ymin xmax ymax'.
/// </summary>
/// <param name="pt">Specifies the detection node holding 'image_id', 'score' and 'bbox' ([xmin, ymin, width, height]).</param>
/// <param name="strLabel">Specifies the label text to place after the image name, or null to leave the label out.</param>
/// <returns>The formatted line is returned.</returns>
private static string formatDetectionLine(PropertyTree pt, string strLabel)
{
    string strImageName = pt.Get("image_id").Value;
    float fScore = (float)pt.Get("score").Numeric;

    List<int> bbox = new List<int>();
    foreach (Property elm in pt.GetChildren("bbox"))
    {
        bbox.Add((int)elm.Numeric);
    }

    string strLine = strImageName;

    if (strLabel != null)
        strLine += " " + strLabel;

    strLine += " " + fScore.ToString();
    strLine += " " + bbox[0].ToString() + " " + bbox[1].ToString();
    // The box is stored as [xmin, ymin, width, height]; the line reports the max corner instead.
    strLine += " " + (bbox[0] + bbox[2]).ToString();
    strLine += " " + (bbox[1] + bbox[3]).ToString();

    return strLine;
}
704
/// <summary>
/// Does not implement.
/// </summary>
/// <param name="colTop">Not used.</param>
/// <param name="rgbPropagateDown">Not used.</param>
/// <param name="colBottom">Not used.</param>
protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
{
    // This layer has no backward pass; calling it always throws.
    throw new NotImplementedException();
}
715 }
716}
The LabelBBox manages a bounding box used in SSD.
Definition: LabelBBox.cs:17
bool Contains(int nLabel)
Returns whether or not the label is contained in the label bounding box set.
Definition: LabelBBox.cs:62
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
Definition: Log.cs:263
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
void CHECK_LT(double df1, double df2, string str)
Test whether one number is less than another.
Definition: Log.cs:275
The NormalizedBBox manages a bounding box used in SSD.
float ymax
Get/set the y maximum.
float xmax
Get/set the x maximum.
float xmin
Get/set the x minimum.
float ymin
Get/set the y minimum.
The RawProto class is used to parse and output Google prototxt file data.
Definition: RawProto.cs:17
static RawProto FromFile(string strFileName)
Parses a prototxt from a file and returns it as a RawProto.
Definition: RawProto.cs:281
The Utility class provides general utility functions.
Definition: Utility.cs:35
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
Definition: Utility.cs:721
The BBox class processes the NormalizedBBox data used with SSD.
Definition: BBoxUtility.cs:22
List< NormalizedBBox > GetPrior(float[] rgPriorData, int nNumPriors, out List< List< float > > rgPriorVariances)
Get the prior boundary boxes from the rgPriorData.
Definition: BBoxUtility.cs:477
List< LabelBBox > GetLocPredictions(float[] rgLocData, int nNum, int nNumPredsPerClass, int nNumLocClasses, bool bShareLocation)
Create a set of local predictions from the rgLocData.
Definition: BBoxUtility.cs:802
void Dispose()
Clean up all resources.
Definition: BBoxUtility.cs:43
List< Dictionary< int, List< float > > > GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
Calculate the confidence predictions from rgConfData.
Definition: BBoxUtility.cs:760
List< LabelBBox > DecodeAll(List< LabelBBox > rgAllLocPreds, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgfPrioVariances, int nNum, bool bShareLocation, int nNumLocClasses, int nBackgroundLabelId, PriorBoxParameter.CodeType codeType, bool bVarianceEncodedInTarget, bool bClip)
Decode all bboxes in a batch.
void ApplyNMSFast(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fScoreThreshold, float fNmsThreshold, float fEta, int nTopK, out List< int > rgIndices)
Do a fast non maximum suppression given bboxes and scores.
Definition: BBoxUtility.cs:263
NormalizedBBox Output(NormalizedBBox bbox, SizeF szImg, ResizeParameter p)
Output the predicted bbox on the actual image.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The Property class stores both a numeric and text value.
double? Numeric
Returns the numeric value.
string Value
Returns the text value.
The PropertyTree class implements a simple property tree similar to the ptree in Boost.
Definition: PropertyTree.cs:13
void Clear()
Clear all nodes and values from the tree.
Definition: PropertyTree.cs:92
List< PropertyTree > Children
Returns a list of all child property trees within the tree.
void Put(string str, string strVal)
Add a new property string value.
Definition: PropertyTree.cs:29
void AddChild(string str, PropertyTree pt)
Add a new child to the Property tree.
Definition: PropertyTree.cs:55
string ToJson()
Converts the property tree to a Json representation.
Property Get(string strName)
Retrieves a property at the current level of the tree.
Definition: PropertyTree.cs:68
List< Property > GetChildren(string strName)
Retrieves all properties with the given key at the current level of the tree.
Definition: PropertyTree.cs:81
Applies common transformations to the input data, such as scaling, mirroring, subtracting the image m...
virtual void InitRand()
Initialize the underlying random number generator.
void Dispose()
Cleanup all resources used.
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
Definition: Layer.cs:535
float convertF(T df)
Converts a generic to a float value.
Definition: Layer.cs:1359
Phase m_phase
Specifies the Phase under which the Layer is run.
Definition: Layer.cs:51
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The DetectionOutputLayer generates the detection output based on location and confidence predictions ...
override int MaxBottomBlobs
Returns the maximum number of bottom (input) Blobs: loc pred, conf pred, prior bbox
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Does not implement.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Do non-maximum suppression (nms) on prediction results.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override int MinBottomBlobs
Returns the minimum number of bottom (input) Blobs: loc pred, conf pred, prior bbox
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: det
DetectionOutputLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The DetectionOutputLayer constructor.
override void dispose()
Releases all GPU and host resources used by the Layer.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
DetectionOutputParameter detection_output_param
Returns the parameter set when initialized with LayerType.DETECTION_OUTPUT
TransformationParameter transform_param
Returns the parameter set when initialized with LayerType.TRANSFORM
LayerType
Specifies the layer type.
bool Active
When active, the parameter is used, otherwise it is ignored.
PriorBoxParameter.CodeType code_type
Specifies the coding method for the bbox.
bool visualize
Specifies whether or not to visualize the detection results.
float? confidence_threshold
Specifies the threshold for deciding which detections to consider - only those which are larger than ...
float? visualize_threshold
Specifies the threshold used to visualize detection results.
NonMaximumSuppressionParameter nms_param
Specifies the parameter used for non maximum suppression.
bool variance_encoded_in_target
Specifies whether or not the variance is encoded in the target; otherwise we need to adjust the predi...
string save_file
When provided, specifies the outputs to the video file.
bool share_location
Specifies whether or not the bounding box is shared among different classes (default = true).
uint num_classes
Specifies the number of classes that are actually predicted - required!
SaveOutputParameter save_output_param
Specifies the parameter used for saving the detection results.
int background_label_id
Specifies the background class.
int keep_top_k
Specifies the number of total bboxes to be kept per image after nms step, -1 means keeping all bboxes...
Specifies the LabelMap used with SSD.
Definition: LabelMap.cs:22
Dictionary< int, string > MapToName(Log log, bool bStrict, bool bDisplayName)
Map the labels into a dictionary.
Definition: LabelMap.cs:71
static LabelMap FromProto(RawProto rp)
Parses the parameter from a RawProto.
Definition: LabelMap.cs:191
float eta
Get/set the parameter for adaptive nms.
int? top_k
Get/set the maximum number of results kept.
float nms_threshold
Get/set the threshold to be used in nms.
Specifies the parameters for the PriorBoxParameter.
CodeType
Defines the encode/decode type.
Specifies the parameters for the ResizeParameter for use with SSD.
Specifies the parameters for the SaveOutputLayer.
OUTPUT_FORMAT output_format
Specifies the output format.
string output_directory
Specifies the output directory - if not empty, the results will be saved.
string name_size_file
Optionally, specifies the output name size file.
ResizeParameter resize_param
Specifies the resize parameter used in saving the data.
string label_map_file
Optionally, specifies the output label map file.
OUTPUT_FORMAT
Defines the output format.
string output_name_prefix
Specifies the output name prefix.
uint? num_test_image
Specifies the number of test images.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
BLOB_TYPE
Defines the type of data held by a given Blob.
Definition: Interfaces.cs:62
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.ssd namespace contains all Single-Shot MultiBox (SSD) related layers.
Definition: LayerFactory.cs:19
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12