MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DetectionEvaluateLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Drawing;
4using System.IO;
5using System.Linq;
6using System.Text;
7using MyCaffe.basecode;
8using MyCaffe.common;
9using MyCaffe.fillers;
10using MyCaffe.param;
11using MyCaffe.param.ssd;
12
13namespace MyCaffe.layers.ssd
14{
24 public class DetectionEvaluateLayer<T> : Layer<T>
25 {
26 int m_nNumClasses;
27 int m_nBackgroundLabelId;
28 float m_fOverlapThreshold;
29 bool m_bEvaluateDifficultGt;
30 List<SizeF> m_rgSizes = new List<SizeF>();
31 int m_nCount;
32 bool m_bUseNormalizedBbox;
33 ResizeParameter m_resizeParam = null;
34 BBoxUtility<T> m_bboxUtil;
35
/// <summary>
/// The DetectionEvaluateLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to the low-level Cuda DLL.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter describing the layer.</param>
public DetectionEvaluateLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.DETECTION_EVALUATE;

    // The bbox utility is owned by this layer and released in dispose().
    m_bboxUtil = new BBoxUtility<T>(cuda, log);
}
49
/// <summary>
/// Releases all GPU and host resources used by the Layer.
/// </summary>
protected override void dispose()
{
    BBoxUtility<T> util = m_bboxUtil;

    // Release the bbox utility once, then drop the reference so that a
    // second call does not dispose it again.
    if (util != null)
    {
        util.Dispose();
        m_bboxUtil = null;
    }

    base.dispose();
}
61
/// <summary>
/// Returns the exact number of required bottom (input) Blobs: det res, gt
/// </summary>
public override int ExactNumBottomBlobs
{
    get
    {
        return 2;
    }
}
69
/// <summary>
/// Returns the exact number of required top (output) Blobs: det
/// </summary>
public override int ExactNumTopBlobs
{
    get
    {
        return 1;
    }
}
77
/// <summary>
/// Setup the layer.
/// </summary>
/// <remarks>
/// Reads the detection evaluate parameters into the layer fields and, when a
/// name/size file is available, loads one image size per line from it.
/// Each line is expected to hold either 'name height width' or
/// 'index name height width', separated by spaces and/or commas.
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    DetectionEvaluateParameter evalParam = m_param.detection_evaluate_param;

    m_log.CHECK_GT(evalParam.num_classes, 1, "There must be at least one class!");
    m_nNumClasses = (int)evalParam.num_classes;

    m_nBackgroundLabelId = (int)evalParam.background_label_id;

    // The threshold must be loaded before it is validated, otherwise the
    // check runs against the field's default value of 0.
    m_fOverlapThreshold = evalParam.overlap_threshold;
    m_log.CHECK_GT(m_fOverlapThreshold, 0.0f, "The overlap_threshold must be non-negative.");

    m_bEvaluateDifficultGt = evalParam.evaulte_difficult_gt;

    // The name/size file is optional; File.Exists also returns false for a
    // null or empty path.
    if (File.Exists(evalParam.name_size_file))
    {
        using (StreamReader sr = new StreamReader(evalParam.name_size_file))
        {
            string strLine;

            while ((strLine = sr.ReadLine()) != null)
            {
                // Empty entries are removed so that mixed separators such as
                // 'name, 300, 400' still yield three fields.
                string[] rgstr = strLine.Split(new char[] { ' ', ',' }, StringSplitOptions.RemoveEmptyEntries);
                if (rgstr.Length != 3 && rgstr.Length != 4)
                    continue;

                // With four fields the name is at index 1, otherwise at index 0;
                // the height and width follow the name.
                int nNameIdx = (rgstr.Length == 4) ? 1 : 0;
                int nHeight = int.Parse(rgstr[nNameIdx + 1]);
                int nWidth = int.Parse(rgstr[nNameIdx + 2]);

                m_rgSizes.Add(new SizeF(nWidth, nHeight));
            }
        }
    }

    m_nCount = 0;

    // If there is no name_size_provided, use normalized bbox to evaluate.
    m_bUseNormalizedBbox = (m_rgSizes.Count == 0);

    // Retrieve resize parameter if there is one provided.
    m_resizeParam = evalParam.resize_param;
}
126
/// <summary>
/// Reshape the bottom (input) and top (output) blobs.
/// </summary>
/// <remarks>
/// The top blob is shaped as (1, 1, N, 5) where N is the number of evaluated
/// classes plus the number of detection rows counted as valid.
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_log.CHECK_LE(m_nCount, m_rgSizes.Count, "The count must be less than or equal to the number of Sizes.");
    m_log.CHECK_EQ(colBottom[0].num, 1, "The bottom[0].num must = 1.");
    m_log.CHECK_EQ(colBottom[0].channels, 1, "The bottom[0].channels must = 1.");
    m_log.CHECK_EQ(colBottom[0].width, 7, "The bottom[0].width must = 7.");
    m_log.CHECK_EQ(colBottom[1].num, 1, "The bottom[1].num must = 1.");
    m_log.CHECK_EQ(colBottom[1].channels, 1, "The bottom[1].channels must = 1.");
    m_log.CHECK_EQ(colBottom[1].width, 8, "The bottom[1].width must = 8.");

    // Walk the detection rows (7 values each) and count those whose value
    // at offset 1 is not -1.
    float[] rgfDet = convertF(colBottom[0].mutable_cpu_data);
    int nRows = colBottom[0].height;
    int nValid = 0;

    for (int nRow = 0, nIdx = 1; nRow < nRows; nRow++, nIdx += 7)
    {
        if (rgfDet[nIdx] != -1)
            nValid++;
    }

    // The background class, when one is set, gets no row of its own.
    int nPosClasses;
    if (m_nBackgroundLabelId == -1)
        nPosClasses = m_nNumClasses;
    else
        nPosClasses = m_nNumClasses - 1;

    // num() and channels() are 1.
    List<int> rgShape = Utility.Create<int>(2, 1);
    rgShape.Add(nPosClasses + nValid);

    // Each row is a 5 dimension vector, which stores
    // [image_id, label, confidence, true_pos, false_pos]
    rgShape.Add(5);

    colTop[0].Reshape(rgShape);
}
165
/// <summary>
/// Comparison used to order bounding boxes from the highest score to the lowest.
/// </summary>
/// <param name="b1">Specifies the first bounding box.</param>
/// <param name="b2">Specifies the second bounding box.</param>
/// <returns>Returns -1 when b1 has the higher score, 1 when b2 has the higher score, and 0 otherwise.</returns>
private int sortBboxDescending(NormalizedBBox b1, NormalizedBBox b2)
{
    float fFirst = b1.score;
    float fSecond = b2.score;

    if (fFirst > fSecond)
        return -1;

    return (fFirst < fSecond) ? 1 : 0;
}
176
/// <summary>
/// Evaluate the detection output.
/// </summary>
/// <remarks>
/// The top blob is filled with rows of five values,
/// [image_id, label, confidence, true_pos, false_pos].  The first rows, one per
/// evaluated class, use image_id = -1 and carry the number of ground truths for
/// the class in the confidence slot; they are followed by one row per detection.
/// </remarks>
/// <param name="colBottom">Specifies the bottom (input) Blobs: [0] detection results, [1] ground truth.</param>
/// <param name="colTop">Specifies the top (output) Blobs: [0] receives the evaluation rows.</param>
protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    float[] rgfDetData = convertF(colBottom[0].mutable_cpu_data);
    float[] rgfGtData = convertF(colBottom[1].mutable_cpu_data);

    // Retrieve all detection results.
    Dictionary<int, Dictionary<int, List<NormalizedBBox>>> rgAllDetections = m_bboxUtil.GetDetectionResults(rgfDetData, colBottom[0].height, m_nBackgroundLabelId);

    // Retrieve all ground truth (including difficult ones).
    Dictionary<int, LabelBBox> rgAllGtBboxes = m_bboxUtil.GetGroundTruthEx(rgfGtData, colBottom[1].height, m_nBackgroundLabelId, true);

    colTop[0].SetData(0);
    float[] rgfTopData = convertF(colTop[0].mutable_cpu_data);
    int nNumDet = 0;

    // Insert number of ground truth for each label.
    Dictionary<int, int> rgNumPos = countGroundTruthPerLabel(rgAllGtBboxes);

    for (int c = 0; c < m_nNumClasses; c++)
    {
        if (c == m_nBackgroundLabelId)
            continue;

        int nNumPos;
        if (!rgNumPos.TryGetValue(c, out nNumPos))
            nNumPos = 0;

        writeEvalRow(rgfTopData, nNumDet, -1, c, nNumPos, -1, -1);
        nNumDet++;
    }

    // Insert detection evaluate status.
    foreach (KeyValuePair<int, Dictionary<int, List<NormalizedBBox>>> kv in rgAllDetections)
    {
        int nImageId = kv.Key;

        LabelBBox gtLabelBboxes;
        bool bImageHasGt = rgAllGtBboxes.TryGetValue(nImageId, out gtLabelBboxes);

        foreach (KeyValuePair<int, List<NormalizedBBox>> kvLabel in kv.Value.OrderBy(p => p.Key))
        {
            int nLabel = kvLabel.Key;
            if (nLabel == -1)
                continue;

            List<NormalizedBBox> bboxes = kvLabel.Value;

            // No ground truth for the current image, or none for the current
            // label: all detections become false_pos.
            if (!bImageHasGt || !gtLabelBboxes.Contains(nLabel))
                nNumDet = writeFalsePositives(rgfTopData, nNumDet, nImageId, nLabel, bboxes);
            // Ground truth for the current label found.
            else
                nNumDet = matchDetections(rgfTopData, nNumDet, nImageId, nLabel, bboxes, gtLabelBboxes[nLabel]);
        }

        if (m_rgSizes.Count > 0)
        {
            m_nCount++;

            // Reset count after a full iteration through the DB.
            if (m_nCount == m_rgSizes.Count)
                m_nCount = 0;
        }
    }

    colTop[0].mutable_cpu_data = convert(rgfTopData);
}

/// <summary>
/// Count the ground truth boxes of each label over all images, leaving out the
/// difficult ones unless difficult ground truths are evaluated.
/// </summary>
/// <param name="rgAllGtBboxes">Specifies the ground truth boxes keyed by image id.</param>
/// <returns>Returns the number of counted ground truth boxes keyed by label.</returns>
private Dictionary<int, int> countGroundTruthPerLabel(Dictionary<int, LabelBBox> rgAllGtBboxes)
{
    Dictionary<int, int> rgNumPos = new Dictionary<int, int>();

    foreach (KeyValuePair<int, LabelBBox> kv in rgAllGtBboxes)
    {
        foreach (KeyValuePair<int, List<NormalizedBBox>> kvLabel in kv.Value.ToList())
        {
            int nCount = (m_bEvaluateDifficultGt) ? kvLabel.Value.Count : kvLabel.Value.Count(b => !b.difficult);

            // nPrev is left at 0 when the label has not been seen yet.
            int nPrev;
            rgNumPos.TryGetValue(kvLabel.Key, out nPrev);
            rgNumPos[kvLabel.Key] = nPrev + nCount;
        }
    }

    return rgNumPos;
}

/// <summary>
/// Write one [image_id, label, confidence, true_pos, false_pos] row into the top data.
/// </summary>
private static void writeEvalRow(float[] rgfTopData, int nRow, float fImageId, float fLabel, float fConfidence, float fTruePos, float fFalsePos)
{
    int nIdx = nRow * 5;

    rgfTopData[nIdx + 0] = fImageId;
    rgfTopData[nIdx + 1] = fLabel;
    rgfTopData[nIdx + 2] = fConfidence;
    rgfTopData[nIdx + 3] = fTruePos;
    rgfTopData[nIdx + 4] = fFalsePos;
}

/// <summary>
/// Write every detection of a label as a false positive.
/// </summary>
/// <returns>Returns the row index following the last row written.</returns>
private static int writeFalsePositives(float[] rgfTopData, int nNumDet, int nImageId, int nLabel, List<NormalizedBBox> bboxes)
{
    for (int i = 0; i < bboxes.Count; i++)
    {
        writeEvalRow(rgfTopData, nNumDet, nImageId, nLabel, bboxes[i].score, 0, 1);
        nNumDet++;
    }

    return nNumDet;
}

/// <summary>
/// Match the detections of one label against the ground truths of the same label
/// and image, writing one row per detection.
/// </summary>
/// <remarks>
/// Detections are visited from the highest to the lowest score.  A detection whose
/// best overlap reaches the threshold is a true positive when its ground truth has
/// not been matched before, and a false positive otherwise.  When the best match is
/// a difficult ground truth and difficult ground truths are not evaluated, the row
/// is written with both true_pos and false_pos set to 0.
/// </remarks>
/// <returns>Returns the row index following the last row written.</returns>
private int matchDetections(float[] rgfTopData, int nNumDet, int nImageId, int nLabel, List<NormalizedBBox> bboxes, List<NormalizedBBox> gt_bboxes)
{
    // Scale ground truth if needed.
    if (!m_bUseNormalizedBbox)
    {
        // m_nCount is used as an index below, so it must be strictly less than the count.
        m_log.CHECK_GT(m_rgSizes.Count, m_nCount, "The count must be < the sizes count.");

        for (int i = 0; i < gt_bboxes.Count; i++)
        {
            gt_bboxes[i] = m_bboxUtil.Output(gt_bboxes[i], m_rgSizes[m_nCount], m_resizeParam);
        }
    }

    // Tracks which ground truths have already been claimed by a detection.
    bool[] rgbVisited = new bool[gt_bboxes.Count];

    // Sort detections in descending order based on scores.
    if (bboxes.Count > 1)
        bboxes.Sort(new Comparison<NormalizedBBox>(sortBboxDescending));

    for (int i = 0; i < bboxes.Count; i++)
    {
        // The score is read before the box is replaced by its scaled version.
        float fScore = bboxes[i].score;

        if (!m_bUseNormalizedBbox)
            bboxes[i] = m_bboxUtil.Output(bboxes[i], m_rgSizes[m_nCount], m_resizeParam);

        // Compare with each ground truth bbox.
        float fOverlapMax = -1;
        int nJmax = -1;

        for (int j = 0; j < gt_bboxes.Count; j++)
        {
            float fOverlap = m_bboxUtil.JaccardOverlap(bboxes[i], gt_bboxes[j], m_bUseNormalizedBbox);
            if (fOverlap > fOverlapMax)
            {
                fOverlapMax = fOverlap;
                nJmax = j;
            }
        }

        int nTruePos = 0;
        int nFalsePos = 0;

        if (fOverlapMax >= m_fOverlapThreshold)
        {
            if (m_bEvaluateDifficultGt || !gt_bboxes[nJmax].difficult)
            {
                // True positive.
                if (!rgbVisited[nJmax])
                {
                    nTruePos = 1;
                    rgbVisited[nJmax] = true;
                }
                // False positive (multiple detection).
                else
                {
                    nFalsePos = 1;
                }
            }
        }
        else
        {
            // False positive.
            nFalsePos = 1;
        }

        writeEvalRow(rgfTopData, nNumDet, nImageId, nLabel, fScore, nTruePos, nFalsePos);
        nNumDet++;
    }

    return nNumDet;
}
396
/// <summary>
/// Does not implement.
/// </summary>
/// <param name="colTop">Not used.</param>
/// <param name="rgbPropagateDown">Not used.</param>
/// <param name="colBottom">Not used.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
protected override void backward(
    BlobCollection<T> colTop,
    List<bool> rgbPropagateDown,
    BlobCollection<T> colBottom)
{
    throw new NotImplementedException();
}
407 }
408}
The LabelBBox manages a bounding box used in SSD.
Definition: LabelBBox.cs:17
List< KeyValuePair< int, List< NormalizedBBox > > > ToList()
Returns the internal dictionary of items as a list.
Definition: LabelBBox.cs:31
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
Definition: Log.cs:263
The NormalizedBBox manages a bounding box used in SSD.
float score
Get/set the score.
The Utility class provides general utility functions.
Definition: Utility.cs:35
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
Definition: Utility.cs:721
The BBox class processes the NormalizedBBox data used with SSD.
Definition: BBoxUtility.cs:22
void Dispose()
Clean up all resources.
Definition: BBoxUtility.cs:43
Dictionary< int, Dictionary< int, List< NormalizedBBox > > > GetDetectionResults(float[] rgData, int nNumDet, int nBackgroundLabelId)
Get detection results from rgData.
Definition: BBoxUtility.cs:435
Dictionary< int, LabelBBox > GetGroundTruthEx(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
Create a set of ground truth bounding boxes from the rgGtData.
Definition: BBoxUtility.cs:888
float JaccardOverlap(NormalizedBBox bbox1, NormalizedBBox bbox2, bool bNormalized=true)
Calculates the Jaccard overlap between two bounding boxes.
NormalizedBBox Output(NormalizedBBox bbox, SizeF szImg, ResizeParameter p)
Output the predicted bbox on the actual image.
The BlobCollection contains a list of Blobs.
void SetData(double df)
Set all blob data to the value specified.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
Definition: Layer.cs:535
float convertF(T df)
Converts a generic to a float value.
Definition: Layer.cs:1359
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The DetectionEvaluateLayer generates the detection evaluation based on the DetectionOutputLayer and g...
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: det res, gt
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Evaluate the detection output.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: det
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
DetectionEvaluateLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The DetectionEvaluateLayer constructor.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Does not implement.
override void dispose()
Releases all GPU and host resources used by the Layer.
Specifies the base parameter for all layers.
DetectionEvaluateParameter detection_evaluate_param
Returns the parameter set when initialized with LayerType.DETECTION_EVALUATE
LayerType
Specifies the layer type.
uint background_label_id
Specifies the background class.
bool evaulte_difficult_gt
Specifies whether or not to consider the difficult ground truth for evaluation.
string name_size_file
Specifies the file which contains a list of names and sizes in the same order of the input database....
uint num_classes
Specifies the number of classes that are actually predicted - required!
float overlap_threshold
Specifies the threshold for deciding true/false positive.
ResizeParameter resize_param
Specifies the resize parameter used in converting the NormalizedBBox to the original size.
Specifies the parameters for the ResizeParameter for use with SSD.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.ssd namespace contains all Single-Shot MultiBox (SSD) related layers.
Definition: LayerFactory.cs:19
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12