MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
PriorBoxLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6using MyCaffe.common;
7using MyCaffe.fillers;
8using MyCaffe.param;
9using MyCaffe.param.ssd;
10
11namespace MyCaffe.layers.ssd
12{
22 public class PriorBoxLayer<T> : Layer<T>
23 {
24 List<float> m_rgfMinSizes = new List<float>();
25 List<float> m_rgfMaxSizes = new List<float>();
26 List<float> m_rgfAspectRatios = new List<float>();
27 bool m_bFlip;
28 int m_nNumPriors;
29 bool m_bClip;
30 List<float> m_rgfVariance = new List<float>();
31 int m_nImgW;
32 int m_nImgH;
33 float m_fStepW;
34 float m_fStepH;
35 float m_fOffset;
36
/// <summary>
/// The PriorBoxLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection passed on to the base Layer.</param>
/// <param name="log">Specifies the Log passed on to the base Layer.</param>
/// <param name="p">Specifies the LayerParameter passed on to the base Layer.</param>
public PriorBoxLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    // NOTE(review): the listing this was restored from dropped the constructor body line;
    // the layer type assignment below is assumed from the m_type / LayerType.PRIORBOX
    // members referenced by this file - confirm against the original source.
    m_type = LayerParameter.LayerType.PRIORBOX;
}
54
/// <summary>
/// Releases the resources used by the layer.  This layer adds no clean-up of its own,
/// so the call is forwarded straight to the base class.
/// </summary>
protected override void dispose()
{
    base.dispose();
}
60
/// <summary>
/// Returns the exact number of required bottom (input) Blobs, which is 2.
/// </summary>
/// <remarks>
/// The forward pass reads the feature map width/height from bottom 0 and, when no image
/// size has been configured, the image width/height from bottom 1.
/// </remarks>
public override int ExactNumBottomBlobs
{
    get
    {
        return 2;
    }
}
68
/// <summary>
/// Returns the exact number of required top (output) Blobs, which is 1.
/// </summary>
/// <remarks>
/// The single top Blob receives the generated prior boxes together with their variances.
/// </remarks>
public override int ExactNumTopBlobs
{
    get
    {
        return 1;
    }
}
76
/// <summary>
/// Setup the layer by reading and validating the prior box parameter.
/// </summary>
/// <remarks>
/// The number of priors per location is (aspect ratio count * min_size count) plus one
/// additional prior for each max_size.  When fewer than four variance values are given,
/// a single variance is used (defaulting to 0.1 when none is given).
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs (not read during setup).</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs (not read during setup).</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    PriorBoxParameter p = m_param.prior_box_param;

    // Start from a clean state so that running setup again does not accumulate entries.
    m_rgfMinSizes.Clear();
    m_rgfMaxSizes.Clear();
    m_rgfVariance.Clear();

    m_log.CHECK_GT(p.min_size.Count, 0, "Must provied at least one min_size!");

    for (int i = 0; i < p.min_size.Count; i++)
    {
        float fMin = p.min_size[i];
        m_log.CHECK_GT(fMin, 0, "min_size must be positive greater than zero.");
        m_rgfMinSizes.Add(fMin);
    }

    m_rgfAspectRatios = PriorBoxParameter.GetAspectRatios(p);
    m_bFlip = p.flip;
    m_nNumPriors = m_rgfAspectRatios.Count * m_rgfMinSizes.Count;

    if (p.max_size.Count > 0)
    {
        m_log.CHECK_EQ(p.min_size.Count, p.max_size.Count, "The max_size count must equal the min_size count!");

        for (int i = 0; i < p.max_size.Count; i++)
        {
            float fMax = p.max_size[i];
            m_log.CHECK_GT(fMax, m_rgfMinSizes[i], "The max_size must be greater than the min_size.");
            m_rgfMaxSizes.Add(fMax);
            // Each max_size contributes one extra prior per location.
            m_nNumPriors++;
        }
    }

    m_bClip = p.clip;

    if (p.variance.Count > 1)
    {
        // Must and only provide 4 variance values.
        m_log.CHECK_EQ(p.variance.Count, 4, "Must only have 4 variance values.");

        for (int i = 0; i < p.variance.Count; i++)
        {
            float fVar = p.variance[i];
            m_log.CHECK_GT(fVar, 0, "The variance values must be greater than zero.");
            m_rgfVariance.Add(fVar);
        }
    }
    else if (p.variance.Count == 1)
    {
        float fVar = p.variance[0];
        m_log.CHECK_GT(fVar, 0, "The variance value must be greater than zero.");
        m_rgfVariance.Add(fVar);
    }
    else
    {
        // Set default to 0.1.
        m_rgfVariance.Add(0.1f);
    }

    if (p.img_h.HasValue || p.img_w.HasValue)
    {
        m_log.CHECK(!p.img_size.HasValue, "Either img_size or img_h/img_w should be specified; but not both.");
        // GetValueOrDefault yields 0 for a missing half of the pair, so the checks below
        // report the problem instead of Nullable.Value throwing InvalidOperationException.
        m_nImgH = (int)p.img_h.GetValueOrDefault();
        m_log.CHECK_GT(m_nImgH, 0, "The img_h should be greater than 0.");
        m_nImgW = (int)p.img_w.GetValueOrDefault();
        m_log.CHECK_GT(m_nImgW, 0, "The img_w should be greater than 0.");
    }
    else if (p.img_size.HasValue)
    {
        int nImgSize = (int)p.img_size.Value;
        m_log.CHECK_GT(nImgSize, 0, "The img_size should be greater than 0.");
        m_nImgH = nImgSize;
        m_nImgW = nImgSize;
    }
    else
    {
        // Zero means 'not configured'; the forward pass then uses the size of bottom 1.
        m_nImgH = 0;
        m_nImgW = 0;
    }

    if (p.step_h.HasValue || p.step_w.HasValue)
    {
        m_log.CHECK(!p.step.HasValue, "Either step_size or step_h/step_w should be specified; but not both.");
        // Validate the step values themselves (not the image size) so that explicit steps
        // are accepted when no image size is configured and invalid steps are rejected.
        m_fStepH = p.step_h.GetValueOrDefault();
        m_log.CHECK_GT(m_fStepH, 0, "The step_h should be greater than 0.");
        m_fStepW = p.step_w.GetValueOrDefault();
        m_log.CHECK_GT(m_fStepW, 0, "The step_w should be greater than 0.");
    }
    else if (p.step.HasValue)
    {
        float fStep = p.step.Value;
        m_log.CHECK_GT(fStep, 0, "The step should be greater than 0.");
        m_fStepH = fStep;
        m_fStepW = fStep;
    }
    else
    {
        // Zero means 'not configured'; the forward pass then derives the step from the
        // image size divided by the feature map size.
        m_fStepH = 0;
        m_fStepW = 0;
    }

    m_fOffset = p.offset;
}
180
/// <summary>
/// Reshape the top (output) Blob to hold the prior boxes for the current bottom size.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs; the width and height of bottom 0 are used.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs; top 0 is reshaped.</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    List<int> rgTopShape = PriorBoxParameter.Reshape(m_param.prior_box_param, colBottom[0].width, colBottom[0].height, m_nNumPriors);

    // Since all images in a batch have the same height and width, we only need to
    // generate one set of priors which can be shared across all images.
    m_log.CHECK_EQ(rgTopShape[0], 1, "The topshape(0) should be 1.");

    // 2 channels.
    // First channel stores the mean of each prior coordinate.
    // Second channel stores the variance of each prior coordinate.
    m_log.CHECK_EQ(rgTopShape[1], 2, "The topshape(1) should be 2.");
    m_log.CHECK_GT(rgTopShape[2], 0, "The top shape at index 2 must be greater than zero.");

    colTop[0].Reshape(rgTopShape);
}
202
/// <summary>
/// Generates prior boxes for a layer with specified parameters.
/// </summary>
/// <param name="colBottom">bottom input Blob vector (length 2): bottom 0 supplies the
/// feature map width/height; bottom 1 supplies the image width/height when no image size
/// has been configured.</param>
/// <param name="colTop">top output Blob vector (length 1): the first channel receives the
/// box corners (xmin, ymin, xmax, ymax) divided by the image size and the second channel
/// receives the variances.</param>
protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    int nMapW = colBottom[0].width;
    int nMapH = colBottom[0].height;

    // The configured image size is only used when both dimensions are non-zero;
    // otherwise the size is taken from the second bottom.
    bool bHaveImgSize = (m_nImgW != 0 && m_nImgH != 0);
    int nImageW = bHaveImgSize ? m_nImgW : colBottom[1].width;
    int nImageH = bHaveImgSize ? m_nImgH : colBottom[1].height;

    // Likewise, the configured step is only used when both steps are non-zero;
    // otherwise the step is the image size divided by the feature map size.
    bool bHaveStep = (m_fStepW != 0 && m_fStepH != 0);
    float fStrideW = bHaveStep ? m_fStepW : (float)nImageW / (float)nMapW;
    float fStrideH = bHaveStep ? m_fStepH : (float)nImageH / (float)nMapH;

    float[] rgfTop = Utility.ConvertVecF<T>(colTop[0].mutable_cpu_data);
    int nCoordCount = nMapH * nMapW * m_nNumPriors * 4;
    int nPos = 0;

    for (int y = 0; y < nMapH; y++)
    {
        float fCy = (y + m_fOffset) * fStrideH;

        for (int x = 0; x < nMapW; x++)
        {
            float fCx = (x + m_fOffset) * fStrideW;

            for (int s = 0; s < m_rgfMinSizes.Count; s++)
            {
                // The sizes are truncated to whole pixels before use.
                int nMin = (int)m_rgfMinSizes[s];

                // first prior; aspect_ratio = 1, size = min_size
                nPos = writePriorBox(rgfTop, nPos, fCx, fCy, nMin, nMin, nImageW, nImageH);

                if (m_rgfMaxSizes.Count > 0)
                {
                    m_log.CHECK_EQ(m_rgfMinSizes.Count, m_rgfMaxSizes.Count, "The max_sizes and min_sizes must have the same count.");
                    int nMax = (int)m_rgfMaxSizes[s];

                    // second prior; aspect_ratio = 1, size = sqrt(min_size * max_size)
                    float fSide = (float)Math.Sqrt(nMin * nMax);
                    nPos = writePriorBox(rgfTop, nPos, fCx, fCy, fSide, fSide, nImageW, nImageH);
                }

                // rest of priors
                foreach (float fRatio in m_rgfAspectRatios)
                {
                    // The aspect ratio of 1 has already been written as the first prior.
                    if (Math.Abs(fRatio - 1.0f) < 1e-6f)
                        continue;

                    float fWid = (float)(nMin * Math.Sqrt(fRatio));
                    float fHt = (float)(nMin / Math.Sqrt(fRatio));
                    nPos = writePriorBox(rgfTop, nPos, fCx, fCy, fWid, fHt, nImageW, nImageH);
                }
            }
        }
    }

    // Clip the prior's coordinate such that it is within [0,1]
    if (m_bClip)
    {
        for (int i = 0; i < nCoordCount; i++)
        {
            rgfTop[i] = Math.Min(Math.Max(rgfTop[i], 0.0f), 1.0f);
        }
    }

    // Set the variance.
    int nVarOffset = colTop[0].offset(0, 1);

    if (m_rgfVariance.Count > 1)
    {
        // One group of 4 variances per prior; nCoordCount is (locations * priors * 4),
        // so cycling through the 4 values fills every prior in order.
        for (int i = 0; i < nCoordCount; i++)
        {
            rgfTop[nVarOffset + i] = m_rgfVariance[i % 4];
        }
    }

    colTop[0].mutable_cpu_data = Utility.ConvertVec<T>(rgfTop);

    // A single variance is applied to the whole variance channel after the data is set.
    if (m_rgfVariance.Count == 1)
        colTop[0].SetData(m_rgfVariance[0], nVarOffset, nCoordCount);
}

/// <summary>
/// Writes one prior box as (xmin, ymin, xmax, ymax), each divided by the image size.
/// </summary>
/// <param name="rgf">Specifies the array receiving the four values.</param>
/// <param name="nPos">Specifies the index of the first value to write.</param>
/// <param name="fCenterX">Specifies the box center along x.</param>
/// <param name="fCenterY">Specifies the box center along y.</param>
/// <param name="fBoxWidth">Specifies the box width.</param>
/// <param name="fBoxHeight">Specifies the box height.</param>
/// <param name="nImgW">Specifies the image width used to scale the x values.</param>
/// <param name="nImgH">Specifies the image height used to scale the y values.</param>
/// <returns>The index just past the last value written.</returns>
private static int writePriorBox(float[] rgf, int nPos, float fCenterX, float fCenterY, float fBoxWidth, float fBoxHeight, int nImgW, int nImgH)
{
    // xmin
    rgf[nPos] = (fCenterX - fBoxWidth / 2.0f) / nImgW;
    // ymin
    rgf[nPos + 1] = (fCenterY - fBoxHeight / 2.0f) / nImgH;
    // xmax
    rgf[nPos + 2] = (fCenterX + fBoxWidth / 2.0f) / nImgW;
    // ymax
    rgf[nPos + 3] = (fCenterY + fBoxHeight / 2.0f) / nImgH;

    return nPos + 4;
}
365
/// <summary>
/// Not implemented - the backward pass performs no work for this layer.
/// </summary>
/// <remarks>
/// The forward pass only reads the width and height of the bottom Blobs, so nothing is
/// propagated to them.  The method deliberately returns without throwing, which is what
/// the previous body did in practice (it constructed a NotImplementedException but never
/// threw it).
/// </remarks>
/// <param name="colTop">Not used.</param>
/// <param name="rgbPropagateDown">Not used.</param>
/// <param name="colBottom">Not used.</param>
protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
{
    // Intentionally empty.
}
371 }
372}
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
The Utility class provides general utility functions.
Definition: Utility.cs:35
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
The BlobCollection contains a list of Blobs.
void SetData(double df)
Set all blob data to the value specified.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The PriorBoxLayer generates prior boxes of designated sizes and aspect ratios across all dimensions o...
override void dispose()
Releases all GPU and host resources used by the Layer.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: data
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: permute
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Not implemented.
PriorBoxLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The PriorBoxLayer constructor.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Generates prior boxes for a layer with specified parameters.
Specifies the base parameter for all layers.
PriorBoxParameter prior_box_param
Returns the parameter set when initialized with LayerType.PRIORBOX
LayerType
Specifies the layer type.
Specifies the parameters for the PriorBoxParameter.
float offset
Specifies the offset to the top left corner of each cell.
uint? img_size
Specifies the image size. By default we calculate the img_height, img_width, step_x and step_y based ...
uint? img_h
Specifies the image height. By default we calculate the img_height, img_width, step_x and step_y base...
static List< int > Reshape(PriorBoxParameter p, int nLayerWid, int nLayerHt, int? nNumPriors=null)
Calculate the reshape size based on the parameters.
List< float > variance
Specifies the variance for adjusting the prior boxes.
static List< float > GetAspectRatios(PriorBoxParameter p)
Return the list of aspect ratios to use based on the parameters.
List< float > max_size
Specifies the maximum box size (in pixels) and is required!
bool flip
Specifies whether or not to flip each aspect ratio. For example, if there is an aspect ratio 'r' we w...
float? step_w
Specifies the explicit step size to use along width.
uint? img_w
Specifies the image width. By default we calculate the img_height, img_width, step_x and step_y based...
float? step
Specifies the explicit step size to use.
float? step_h
Specifies the explicit step size to use along height.
bool clip
Specifies whether or not to clip the prior so that it is within [0,1].
List< float > min_size
Specifies the minimum box size (in pixels) and is required!
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.ssd namespace contains all Single-Shot MultiBox (SSD) related layers.
Definition: LayerFactory.cs:19
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12