MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DataTransformer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6using System.Diagnostics;
7using System.IO;
8using MyCaffe.param;
9using MyCaffe.common;
10using MyCaffe.param.ssd;
11
15namespace MyCaffe.data
16{
22 public class DataTransformer<T> : IDisposable
23 {
24 Log m_log;
25 CudaDnn<T> m_cuda = null;
26 List<double> m_rgMeanValues = new List<double>();
28 SimpleDatum m_imgMean = null;
29 float[] m_rgfMeanData = null;
30 double[] m_rgdfMeanData = null;
31 Phase m_phase;
32 CryptoRandom m_random;
33 float[] m_rgfTransformedData = null;
34 double[] m_rgdfTransformedData = null;
35 double m_dfLastMin = 0;
36 double m_dfLastMax = 0;
37 BlobProto m_protoMean = null;
38 BBoxUtility<T> m_bbox = null;
39 ImageTransforms<T> m_imgTransforms = null;
40 long m_hImageOp = 0;
41
/// <summary>
/// Build a transformer for the given parameter and phase, create its random source and helpers,
/// and load the mean data through <c>Update</c>.
/// </summary>
/// <param name="cuda">Cuda connection stored for later use and handed to the helper objects.</param>
/// <param name="log">Log used for checks and failures.</param>
/// <param name="p">Transformation parameter that drives every transform.</param>
/// <param name="phase">Phase; training selects random crop offsets, other phases use centered ones.</param>
/// <param name="nC">Channel count of the data.</param>
/// <param name="nH">Data height; replaced by the resize height when resizing is active.</param>
/// <param name="nW">Data width; replaced by the resize width when resizing is active.</param>
/// <param name="imgMean">Optional image mean forwarded to <c>Update</c>.</param>
public DataTransformer(CudaDnn<T> cuda, Log log, TransformationParameter p, Phase phase, int nC, int nH, int nW, SimpleDatum imgMean = null)
{
    m_cuda = cuda;
    m_log = log;

    // An active resize parameter overrides the spatial size supplied by the caller.
    bool bResizeActive = (p.resize_param != null) && p.resize_param.Active;
    if (bResizeActive)
    {
        m_log.CHECK_GT(p.resize_param.height, 0, "The resize height must be > 0.");
        m_log.CHECK_GT(p.resize_param.width, 0, "The resize width must be > 0.");
        nH = (int)p.resize_param.height;
        nW = (int)p.resize_param.width;
    }

    // InitRand reads m_param, so the parameter must be stored first.
    m_param = p;
    InitRand();

    m_phase = phase;
    m_bbox = new BBoxUtility<T>(cuda, log);
    m_imgTransforms = new ImageTransforms<T>(cuda, log, m_random);

    Update(nC * nH * nW, imgMean);
}
77
/// <summary>
/// Free the image-op handle, if one was created, and reset it so a second call is a no-op.
/// </summary>
public void Dispose()
{
    // Nothing to release when no handle is held.
    if (m_hImageOp == 0)
        return;

    m_cuda.FreeImageOp(m_hImageOp);
    m_hImageOp = 0;
}
89
/// <summary>
/// Reload the mean data (mean file, image mean or per-channel mean values) from the current
/// transformation parameter and re-validate the resize, expansion and mask settings.
/// </summary>
/// <remarks>
/// The per-channel mean list is rebuilt on every call, so calling this method repeatedly
/// does not accumulate duplicate mean values.
/// </remarks>
/// <param name="nDataSize">Not read by this method; kept so existing callers continue to compile.</param>
/// <param name="imgMean">Optional image mean, used when 'use_imagedb_mean' is set and no mean file was loaded.</param>
public void Update(int nDataSize = 0, SimpleDatum imgMean = null)
{
    TransformationParameter p = m_param;

    if (p.mean_file != null)
        m_protoMean = loadProtoMean(p.mean_file);

    if (p.use_imagedb_mean)
    {
        if (m_protoMean == null)
        {
            // No mean file: take the mean from the supplied image mean (may be null).
            m_imgMean = imgMean;

            if (m_imgMean != null)
            {
                if (typeof(T) == typeof(double))
                    m_rgdfMeanData = m_imgMean.GetData<double>();
                else
                    m_rgfMeanData = m_imgMean.GetData<float>();
            }
        }
        else
        {
            // A mean file was loaded: prefer its 'data' entries, otherwise fall back to 'double_data'.
            if (m_protoMean.data.Count > 0)
            {
                if (typeof(T) == typeof(double))
                {
                    m_rgdfMeanData = new double[m_protoMean.data.Count];
                    Array.Copy(m_protoMean.data.ToArray(), m_rgdfMeanData, m_rgdfMeanData.Length);
                }
                else
                {
                    m_rgfMeanData = new float[m_protoMean.data.Count];
                    Array.Copy(m_protoMean.data.ToArray(), m_rgfMeanData, m_rgfMeanData.Length);
                }
            }
            else
            {
                if (typeof(T) == typeof(double))
                    m_rgdfMeanData = m_protoMean.double_data.ToArray();
                else
                    m_rgfMeanData = m_protoMean.double_data.Select(p1 => (float)p1).ToArray();
            }
        }
    }

    // Start from an empty list: appending on every call would duplicate the values and make the
    // later 'one value or one per channel' check fail after a second Update().
    m_rgMeanValues.Clear();

    if (p.mean_value.Count > 0)
    {
        m_log.CHECK(p.use_imagedb_mean == false, "Cannot specify use_image_mean and mean_value at the same time.");

        for (int c = 0; c < p.mean_value.Count; c++)
        {
            m_rgMeanValues.Add(p.mean_value[c]);
        }
    }

    if (m_param.resize_param != null && m_param.resize_param.Active)
    {
        m_log.CHECK_GT(m_param.resize_param.height, 0, "The resize height must be > 0.");
        m_log.CHECK_GT(m_param.resize_param.width, 0, "The resize width must be > 0.");
    }

    if (m_param.expansion_param != null && m_param.expansion_param.Active)
    {
        m_log.CHECK_GT(m_param.expansion_param.max_expand_ratio, 1.0, "The expansion ratio must be > 1.0.");
    }

    if (m_param.mask_param != null && m_param.mask_param.Active)
    {
        m_log.CHECK_GT(m_param.mask_param.boundary_right, m_param.mask_param.boundary_left, "The mask right must be > than the left.");
        m_log.CHECK_GT(m_param.mask_param.boundary_bottom, m_param.mask_param.boundary_top, "The mask bottom must be > than the top.");
    }
}
169
174 {
175 get { return m_param; }
176 }
177
182 {
183 get { return m_imgMean; }
184 set
185 {
186 m_imgMean = value;
187 if (typeof(T) == typeof(double))
188 m_rgdfMeanData = m_imgMean.GetData<double>();
189 else
190 m_rgfMeanData = m_imgMean.GetData<float>();
191 }
192 }
193
/// <summary>
/// Read a mean file from disk and parse it into a BlobProto.
/// </summary>
/// <param name="strFile">Path of the mean file.</param>
/// <returns>The parsed proto, or null when the failure reported through the log does not throw.</returns>
private BlobProto loadProtoMean(string strFile)
{
    try
    {
        bool bFound = File.Exists(strFile);
        if (!bFound)
            throw new Exception("Cannot find the file '" + strFile + "'!");

        byte[] rgContent;

        // Read the whole file into memory in one call.
        using (FileStream stream = new FileStream(strFile, FileMode.Open, FileAccess.Read))
        using (BinaryReader reader = new BinaryReader(stream))
        {
            int nLength = (int)stream.Length;
            rgContent = reader.ReadBytes(nLength);
        }

        PersistCaffe<T> loader = new PersistCaffe<T>(m_log, true);
        BlobProto proto = loader.LoadBlobProto(rgContent, 1);

        return proto;
    }
    catch (Exception err)
    {
        // Every failure, including the missing-file case above, is reported through the log.
        m_log.FAIL("Loading Proto Image Mean: " + err.Message);
        return null;
    }
}
219
/// <summary>
/// Infer the blob shape for a single datum and return it as a list.
/// </summary>
/// <param name="d">Datum whose channels, height and width are inspected.</param>
/// <returns>Shape as {1, channels, height, width}, with the crop size replacing height/width when set.</returns>
public List<int> InferBlobShape(SimpleDatum d)
{
    // Passing no existing array makes the array-based overload allocate a fresh one.
    int[] rgNoExisting = null;
    int[] rgInferred = InferBlobShape(d, rgNoExisting);

    return new List<int>(rgInferred);
}
232
/// <summary>
/// Infer the blob shape for a single datum, reusing the supplied array when it already has four entries.
/// </summary>
/// <param name="d">Datum whose channels, height and width are inspected.</param>
/// <param name="rgShape">Optional array to fill; replaced by a new array when null or not of length 4.</param>
/// <returns>Shape as {1, channels, height, width}, with the crop size replacing height/width when set.</returns>
public int[] InferBlobShape(SimpleDatum d, int[] rgShape)
{
    int nCrop = (int)m_param.crop_size;
    int nC = d.Channels;
    int nH = d.Height;
    int nW = d.Width;

    m_log.CHECK_GT(nC, 0, "There must be 1 or more data channels in the datum.");

    // An active resize changes the spatial size the crop is compared against.
    bool bResizeActive = (m_param.resize_param != null) && m_param.resize_param.Active;
    if (bResizeActive)
        m_imgTransforms.InferNewSize(m_param.resize_param, nW, nH, out nW, out nH);

    m_log.CHECK_GE(nH, nCrop, "The datum height must be >= the crop size of " + nCrop.ToString() + ". To fix this change the 'crop_size' DataLayer property.");
    m_log.CHECK_GE(nW, nCrop, "The datum width must be >= the crop size of " + nCrop.ToString() + ". To fix this change the 'crop_size' DataLayer property.");

    bool bReusable = (rgShape != null) && (rgShape.Length == 4);
    if (!bReusable)
        rgShape = new int[4];

    // A positive crop size defines both spatial dimensions.
    int nOutH = nH;
    int nOutW = nW;
    if (nCrop > 0)
    {
        nOutH = nCrop;
        nOutW = nCrop;
    }

    rgShape[0] = 1;
    rgShape[1] = nC;
    rgShape[2] = nOutH;
    rgShape[3] = nOutW;

    return rgShape;
}
268
/// <summary>
/// Infer the blob shape for a list of datums.
/// </summary>
/// <param name="rgD">Datums to shape; must contain at least one item.</param>
/// <param name="rgShape">Optional array to fill, passed through to the single-datum overload.</param>
/// <returns>Shape as {count, channels, height, width}.</returns>
public int[] InferBlobShape(List<Datum> rgD, int[] rgShape)
{
    int nBatch = rgD.Count();
    m_log.CHECK_GT(nBatch, 0, "There are no datum in the input vector.");

    // The first datum decides channels/height/width; only the leading dimension reflects the list size.
    int[] rgInferred = InferBlobShape(rgD[0], rgShape);
    rgInferred[0] = nBatch;

    return rgInferred;
}
288
/// <summary>
/// Infer the blob shape from explicit dimensions.
/// </summary>
/// <param name="nChannels">Channel count; must be greater than zero.</param>
/// <param name="nWidth">Data width before any resize.</param>
/// <param name="nHeight">Data height before any resize.</param>
/// <returns>Shape as {1, channels, height, width}, with the crop size replacing height/width when set.</returns>
public List<int> InferBlobShape(int nChannels, int nWidth, int nHeight)
{
    int nCropSize = (int)m_param.crop_size;

    // Check dimensions
    m_log.CHECK_GT(nChannels, 0, "There must be 1 or more data channels in the datum.");

    // If exists and active, resize based on resize parameter.
    // Width and height are both passed in (the height argument was previously a second copy of
    // the width), matching the argument order used by the datum-based overload.
    if (m_param.resize_param != null && m_param.resize_param.Active)
        m_imgTransforms.InferNewSize(m_param.resize_param, nWidth, nHeight, out nWidth, out nHeight);

    m_log.CHECK_GE(nHeight, nCropSize, "The height must be >= the crop size of " + nCropSize.ToString() + ". To fix this change the 'crop_size' DataLayer property.");
    m_log.CHECK_GE(nWidth, nCropSize, "The width must be >= the crop size of " + nCropSize.ToString() + ". To fix this change the 'crop_size' DataLayer property.");

    // Build BlobShape.
    List<int> rgShape = new List<int>(4);
    rgShape.Add(1);
    rgShape.Add(nChannels);
    rgShape.Add((nCropSize > 0) ? nCropSize : nHeight);
    rgShape.Add((nCropSize > 0) ? nCropSize : nWidth);

    return rgShape;
}
319
/// <summary>
/// Create the random source, passing the configured seed through when one is present.
/// </summary>
public virtual void InitRand()
{
    if (!m_param.random_seed.HasValue)
    {
        m_random = new CryptoRandom(CryptoRandom.METHOD.DEFAULT);
        return;
    }

    m_random = new CryptoRandom(CryptoRandom.METHOD.DEFAULT, m_param.random_seed.Value);
}
330
/// <summary>
/// Draw the next integer from the random source, forwarding <paramref name="n"/> as its bound.
/// </summary>
/// <param name="n">Bound handed to the random source.</param>
/// <returns>The value produced by the random source.</returns>
protected virtual int Rand(int n)
{
    int nDrawn = m_random.Next(n);
    return nDrawn;
}
340
/// <summary>
/// Minimum (Item1) and maximum (Item2) transformed values recorded by the most recent transform.
/// </summary>
public Tuple<double, double> LastRange
{
    get
    {
        return Tuple.Create(m_dfLastMin, m_dfLastMax);
    }
}
348
356 {
357 if (m_param.label_mapping == null || !m_param.label_mapping.Active)
358 return sd.Label;
359
360 int nNewLabel = m_param.label_mapping.MapLabel(sd.Label, sd.Boost);
361 sd.SetLabel(nNewLabel);
362
363 return nNewLabel;
364 }
365
/// <summary>
/// Transform each datum and copy the result into the matching item slot of the output blob.
/// </summary>
/// <remarks>
/// Null list entries produce an all-zero item. The temporary single-item blob is disposed even
/// when a transform throws.
/// </remarks>
/// <param name="rgDatum">Datums to transform; must be non-empty and no longer than the blob's num.</param>
/// <param name="blobTransformed">Destination blob; its channels/height/width define the item shape.</param>
/// <param name="cuda">Cuda connection used for the temporary blob and the device copy.</param>
/// <param name="log">Log used to create the temporary blob.</param>
public void Transform(List<Datum> rgDatum, Blob<T> blobTransformed, CudaDnn<T> cuda, Log log)
{
    int nDatumNum = rgDatum.Count;
    int nNum = blobTransformed.num;
    int nChannels = blobTransformed.channels;
    int nHeight = blobTransformed.height;
    int nWidth = blobTransformed.width;

    m_log.CHECK_GT(nDatumNum, 0, "There are no datum to add.");
    m_log.CHECK_LE(nDatumNum, nNum, "The size of the rgDatum must be no greater than the transformed blob num.");

    Blob<T> blobUni = new Blob<T>(cuda, log, 1, nChannels, nHeight, nWidth, false);

    try
    {
        for (int i = 0; i < nDatumNum; i++)
        {
            int nOffset = blobTransformed.offset(i);

            if (rgDatum[i] != null)
                Transform(rgDatum[i], blobUni);
            else
                blobUni.SetData(0);

            cuda.copy(blobUni.count(), blobUni.gpu_data, blobTransformed.mutable_gpu_data, 0, nOffset);
        }
    }
    finally
    {
        // Release the temporary blob on every exit path, including failed checks inside Transform.
        blobUni.Dispose();
    }
}
400
401
/// <summary>
/// List-based convenience overload; converts the list to an array and defers to the array overload.
/// </summary>
/// <param name="rgDatum">Datums to transform.</param>
/// <param name="blobTransformed">Destination blob.</param>
/// <param name="cuda">Cuda connection forwarded to the array overload.</param>
/// <param name="log">Log forwarded to the array overload.</param>
/// <param name="bJustFill">When true, datum data is copied without being transformed.</param>
public void Transform(List<SimpleDatum> rgDatum, Blob<T> blobTransformed, CudaDnn<T> cuda, Log log, bool bJustFill = false)
{
    SimpleDatum[] rgItems = rgDatum.ToArray();
    Transform(rgItems, blobTransformed, cuda, log, bJustFill);
}
414
/// <summary>
/// Transform (or just copy) each datum and place the result into the matching item slot of the output blob.
/// </summary>
/// <remarks>
/// Null array entries produce an all-zero item. The temporary single-item blob is disposed even
/// when a transform throws.
/// </remarks>
/// <param name="rgDatum">Datums to process; must be non-empty and no longer than the blob's num.</param>
/// <param name="blobTransformed">Destination blob; its channels/height/width define the item shape.</param>
/// <param name="cuda">Cuda connection used for the temporary blob and the device copy.</param>
/// <param name="log">Log used to create the temporary blob.</param>
/// <param name="bJustFill">When true, the datum data is loaded as-is instead of being transformed.</param>
public void Transform(SimpleDatum[] rgDatum, Blob<T> blobTransformed, CudaDnn<T> cuda, Log log, bool bJustFill = false)
{
    int nDatumNum = rgDatum.Length;
    int nNum = blobTransformed.num;
    int nChannels = blobTransformed.channels;
    int nHeight = blobTransformed.height;
    int nWidth = blobTransformed.width;

    m_log.CHECK_GT(nDatumNum, 0, "There are no datum to add.");
    m_log.CHECK_LE(nDatumNum, nNum, "The size of the rgDatum must be no greater than the transformed blob num.");

    Blob<T> blobUni = new Blob<T>(cuda, log, 1, nChannels, nHeight, nWidth, false);

    try
    {
        for (int i = 0; i < nDatumNum; i++)
        {
            int nOffset = blobTransformed.offset(i);

            if (rgDatum[i] != null)
            {
                if (bJustFill)
                    blobUni.mutable_cpu_data = rgDatum[i].GetData<T>();
                else
                    Transform(rgDatum[i], blobUni);
            }
            else
            {
                blobUni.SetData(0);
            }

            cuda.copy(blobUni.count(), blobUni.gpu_data, blobTransformed.mutable_gpu_data, 0, nOffset);
        }
    }
    finally
    {
        // Release the temporary blob on every exit path, including failed checks inside Transform.
        blobUni.Dispose();
    }
}
457
465 {
466 bool bDoMirror;
467 return Transform(d, blob, out bDoMirror);
468 }
469
/// <summary>
/// Transform a datum into the blob's CPU data and, when the datum carries annotations,
/// return the annotations transformed to match.
/// </summary>
/// <param name="d">Datum to transform.</param>
/// <param name="blob">Destination blob; must have the datum's channel count and be no larger spatially.</param>
/// <param name="bDoMirror">Receives whether the data was mirrored.</param>
/// <returns>The transformed annotations, or null when the datum's annotation type is NONE.</returns>
public AnnotationGroupCollection Transform(SimpleDatum d, Blob<T> blob, out bool bDoMirror)
{
    int nCropSize = (int)m_param.crop_size;
    int nDatumChannels = d.Channels;
    int nDatumHeight = d.Height;
    int nDatumWidth = d.Width;

    // Check dimensions
    int nChannels = blob.channels;
    int nHeight = blob.height;
    int nWidth = blob.width;
    int nNum = blob.num;

    m_log.CHECK_EQ(nChannels, nDatumChannels, "The datum and blob must have equal channels.");
    m_log.CHECK_LE(nHeight, nDatumHeight, "The blob height must be <= the datum height.");
    m_log.CHECK_LE(nWidth, nDatumWidth, "The blob width must be <= the datum width.");
    m_log.CHECK_GE(nNum, 1, "The blob must have at least 1 item.");

    // Without cropping the blob must match the datum exactly.
    if (nCropSize == 0)
    {
        m_log.CHECK_EQ(nDatumHeight, nHeight, "The blob height must equal the datum height.");
        m_log.CHECK_EQ(nDatumWidth, nWidth, "The blob width must equal the datum width.");
    }

    // The crop box is only needed (and only created) when there are annotations to project into it.
    bool bHasAnnotation = (d.annotation_type != SimpleDatum.ANNOTATION_TYPE.NONE);
    NormalizedBBox crop_bbox = bHasAnnotation ? new NormalizedBBox(0, 0, 0, 0) : null;

    blob.mutable_cpu_data = Transform(d, out bDoMirror, crop_bbox);

    // Never hand a null crop box to TransformAnnotation; un-annotated datums simply return null.
    if (bHasAnnotation)
        return TransformAnnotation(d, crop_bbox, bDoMirror, true);

    return null;
}
510
/// <summary>
/// Transform a datum using the double implementation when T is double and the float implementation otherwise.
/// </summary>
/// <param name="d">Datum to transform.</param>
/// <param name="bMirror">Receives whether the data was mirrored.</param>
/// <param name="crop_bbox">Optional box that receives the normalized crop region.</param>
/// <returns>The transformed data as an array of T.</returns>
public T[] Transform(SimpleDatum d, out bool bMirror, NormalizedBBox crop_bbox = null)
{
    object rgRaw;

    if (typeof(T) == typeof(double))
        rgRaw = transformD(d, out bMirror, crop_bbox);
    else
        rgRaw = transformF(d, out bMirror, crop_bbox);

    return (T[])Convert.ChangeType(rgRaw, typeof(T[]));
}
525
/// <summary>
/// Float implementation of the datum transform: optional resize and noise, crop, mirror,
/// mean subtraction, scaling and masking, written into a reusable member buffer.
/// </summary>
/// <param name="d">Datum to transform; must contain valid data.</param>
/// <param name="bMirror">Receives whether the output was mirrored horizontally.</param>
/// <param name="crop_bbox">Optional box that receives the crop region normalized by the datum size.</param>
/// <returns>
/// The member buffer m_rgfTransformedData. It is only reallocated when too small, so it can be
/// longer than the item count and is overwritten by the next call.
/// </returns>
526 private float[] transformF(SimpleDatum d, out bool bMirror, NormalizedBBox crop_bbox = null)
527 {
528 if (!d.GetDataValid(true))
529 throw new Exception("There is no " + ((d.IsRealData) ? "REAL" : "BYTE") + " data in the SimpleDatum!");
530
    // Reset the running range so LastRange reflects this call only.
531 m_dfLastMax = -double.MaxValue;
532 m_dfLastMin = double.MaxValue;
533
534 if (m_param.resize_param != null && m_param.resize_param.Active)
535 d = m_imgTransforms.ApplyResize(d, m_param.resize_param);
536
537 if (m_param.noise_param != null && m_param.noise_param.Active)
538 d = m_imgTransforms.ApplyNoise(d, m_param.noise_param);
539
540 int nDatumChannels = d.Channels;
541 int nDatumHeight = d.Height;
542 int nDatumWidth = d.Width;
543 int nCropSize = (int)m_param.crop_size;
    // Output size is the crop size when one is set and smaller than the datum, else the datum size.
544 int nHeight = ((nCropSize != 0 && nCropSize < nDatumHeight) ? nCropSize : nDatumHeight);
545 int nWidth = ((nCropSize != 0 && nCropSize < nDatumWidth) ? nCropSize : nDatumWidth);
546
547 float fScale = (float)m_param.scale;
    // NOTE(review): 'scaleOp' is used below but its declaration is not visible in this listing - confirm where it is defined.
549
550 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.NONE)
551 fScale = 1.0f;
552
    // Mirroring is decided by a random draw, made only when the parameter enables it.
553 bool bDoMirror = m_param.mirror && (Rand(2) == 1) ? true : false;
554 bool bUseMeanImage = m_param.use_imagedb_mean;
555 List<float> rgMeanValues = null;
556 float[] rgMean = null;
557 bool bUseReal = d.IsRealData;
558
559 bMirror = bDoMirror;
560
561 m_log.CHECK_GT(nDatumChannels, 0, "The datum must have at least 1 channel.");
562 m_log.CHECK_GE(nDatumHeight, nCropSize, "The datum height must be at least as great as the crop size " + nCropSize.ToString());
563 m_log.CHECK_GE(nDatumWidth, nCropSize, "The datum width must be at least as great as the crop size " + nCropSize.ToString());
564
565 if (bUseMeanImage)
566 {
567 if (m_rgfMeanData == null)
568 m_log.FAIL("You must specify an imgMean parameter when using IMAGE mean subtraction.");
569
570 rgMean = m_rgfMeanData;
571
    // A size mismatch is only logged here (the double implementation uses CHECK_EQ instead).
    // NOTE(review): a mean shorter than expected will be indexed past its end in the loop below - confirm this is acceptable.
572 int nExpected = nDatumChannels * nDatumHeight * nDatumWidth;
573 if (nExpected != rgMean.Length)
574 {
575 if (nExpected > rgMean.Length)
576 m_log.WriteLine("The size of the 'mean' image is incorrect! Expected '" + nExpected.ToString() + "' elements, yet loaded '" + rgMean.Length + "' elements.");
577 else
578 m_log.WriteLine("WARNING: The size of the 'mean' image is larger than expected! Expected '" + nExpected.ToString() + "' elements, yet loaded '" + rgMean.Length + "' elements.");
579 }
580 }
581
582 if (m_rgMeanValues.Count > 0)
583 {
584 m_log.CHECK(m_rgMeanValues.Count == 1 || m_rgMeanValues.Count == nDatumChannels, "Specify either 1 mean value or as many as channels: " + nDatumChannels.ToString());
585 rgMeanValues = new List<float>();
586
587 for (int c = 0; c < nDatumChannels; c++)
588 {
589 // Replicate the mean value for simplicity.
590 if (c == 0 || m_rgMeanValues.Count == 1)
591 rgMeanValues.Add((float)m_rgMeanValues[0]);
592 else if (c > 0)
593 rgMeanValues.Add((float)m_rgMeanValues[c]);
594 }
595 }
596
597 int h_off = 0;
598 int w_off = 0;
599
600 if (nCropSize > 0)
601 {
602 // We only do random crop when we do training
603 if (m_phase == Phase.TRAIN)
604 {
605 h_off = Rand(nDatumHeight - nHeight + 1);
606 w_off = Rand(nDatumWidth - nWidth + 1);
607 }
608 else
609 {
    // Outside of training the crop is centered.
610 h_off = (nDatumHeight - nHeight) / 2;
611 w_off = (nDatumWidth - nWidth) / 2;
612 }
613 }
614
615 // Return the normalized crop bbox if specified
616 if (crop_bbox != null)
617 crop_bbox.Set((float)w_off / nDatumWidth, (float)h_off / nDatumHeight, (float)(w_off + nWidth) / nDatumWidth, (float)(h_off + nHeight) / nDatumHeight);
618
619 bool bIsRealData = d.IsRealData;
620 double[] rgdfData = d.RealDataD;
621 float[] rgfData = d.RealDataF;
622 byte[] rgbData = d.ByteData;
623 float fDataElement;
624 float fTransformedElement;
625 int nTopIdx;
626 int nDataIdx;
627 int nItemCount = nDatumChannels * nHeight * nWidth;
628 int[] rgChannelSwap = null;
629
    // The output buffer is a member that grows on demand and is never shrunk.
630 if (m_rgfTransformedData == null || m_rgfTransformedData.Length < nItemCount)
631 m_rgfTransformedData = new float[nItemCount];
632
633 if (nDatumChannels == 3 && param.color_order == TransformationParameter.COLOR_ORDER.BGR)
634 rgChannelSwap = new int[] { 2, 1, 0 };
635
636 for (int c1 = 0; c1 < nDatumChannels; c1++)
637 {
    // NOTE(review): 'c' indexes both the source (nDataIdx) and the destination (nTopIdx), so the
    // swap table only changes the order channels are visited in, not where they land - confirm intent.
638 int c = (rgChannelSwap == null) ? c1 : rgChannelSwap[c1];
639
640 for (int h = 0; h < nHeight; h++)
641 {
642 for (int w = 0; w < nWidth; w++)
643 {
    // Source index is in datum coordinates (crop offset applied); destination index is in output coordinates.
644 nDataIdx = (c * nDatumHeight + h_off + h) * nDatumWidth + w_off + w;
645
646 if (bDoMirror)
647 nTopIdx = (c * nHeight + h) * nWidth + (nWidth - 1 - w);
648 else
649 nTopIdx = (c * nHeight + h) * nWidth + w;
650
    // Real data prefers the float array and falls back to the double array; otherwise byte data is used.
651 fDataElement = (bIsRealData) ? ((rgfData != null) ? rgfData[nDataIdx] : (float)rgdfData[nDataIdx]) : rgbData[nDataIdx];
652
653 if (bUseMeanImage)
654 {
    // The mean image is indexed with the source index, i.e. before cropping.
655 float fVal = fDataElement - rgMean[nDataIdx];
656
    // With the POW operator, negative values pass through unchanged and the rest are raised to fScale.
657 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
658 fTransformedElement = (fVal < 0) ? fVal : (float)Math.Pow(fVal, fScale);
659 else
660 fTransformedElement = fVal * fScale;
661 }
662 else if (rgMeanValues != null && rgMeanValues.Count > 0)
663 {
664 float fVal = fDataElement - rgMeanValues[c];
665
666 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
667 fTransformedElement = (fVal < 0) ? fVal : (float)Math.Pow(fVal, fScale);
668 else
669 fTransformedElement = fVal * fScale;
670 }
671 else
672 {
673 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
674 fTransformedElement = (fDataElement < 0) ? fDataElement : (float)Math.Pow(fDataElement, fScale);
675 else
676 fTransformedElement = fDataElement * fScale;
677 }
678
    // The mask rectangle is tested in output coordinates and is inclusive on all four sides.
679 if (m_param.mask_param != null && m_param.mask_param.Active)
680 {
681 if (h >= m_param.mask_param.boundary_top && h <= m_param.mask_param.boundary_bottom &&
682 w >= m_param.mask_param.boundary_left && w <= m_param.mask_param.boundary_right)
683 fTransformedElement = 0;
684 }
685
686 if (m_dfLastMax < fTransformedElement)
687 m_dfLastMax = fTransformedElement;
688
689 if (m_dfLastMin > fTransformedElement)
690 m_dfLastMin = fTransformedElement;
691
692 m_rgfTransformedData[nTopIdx] = fTransformedElement;
693 }
694 }
695 }
696
697 return m_rgfTransformedData;
698 }
699
/// <summary>
/// Double implementation of the datum transform: optional resize and noise, crop, mirror,
/// mean subtraction, scaling and masking, written into a reusable member buffer.
/// </summary>
/// <param name="d">Datum to transform; must contain valid data.</param>
/// <param name="bMirror">Receives whether the output was mirrored horizontally.</param>
/// <param name="crop_bbox">Optional box that receives the crop region normalized by the datum size.</param>
/// <returns>
/// The member buffer m_rgdfTransformedData. It is only reallocated when too small, so it can be
/// longer than the item count and is overwritten by the next call.
/// </returns>
700 private double[] transformD(SimpleDatum d, out bool bMirror, NormalizedBBox crop_bbox = null)
701 {
702 if (!d.GetDataValid(true))
703 throw new Exception("There is no " + ((d.IsRealData) ? "REAL" : "BYTE") + " data in the SimpleDatum!");
704
    // Reset the running range so LastRange reflects this call only.
705 m_dfLastMax = -double.MaxValue;
706 m_dfLastMin = double.MaxValue;
707
708 if (m_param.resize_param != null && m_param.resize_param.Active)
709 d = m_imgTransforms.ApplyResize(d, m_param.resize_param);
710
711 if (m_param.noise_param != null && m_param.noise_param.Active)
712 d = m_imgTransforms.ApplyNoise(d, m_param.noise_param);
713
714 int nDatumChannels = d.Channels;
715 int nDatumHeight = d.Height;
716 int nDatumWidth = d.Width;
717 int nCropSize = (int)m_param.crop_size;
    // Output size is the crop size when one is set and smaller than the datum, else the datum size.
718 int nHeight = ((nCropSize != 0 && nCropSize < nDatumHeight) ? nCropSize : nDatumHeight);
719 int nWidth = ((nCropSize != 0 && nCropSize < nDatumWidth) ? nCropSize : nDatumWidth);
720
721 double dfScale = m_param.scale;
    // NOTE(review): 'scaleOp' is used below but its declaration is not visible in this listing - confirm where it is defined.
723
724 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.NONE)
725 dfScale = 1.0;
726
    // Mirroring is decided by a random draw, made only when the parameter enables it.
727 bool bDoMirror = m_param.mirror && (Rand(2) == 1) ? true : false;
728 bool bUseMeanImage = m_param.use_imagedb_mean;
729 List<double> rgMeanValues = null;
730 double[] rgMean = null;
731 bool bUseReal = d.IsRealData;
732
733 bMirror = bDoMirror;
734
735 m_log.CHECK_GT(nDatumChannels, 0, "The datum must have at least 1 channel.");
736 m_log.CHECK_GE(nDatumHeight, nCropSize, "The datum height must be at least as great as the crop size " + nCropSize.ToString());
737 m_log.CHECK_GE(nDatumWidth, nCropSize, "The datum width must be at least as great as the crop size " + nCropSize.ToString());
738
739 if (bUseMeanImage)
740 {
741 if (m_rgdfMeanData == null)
742 m_log.FAIL("You must specify an imgMean parameter when using IMAGE mean subtraction.");
743
744 rgMean = m_rgdfMeanData;
745
    // The mean must match the datum size exactly (the float implementation only logs a mismatch).
746 int nExpected = nDatumChannels * nDatumHeight * nDatumWidth;
747 m_log.CHECK_EQ(rgMean.Length, nExpected, "The size of the 'mean' image is incorrect! Expected '" + nExpected.ToString() + "' elements, yet loaded '" + rgMean.Length + "' elements.");
748 }
749
750 if (m_rgMeanValues.Count > 0)
751 {
752 m_log.CHECK(m_rgMeanValues.Count == 1 || m_rgMeanValues.Count == nDatumChannels, "Specify either 1 mean value or as many as channels: " + nDatumChannels.ToString());
753 rgMeanValues = new List<double>();
754
755 for (int c = 0; c < nDatumChannels; c++)
756 {
757 // Replicate the mean value for simplicity.
758 if (c == 0 || m_rgMeanValues.Count == 1)
759 rgMeanValues.Add(m_rgMeanValues[0]);
760 else if (c > 0)
761 rgMeanValues.Add(m_rgMeanValues[c]);
762 }
763 }
764
765 int h_off = 0;
766 int w_off = 0;
767
768 if (nCropSize > 0)
769 {
770 // We only do random crop when we do training
771 if (m_phase == Phase.TRAIN)
772 {
773 h_off = Rand(nDatumHeight - nHeight + 1);
774 w_off = Rand(nDatumWidth - nWidth + 1);
775 }
776 else
777 {
    // Outside of training the crop is centered.
778 h_off = (nDatumHeight - nHeight) / 2;
779 w_off = (nDatumWidth - nWidth) / 2;
780 }
781 }
782
783 // Return the normalized crop bbox if specified
784 if (crop_bbox != null)
785 crop_bbox.Set((float)w_off / nDatumWidth, (float)h_off / nDatumHeight, (float)(w_off + nWidth) / nDatumWidth, (float)(h_off + nHeight) / nDatumHeight);
786
787 bool bIsRealData = d.IsRealData;
788 double[] rgdfData = d.RealDataD;
789 float[] rgfData = d.RealDataF;
790 byte[] rgbData = d.ByteData;
791 double dfDataElement;
792 double dfTransformedElement;
793 int nTopIdx;
794 int nDataIdx;
795 int nItemCount = nDatumChannels * nHeight * nWidth;
796 int[] rgChannelSwap = null;
797
    // The output buffer is a member that grows on demand and is never shrunk.
798 if (m_rgdfTransformedData == null || m_rgdfTransformedData.Length < nItemCount)
799 m_rgdfTransformedData = new double[nItemCount];
800
801 if (nDatumChannels == 3 && param.color_order == TransformationParameter.COLOR_ORDER.BGR)
802 rgChannelSwap = new int[] { 2, 1, 0 };
803
804 for (int c1 = 0; c1 < nDatumChannels; c1++)
805 {
    // NOTE(review): 'c' indexes both the source (nDataIdx) and the destination (nTopIdx), so the
    // swap table only changes the order channels are visited in, not where they land - confirm intent.
806 int c = (rgChannelSwap == null) ? c1 : rgChannelSwap[c1];
807
808 for (int h = 0; h < nHeight; h++)
809 {
810 for (int w = 0; w < nWidth; w++)
811 {
    // Source index is in datum coordinates (crop offset applied); destination index is in output coordinates.
812 nDataIdx = (c * nDatumHeight + h_off + h) * nDatumWidth + w_off + w;
813
814 if (bDoMirror)
815 nTopIdx = (c * nHeight + h) * nWidth + (nWidth - 1 - w);
816 else
817 nTopIdx = (c * nHeight + h) * nWidth + w;
818
    // Real data prefers the double array and falls back to the float array; otherwise byte data is used.
819 dfDataElement = (bIsRealData) ? ((rgdfData != null) ? rgdfData[nDataIdx] : rgfData[nDataIdx]) : rgbData[nDataIdx];
820
821 if (bUseMeanImage)
822 {
    // The mean image is indexed with the source index, i.e. before cropping.
823 double dfVal = dfDataElement - rgMean[nDataIdx];
824
    // With the POW operator, negative values pass through unchanged and the rest are raised to dfScale.
825 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
826 dfTransformedElement = (dfVal < 0) ? dfVal : Math.Pow(dfVal, dfScale);
827 else
828 dfTransformedElement = (dfDataElement - rgMean[nDataIdx]) * dfScale;
829 }
830 else if (rgMeanValues != null && rgMeanValues.Count > 0)
831 {
832 double dfVal = dfDataElement - rgMeanValues[c];
833
834 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
835 dfTransformedElement = (dfVal < 0) ? dfVal : Math.Pow(dfVal, dfScale);
836 else
837 dfTransformedElement = dfVal * dfScale;
838 }
839 else
840 {
841 if (scaleOp.HasValue && scaleOp == TransformationParameter.SCALE_OPERATOR.POW)
842 dfTransformedElement = (dfDataElement < 0) ? dfDataElement : Math.Pow(dfDataElement, dfScale);
843 else
844 dfTransformedElement = dfDataElement * dfScale;
845 }
846
    // The mask rectangle is tested in output coordinates and is inclusive on all four sides.
847 if (m_param.mask_param != null && m_param.mask_param.Active)
848 {
849 if (h >= m_param.mask_param.boundary_top && h <= m_param.mask_param.boundary_bottom &&
850 w >= m_param.mask_param.boundary_left && w <= m_param.mask_param.boundary_right)
851 dfTransformedElement = 0;
852 }
853
854 if (m_dfLastMax < dfTransformedElement)
855 m_dfLastMax = dfTransformedElement;
856
857 if (m_dfLastMin > dfTransformedElement)
858 m_dfLastMin = dfTransformedElement;
859
860 m_rgdfTransformedData[nTopIdx] = dfTransformedElement;
861 }
862 }
863 }
864
865 return m_rgdfTransformedData;
866 }
867
/// <summary>
/// Transform a datum when the caller needs neither the mirror flag nor the crop region.
/// </summary>
/// <param name="d">Datum to transform.</param>
/// <returns>The transformed data.</returns>
public T[] Transform(SimpleDatum d)
{
    bool bIgnoredMirror;
    T[] rgResult = Transform(d, out bIgnoredMirror, null);

    return rgResult;
}
878
/// <summary>
/// Transform a datum and its annotations together, so the annotations follow the same crop and mirror.
/// </summary>
/// <param name="d">Datum to transform.</param>
/// <param name="rgTransformedAnnoVec">Receives the transformed annotations.</param>
/// <param name="bMirror">Receives whether the data was mirrored.</param>
/// <param name="bResize">Forwarded to TransformAnnotation to enable the resize adjustment of the boxes.</param>
/// <returns>The transformed data.</returns>
public T[] Transform(SimpleDatum d, out AnnotationGroupCollection rgTransformedAnnoVec, out bool bMirror, bool bResize = true)
{
    // The data transform fills in the crop region that the annotation transform then consumes.
    NormalizedBBox cropRegion = new NormalizedBBox(0, 0, 0, 0);
    T[] rgData = Transform(d, out bMirror, cropRegion);

    rgTransformedAnnoVec = TransformAnnotation(d, cropRegion, bMirror, bResize);

    return rgData;
}
898
/// <summary>
/// Project the datum's annotation boxes into the crop region, optionally adjusting them for the
/// resize policy and mirroring them, and collect the groups that keep at least one box.
/// </summary>
/// <param name="d">Datum whose annotation groups are transformed; its height and width are used for the resize adjustment.</param>
/// <param name="crop_bbox">Normalized crop region the boxes are projected into.</param>
/// <param name="bMirror">When true, each projected box is flipped horizontally.</param>
/// <param name="bResize">When true and resizing is active, boxes are adjusted for the resize policy.</param>
/// <returns>A new collection holding only the groups with at least one projected annotation.</returns>
907 public AnnotationGroupCollection TransformAnnotation(SimpleDatum d, NormalizedBBox crop_bbox, bool bMirror, bool bResize)
908 {
909 int nImgHt = d.Height;
910 int nImgWd = d.Width;
911 AnnotationGroupCollection rgTransformedAnnotationGroup = new AnnotationGroupCollection();
912
    // NOTE(review): the condition that opens this brace block (paired with the 'else' reporting
    // "Unknown annotation type.") is not visible in this listing - confirm against the full file.
914 {
915 if (d.annotation_group != null)
916 {
917 // Go through each AnnotationGroup.
918 for (int g = 0; g < d.annotation_group.Count; g++)
919 {
920 // Go through each Annotation.
921 bool bHasValidAnnotation = false;
922 AnnotationGroup anno_group = d.annotation_group[g];
923 AnnotationGroup transformed_anno_group = new AnnotationGroup();
924
925 for (int a = 0; a < anno_group.annotations.Count; a++)
926 {
927 Annotation anno = anno_group.annotations[a];
928 NormalizedBBox bbox = anno.bbox;
929
930 // Adjust bounding box annotation.
931 NormalizedBBox resize_bbox = bbox;
932 if (bResize && m_param.resize_param != null && m_param.resize_param.Active)
933 {
934 m_log.CHECK_GT(nImgHt, 0, "The image height must be > 0!");
935 m_log.CHECK_GT(nImgWd, 0, "The image width must be > 0!");
936 resize_bbox = m_imgTransforms.UpdateBBoxByResizePolicy(m_param.resize_param, nImgWd, nImgHt, resize_bbox);
937 }
938
    // Skip boxes that fail the emit constraint for this crop.
939 if (m_param.emit_constraint != null && m_param.emit_constraint.Active && !m_bbox.MeetEmitConstraint(crop_bbox, resize_bbox, m_param.emit_constraint))
940 continue;
941
    // Only boxes that project successfully into the crop are kept.
942 NormalizedBBox proj_bbox;
943 if (m_bbox.Project(crop_bbox, resize_bbox, out proj_bbox))
944 {
945 bHasValidAnnotation = true;
946 Annotation transformed_anno = new Annotation(proj_bbox.Clone(), anno.instance_id);
947 NormalizedBBox transformed_bbox = transformed_anno.bbox;
948
949 if (bMirror)
950 {
    // Horizontal flip in normalized coordinates: xmin and xmax swap around 1.
951 float fTemp = transformed_bbox.xmin;
952 transformed_bbox.xmin = 1 - transformed_bbox.xmax;
953 transformed_bbox.xmax = 1 - fTemp;
954 }
    // NOTE(review): because this is an 'else if', mirrored boxes are never extrapolated - confirm this is intended.
955 else if (bResize && m_param.resize_param != null && m_param.resize_param.Active)
956 {
957 m_bbox.Extrapolate(m_param.resize_param, nImgHt, nImgWd, crop_bbox, transformed_bbox);
958 }
959
960 transformed_anno_group.annotations.Add(transformed_anno);
961 }
962 }
963
964 // Save for output.
965 if (bHasValidAnnotation)
966 {
967 transformed_anno_group.group_label = anno_group.group_label;
968 rgTransformedAnnotationGroup.Add(transformed_anno_group);
969 }
970 }
971 }
972 }
973 else
974 {
975 m_log.FAIL("Unknown annotation type.");
976 }
977
978 return rgTransformedAnnotationGroup;
979 }
980
/// <summary>
/// Rescale the blob data into [0, forced_positive_range_max] when that parameter is non-zero.
/// </summary>
/// <remarks>
/// A blob whose values are all equal has no range to stretch; it is shifted to zero and left
/// unscaled, which avoids a division by zero that would fill the blob with NaN.
/// </remarks>
/// <param name="b">Blob whose data is shifted and scaled in place.</param>
public void SetRange(Blob<T> b)
{
    if (m_param.forced_positive_range_max == 0)
        return;

    double dfMin = b.min_data;
    double dfMax = b.max_data;
    double dfNewMax = m_param.forced_positive_range_max;
    double dfRange = dfMax - dfMin;

    // Shift so the smallest value sits at zero.
    b.add_scalar(-dfMin);

    // Constant data: every value is now zero and there is nothing to stretch.
    if (dfRange == 0)
        return;

    b.scale_data(dfNewMax / dfRange);
}
999
1007 {
1008 int nDatumChannels = d.Channels;
1009 int nDatumHeight = d.Height;
1010 int nDatumWidth = d.Width;
1011
1012 // Get the bbox dimension.
1013 NormalizedBBox clipped_bbox = m_bbox.Clip(bbox);
1014 NormalizedBBox scaled_bbox = m_bbox.Scale(clipped_bbox, nDatumHeight, nDatumWidth);
1015 int w_off = (int)scaled_bbox.xmin;
1016 int h_off = (int)scaled_bbox.ymin;
1017 int width = (int)(scaled_bbox.xmax - scaled_bbox.xmin);
1018 int height = (int)(scaled_bbox.ymax - scaled_bbox.ymin);
1019
1020 // Crop the image using bbox.
1021 SimpleDatum crop_datum = new SimpleDatum(d, height, width);
1022 int nCropDatumSize = nDatumChannels * height * width;
1023
1024 if (d.IsRealData)
1025 {
1026 if (d.RealDataD != null)
1027 {
1028 double[] rgData = new double[nCropDatumSize];
1029
1030 for (int h = h_off; h < h_off + height; h++)
1031 {
1032 for (int w = w_off; w < w_off + width; w++)
1033 {
1034 for (int c = 0; c < nDatumChannels; c++)
1035 {
1036 int nDatumIdx = (c * nDatumHeight + h) * nDatumWidth + w;
1037 int nCropDatumIdx = (c * height + h - h_off) * width + w - w_off;
1038 rgData[nCropDatumIdx] = d.RealDataD[nDatumIdx];
1039 }
1040 }
1041 }
1042
1043 crop_datum.SetData(rgData.ToList(), d.Label);
1044 }
1045 else if (d.RealDataF != null)
1046 {
1047 float[] rgData = new float[nCropDatumSize];
1048
1049 for (int h = h_off; h < h_off + height; h++)
1050 {
1051 for (int w = w_off; w < w_off + width; w++)
1052 {
1053 for (int c = 0; c < nDatumChannels; c++)
1054 {
1055 int nDatumIdx = (c * nDatumHeight + h) * nDatumWidth + w;
1056 int nCropDatumIdx = (c * height + h - h_off) * width + w - w_off;
1057 rgData[nCropDatumIdx] = d.RealDataF[nDatumIdx];
1058 }
1059 }
1060 }
1061
1062 crop_datum.SetData(rgData.ToList(), d.Label);
1063 }
1064 else
1065 {
1066 throw new Exception("SimpleDatum: Both the RealDataD and RealDataF are null!");
1067 }
1068 }
1069 else
1070 {
1071 byte[] rgData = new byte[nCropDatumSize];
1072
1073 for (int h = h_off; h < h_off + height; h++)
1074 {
1075 for (int w = w_off; w < w_off + width; w++)
1076 {
1077 for (int c = 0; c < nDatumChannels; c++)
1078 {
1079 int nDatumIdx = (c * nDatumHeight + h) * nDatumWidth + w;
1080 int nCropDatumIdx = (c * height + h - h_off) * width + w - w_off;
1081 rgData[nCropDatumIdx] = d.ByteData[nDatumIdx];
1082 }
1083 }
1084 }
1085
1086 crop_datum.SetData(rgData.ToList(), d.Label);
1087 }
1088
1089 return crop_datum;
1090 }
1091
/// <summary>
/// Place the datum at a random whole-pixel offset inside a larger, zero-filled canvas.
/// </summary>
/// <remarks>
/// The offset is drawn from the free margin between the canvas and the original data. Drawing a
/// value in [0,1) and flooring it, as was done before, always gave an offset of zero.
/// </remarks>
/// <param name="d">Source datum holding real (double or float) or byte data.</param>
/// <param name="expand_bbox">Receives the canvas bounds, normalized by the original datum size.</param>
/// <param name="fExpandRatio">Ratio applied to both width and height; must be at least 1.</param>
/// <returns>A new datum of the expanded size containing the original data.</returns>
/// <exception cref="Exception">Thrown when the datum is flagged as real data but holds neither double nor float data.</exception>
public SimpleDatum ExpandImage(SimpleDatum d, NormalizedBBox expand_bbox, float fExpandRatio)
{
    // A ratio below 1 would make the canvas smaller than the data it must hold.
    m_log.CHECK(fExpandRatio >= 1.0f, "The expand ratio must be >= 1.0.");

    int nDatumChannels = d.Channels;
    int nDatumHeight = d.Height;
    int nDatumWidth = d.Width;

    // Get the bbox dimension.
    int width = (int)(nDatumWidth * fExpandRatio);
    int height = (int)(nDatumHeight * fExpandRatio);

    // Scale each draw by the free margin before flooring so the placement actually varies.
    int h_off = (int)Math.Floor(m_random.NextDouble() * (height - nDatumHeight));
    int w_off = (int)Math.Floor(m_random.NextDouble() * (width - nDatumWidth));

    expand_bbox.xmin = -(float)w_off / nDatumWidth;
    expand_bbox.ymin = -(float)h_off / nDatumHeight;
    expand_bbox.xmax = (float)(width - w_off) / nDatumWidth;
    expand_bbox.ymax = (float)(height - h_off) / nDatumHeight;

    SimpleDatum expand_datum = new SimpleDatum(d, height, width);

    if (d.IsRealData)
    {
        if (d.RealDataD != null)
        {
            double[] rgData = expandPlanes(d.RealDataD, nDatumChannels, nDatumHeight, nDatumWidth, height, width, h_off, w_off);
            expand_datum.SetData(rgData.ToList(), d.Label);
        }
        else if (d.RealDataF != null)
        {
            float[] rgData = expandPlanes(d.RealDataF, nDatumChannels, nDatumHeight, nDatumWidth, height, width, h_off, w_off);
            expand_datum.SetData(rgData.ToList(), d.Label);
        }
        else
        {
            throw new Exception("SimpleDatum: Both the RealDataD and RealDataF are null!");
        }
    }
    else
    {
        byte[] rgData = expandPlanes(d.ByteData, nDatumChannels, nDatumHeight, nDatumWidth, height, width, h_off, w_off);
        expand_datum.SetData(rgData.ToList(), d.Label);
    }

    return expand_datum;
}

/// <summary>
/// Copy every channel plane of the source into a zero-initialized canvas at the given offset, one row at a time.
/// </summary>
private static TData[] expandPlanes<TData>(TData[] rgSrc, int nChannels, int nSrcHeight, int nSrcWidth, int nDstHeight, int nDstWidth, int nOffH, int nOffW)
{
    TData[] rgDst = new TData[nChannels * nDstHeight * nDstWidth];

    for (int c = 0; c < nChannels; c++)
    {
        for (int h = 0; h < nSrcHeight; h++)
        {
            int nSrcIdx = (c * nSrcHeight + h) * nSrcWidth;
            int nDstIdx = (c * nDstHeight + h + nOffH) * nDstWidth + nOffW;
            Array.Copy(rgSrc, nSrcIdx, rgDst, nDstIdx, nSrcWidth);
        }
    }

    return rgDst;
}
1190
/// <summary>
/// Draw a random value and map it linearly onto the range starting at fMin and spanning (fMax - fMin).
/// </summary>
/// <param name="fMin">Specifies the value returned when the random draw is 0.</param>
/// <param name="fMax">Specifies the value approached as the random draw approaches 1.</param>
/// <returns>The scaled random value is returned.</returns>
private float randomValue(float fMin, float fMax)
{
    float fUnit = (float)m_random.NextDouble();

    return fMin + (fUnit * (fMax - fMin));
}
1196
/// <summary>
/// Expand the datum and adjust the AnnotationGroup.
/// </summary>
/// <remarks>
/// A copy of the datum is returned unchanged when the expansion parameter is missing or inactive,
/// when the random draw exceeds the expansion probability, or when the maximum expand ratio is
/// (nearly) 1.
/// </remarks>
/// <param name="d">Specifies the SimpleDatum to expand.</param>
/// <returns>A new SimpleDatum (expanded, or a copy of the input) is returned.</returns>
public SimpleDatum ExpandImage(SimpleDatum d)
{
    if (m_param.expansion_param == null || !m_param.expansion_param.Active)
        return new SimpleDatum(d, true);

    float fExpandProb = m_param.expansion_param.prob;
    float fProb = (float)m_random.NextDouble();

    if (fProb > fExpandProb)
        return new SimpleDatum(d, true);

    // An expand ratio within 0.01 of 1 would not change the image.
    float fMaxExpandRatio = m_param.expansion_param.max_expand_ratio;
    if (Math.Abs(fMaxExpandRatio - 1.0f) < 1e-2)
        return new SimpleDatum(d, true);

    float fExpandRatio = randomValue(1.0f, fMaxExpandRatio);

    // Expand the datum; expand_bbox is filled in by the call below.
    NormalizedBBox expand_bbox = new NormalizedBBox(0, 0, 0, 0);
    SimpleDatum expanded_datum = ExpandImage(d, expand_bbox, fExpandRatio);
    expanded_datum.annotation_type = d.annotation_type;

    // Transform the annotation according to the expand_bbox.
    bool bMirror = false;
    bool bResize = false;
    expanded_datum.annotation_group = TransformAnnotation(d, expand_bbox, bMirror, bResize);

    return expanded_datum;
}
1231
/// <summary>
/// Distort the SimpleDatum.
/// </summary>
/// <remarks>
/// The input is returned untouched when the distortion parameter is missing or inactive, when
/// distortion is configured to run on the GPU (use_gpu = true), or when the brightness, contrast
/// and saturation probabilities are all zero.
/// </remarks>
/// <param name="d">Specifies the SimpleDatum to distort.</param>
/// <returns>The result of ApplyDistortEx is returned, or the input itself when no distortion is applied.</returns>
public SimpleDatum DistortImage(SimpleDatum d)
{
    if (m_param.distortion_param == null || !m_param.distortion_param.Active)
        return d;

    // When use_gpu is set, distortion is left to the Blob-based DistortImage overload.
    if (m_param.distortion_param.use_gpu)
        return d;

    if (m_param.distortion_param.brightness_prob == 0 &&
        m_param.distortion_param.contrast_prob == 0 &&
        m_param.distortion_param.saturation_prob == 0)
        return d;

    return m_imgTransforms.ApplyDistortEx(d, m_param.distortion_param);
}
1254
/// <summary>
/// Distort the images within a Blob.
/// </summary>
/// <remarks>
/// Nothing is done when the distortion parameter is missing or inactive, when use_gpu is false,
/// or when the brightness, contrast and saturation probabilities are all zero.  The image
/// operation handle is created on first use and kept in m_hImageOp for subsequent calls.
/// </remarks>
/// <param name="b">Specifies the Blob whose data is distorted in place.</param>
public void DistortImage(Blob<T> b)
{
    if (m_param.distortion_param == null || !m_param.distortion_param.Active)
        return;

    if (!m_param.distortion_param.use_gpu)
        return;

    if (m_param.distortion_param.brightness_prob == 0 &&
        m_param.distortion_param.contrast_prob == 0 &&
        m_param.distortion_param.saturation_prob == 0)
        return;

    if (m_hImageOp == 0)
    {
        // Arguments follow the CreateImageOp parameter order:
        // num, brightness (prob, delta), contrast (prob, lower, upper), saturation (prob, lower, upper), seed.
        m_hImageOp = m_cuda.CreateImageOp(b.num,
                                          m_param.distortion_param.brightness_prob,
                                          m_param.distortion_param.brightness_delta,
                                          m_param.distortion_param.contrast_prob,
                                          m_param.distortion_param.contrast_lower,
                                          m_param.distortion_param.contrast_upper,
                                          m_param.distortion_param.saturation_prob,
                                          m_param.distortion_param.saturation_lower,
                                          m_param.distortion_param.saturation_upper,
                                          m_param.distortion_param.random_seed);
    }

    m_cuda.DistortImage(m_hImageOp, b.count(), b.num, b.count(1), b.gpu_data, b.mutable_gpu_data);
}
1288
/// <summary>
/// Masks out portions of the SimpleDatum.
/// </summary>
/// <remarks>
/// Every element whose row lies in [boundary_top, boundary_bottom] and whose column lies in
/// [boundary_left, boundary_right] (both inclusive) is set to zero in every channel.  The data
/// of the input datum is modified in place.
/// </remarks>
/// <param name="d">Specifies the SimpleDatum to mask.</param>
/// <returns>The same SimpleDatum instance passed in is returned.</returns>
public SimpleDatum MaskImage(SimpleDatum d)
{
    if (m_param.mask_param == null || !m_param.mask_param.Active)
        return d;

    int nL = m_param.mask_param.boundary_left;
    int nR = m_param.mask_param.boundary_right;
    int nT = m_param.mask_param.boundary_top;
    int nB = m_param.mask_param.boundary_bottom;
    int nHeight = d.Height;
    int nWidth = d.Width;
    int nDim = nHeight * nWidth;

    // Restrict the mask rectangle to the image so only the affected elements are visited.
    int nFirstRow = Math.Max(nT, 0);
    int nLastRow = Math.Min(nB, nHeight - 1);
    int nFirstCol = Math.Max(nL, 0);
    int nLastCol = Math.Min(nR, nWidth - 1);
    int nRunLength = nLastCol - nFirstCol + 1;

    if (nFirstRow > nLastRow || nRunLength <= 0)
        return d;

    // Select the buffer to mask once, instead of re-testing the data type for every element.
    Array rgTarget;

    if (d.IsRealData)
    {
        rgTarget = (Array)d.RealDataD ?? d.RealDataF;

        // Real data with neither buffer set: nothing to mask.
        if (rgTarget == null)
            return d;
    }
    else
    {
        rgTarget = d.ByteData;
    }

    for (int c = 0; c < d.Channels; c++)
    {
        for (int y = nFirstRow; y <= nLastRow; y++)
        {
            // Array.Clear writes the element type's zero value over the masked run of the row.
            Array.Clear(rgTarget, c * nDim + y * nWidth + nFirstCol, nRunLength);
        }
    }

    return d;
}
1333
/// <summary>
/// Zero out the masked rectangle of a raw data array, using the mask boundaries of the transformation parameter.
/// </summary>
/// <param name="rgShape">Specifies the shape of the data; entries 1, 2 and 3 are read as channels, height and width.</param>
/// <param name="rgData">Specifies the data to mask; it is modified in place.</param>
/// <returns>The same array passed in as rgData is returned.</returns>
public float[] MaskData(int[] rgShape, float[] rgData)
{
    bool bMaskActive = (m_param.mask_param != null && m_param.mask_param.Active);

    if (!bMaskActive)
        return rgData;

    int nLeft = m_param.mask_param.boundary_left;
    int nRight = m_param.mask_param.boundary_right;
    int nTop = m_param.mask_param.boundary_top;
    int nBottom = m_param.mask_param.boundary_bottom;
    int nChannels = rgShape[1];
    int nHeight = rgShape[2];
    int nWidth = rgShape[3];
    int nSpatialDim = nHeight * nWidth;

    for (int nChannel = 0; nChannel < nChannels; nChannel++)
    {
        int nChannelBase = nChannel * nSpatialDim;

        for (int nRow = 0; nRow < nHeight; nRow++)
        {
            // Rows outside of the [top, bottom] band are untouched.
            if (nRow < nTop || nRow > nBottom)
                continue;

            int nRowBase = nChannelBase + nRow * nWidth;

            for (int nCol = 0; nCol < nWidth; nCol++)
            {
                // Columns outside of the [left, right] band are untouched.
                if (nCol < nLeft || nCol > nRight)
                    continue;

                rgData[nRowBase + nCol] = 0f;
            }
        }
    }

    return rgData;
}
1372
/// <summary>
/// Reverse the transformation made when calling Transform.
/// </summary>
/// <param name="blob">Specifies the Blob holding the transformed data.</param>
/// <param name="bIncludeMean">Specifies whether or not to add the mean back to the data (default = true).</param>
/// <returns>A byte-based Datum built from the blob data is returned.</returns>
public Datum UnTransform(Blob<T> blob, bool bIncludeMean = true)
{
    // Dispatch on the base type so the conversion runs in matching precision.
    bool bIsDouble = (typeof(T) == typeof(double));

    return (bIsDouble) ? unTransformD(blob, bIncludeMean) : unTransformF(blob, bIncludeMean);
}
1386
/// <summary>
/// Reverse the transformation using single precision math: undo the scale, optionally add the
/// mean back, clamp to [0,255] and pack the result into a byte Datum.
/// </summary>
/// <param name="blob">Specifies the Blob holding the transformed data.</param>
/// <param name="bIncludeMean">Specifies whether or not to add the mean back to the data.</param>
/// <returns>A byte-based Datum with the blob's channels, width and height is returned.</returns>
private Datum unTransformF(Blob<T> blob, bool bIncludeMean = true)
{
    float[] rgData = Utility.ConvertVecF<T>(blob.update_cpu_data());
    byte[] rgOutput = new byte[rgData.Length];
    int nC = blob.channels;
    int nH = blob.height;
    int nW = blob.width;
    int[] rgChannelSwap = null;
    bool bUseMeanImage = m_param.use_imagedb_mean;
    float[] rgMeanImage = null;
    float[] rgMeanChannel = null;
    float dfScale = (float)m_param.scale;

    if (bUseMeanImage)
    {
        if (m_rgfMeanData == null)
            m_log.FAIL("You must specify an imgMean parameter when using IMAGE mean subtraction.");

        rgMeanImage = m_rgfMeanData;

        int nExpected = nC * nH * nW;
        m_log.CHECK_EQ(rgMeanImage.Length, nExpected, "The size of the 'mean' image is incorrect! Expected '" + nExpected.ToString() + "' elements, yet loaded '" + rgMeanImage.Length + "' elements.");
    }

    if (m_rgMeanValues.Count > 0)
    {
        m_log.CHECK(m_rgMeanValues.Count == 1 || m_rgMeanValues.Count == nC, "Specify either 1 mean value or as many as channels: " + nC.ToString());

        // Replicate a single mean value across all channels for simplicity.
        // Kept separate from the mean image so that the per-element image lookup below
        // never indexes into the (much shorter) per-channel array.
        rgMeanChannel = new float[nC];

        for (int c = 0; c < nC; c++)
        {
            rgMeanChannel[c] = (float)m_rgMeanValues[(m_rgMeanValues.Count == 1) ? 0 : c];
        }
    }

    // The swap table only makes sense for 3-channel data stored in BGR order; applying it to
    // any other channel count would read the wrong channel or index out of range.
    if (nC == 3 && m_param.color_order == TransformationParameter.COLOR_ORDER.BGR)
        rgChannelSwap = new int[] { 2, 1, 0 };

    for (int c1 = 0; c1 < nC; c1++)
    {
        // c is the channel read from the blob, c1 the channel written to the output.
        int c = (rgChannelSwap == null) ? c1 : rgChannelSwap[c1];

        for (int h = 0; h < nH; h++)
        {
            for (int w = 0; w < nW; w++)
            {
                int nDataIdx = (c * nH + h) * nW + w;
                float fVal = (rgData[nDataIdx] / dfScale);

                if (bIncludeMean)
                {
                    if (bUseMeanImage)
                        fVal += rgMeanImage[nDataIdx];
                    else if (rgMeanChannel != null)
                        fVal += rgMeanChannel[c];
                }

                // Clamp to the valid byte range before the cast.
                if (fVal < 0)
                    fVal = 0;
                if (fVal > 255)
                    fVal = 255;

                int nOutIdx = (c1 * nH + h) * nW + w;
                rgOutput[nOutIdx] = (byte)fVal;
            }
        }
    }

    return new Datum(false, nC, nW, nH, -1, DateTime.MinValue, rgOutput.ToList(), 0, false, -1);
}
1463
/// <summary>
/// Reverse the transformation using double precision math: undo the scale, optionally add the
/// mean back, clamp to [0,255] and pack the result into a byte Datum.
/// </summary>
/// <param name="blob">Specifies the Blob holding the transformed data.</param>
/// <param name="bIncludeMean">Specifies whether or not to add the mean back to the data.</param>
/// <returns>A byte-based Datum with the blob's channels, width and height is returned.</returns>
private Datum unTransformD(Blob<T> blob, bool bIncludeMean = true)
{
    double[] rgData = Utility.ConvertVec<T>(blob.update_cpu_data());
    byte[] rgOutput = new byte[rgData.Length];
    int nC = blob.channels;
    int nH = blob.height;
    int nW = blob.width;
    int[] rgChannelSwap = null;
    bool bUseMeanImage = m_param.use_imagedb_mean;
    double[] rgMeanImage = null;
    double[] rgMeanChannel = null;
    double dfScale = m_param.scale;

    if (bUseMeanImage)
    {
        if (m_rgdfMeanData == null)
            m_log.FAIL("You must specify an imgMean parameter when using IMAGE mean subtraction.");

        rgMeanImage = m_rgdfMeanData;

        int nExpected = nC * nH * nW;
        m_log.CHECK_EQ(rgMeanImage.Length, nExpected, "The size of the 'mean' image is incorrect! Expected '" + nExpected.ToString() + "' elements, yet loaded '" + rgMeanImage.Length + "' elements.");
    }

    if (m_rgMeanValues.Count > 0)
    {
        m_log.CHECK(m_rgMeanValues.Count == 1 || m_rgMeanValues.Count == nC, "Specify either 1 mean value or as many as channels: " + nC.ToString());

        // Replicate a single mean value across all channels for simplicity.
        // Kept separate from the mean image so that the per-element image lookup below
        // never indexes into the (much shorter) per-channel array.
        rgMeanChannel = new double[nC];

        for (int c = 0; c < nC; c++)
        {
            rgMeanChannel[c] = m_rgMeanValues[(m_rgMeanValues.Count == 1) ? 0 : c];
        }
    }

    // The swap table only makes sense for 3-channel data stored in BGR order; applying it to
    // any other channel count would read the wrong channel or index out of range.
    if (nC == 3 && m_param.color_order == TransformationParameter.COLOR_ORDER.BGR)
        rgChannelSwap = new int[] { 2, 1, 0 };

    for (int c1 = 0; c1 < nC; c1++)
    {
        // c is the channel read from the blob, c1 the channel written to the output.
        int c = (rgChannelSwap == null) ? c1 : rgChannelSwap[c1];

        for (int h = 0; h < nH; h++)
        {
            for (int w = 0; w < nW; w++)
            {
                int nDataIdx = (c * nH + h) * nW + w;
                double dfVal = (rgData[nDataIdx] / dfScale);

                if (bIncludeMean)
                {
                    if (bUseMeanImage)
                        dfVal += rgMeanImage[nDataIdx];
                    else if (rgMeanChannel != null)
                        dfVal += rgMeanChannel[c];
                }

                // Clamp to the valid byte range before the cast.
                if (dfVal < 0)
                    dfVal = 0;
                if (dfVal > 255)
                    dfVal = 255;

                int nOutIdx = (c1 * nH + h) * nW + w;
                rgOutput[nOutIdx] = (byte)dfVal;
            }
        }
    }

    return new Datum(false, nC, nW, nH, -1, DateTime.MinValue, rgOutput.ToList(), 0, false, -1);
}
1540 }
1541}
Defines a collection of AnnotationGroups.
Definition: Annotation.cs:256
void Add(AnnotationGroupCollection col)
Add another AnnotationGroupCollection to this one.
Definition: Annotation.cs:369
int Count
Specifies the number of items in the collection.
Definition: Annotation.cs:350
The AnnotationGroup class manages a group of annotations.
Definition: Annotation.cs:124
int group_label
Get/set the group label.
Definition: Annotation.cs:211
List< Annotation > annotations
Get/set the group annotations.
Definition: Annotation.cs:202
The Annotation class is used by annotations attached to SimpleDatum's and used in SSD.
Definition: Annotation.cs:22
int instance_id
Get/set the instance ID.
Definition: Annotation.cs:55
NormalizedBBox bbox
Get/set the bounding box.
Definition: Annotation.cs:64
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
Definition: CryptoRandom.cs:14
METHOD
Defines the random number generation method to use.
Definition: CryptoRandom.cs:25
int Next(int nMinVal, int nMaxVal, bool bMaxInclusive=true)
Returns a random int within the range
double NextDouble()
Returns a random double within the range .
Definition: CryptoRandom.cs:83
The Datum class is a simple wrapper to the SimpleDatum class to ensure compatibility with the origina...
Definition: Datum.cs:12
The Log class provides general output in text form.
Definition: Log.cs:13
The NormalizedBBox manages a bounding box used in SSD.
float ymax
Get/set the y maximum.
float xmax
Get/set the x maximum.
NormalizedBBox Clone()
Return a copy of the object.
float xmin
Get/set the x minimum.
float ymin
Get/set the y minimum.
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
void Copy(SimpleDatum d, bool bCopyData, int? nHeight=null, int? nWidth=null)
Copy another SimpleDatum into this one.
ANNOTATION_TYPE
Specifies the annotation type when using annotations.
Definition: SimpleDatum.cs:204
void SetLabel(int nLabel)
Sets the label.
bool GetDataValid(bool bByType=true)
Returns true if the ByteData or RealDataD or RealDataF are not null, false otherwise.
void SetData(SimpleDatum d)
Set the data of the current SimpleDatum by copying the data of another.
int Channels
Return the number of channels of the data.
bool IsRealData
Returns whether or not the data contains real numbers or byte data.
AnnotationGroupCollection annotation_group
When using annotations, each annotation group contains an annotation for a particular class used with ...
float[] RealDataF
Return the float data. This field is valid when IsRealData = true.
ANNOTATION_TYPE annotation_type
When using annotations, the annotation type specifies the type of annotation. Currently,...
int Boost
Get/set the boost for this data.
int Width
Return the width of the data.
byte[] ByteData
Return the byte data. This field is valid when IsRealData = false.
int Height
Return the height of the data.
double[] RealDataD
Return the double data. This field is valid when IsRealData = true.
int Label
Return the known label of the data.
The Utility class provides general utility functions.
Definition: Utility.cs:35
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
The BBox class processes the NormalizedBBox data used with SSD.
Definition: BBoxUtility.cs:22
NormalizedBBox Clip(NormalizedBBox bbox, float fHeight=1.0f, float fWidth=1.0f)
Clip the BBox to a set range.
bool MeetEmitConstraint(NormalizedBBox src_bbox, NormalizedBBox bbox, EmitConstraint emit_constraint)
Check if a bbox meets the emit constraint w.r.t the src_bbox.
bool Project(NormalizedBBox src, NormalizedBBox bbox, out NormalizedBBox proj_bbox)
Project one bbox onto another.
NormalizedBBox Scale(NormalizedBBox bbox, int nHeight, int nWidth)
Scale the BBox to a set range.
void Extrapolate(ResizeParameter param, int nHeight, int nWidth, NormalizedBBox crop_bbox, NormalizedBBox bbox)
Extrapolate the transformed bbox if height_scale and width_scale are explicitly provided,...
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
Definition: Blob.cs:800
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Definition: Blob.cs:1922
double min_data
Returns the minimum value in the data of the Blob.
Definition: Blob.cs:2499
double max_data
Returns the maximum value in the data of the Blob.
Definition: Blob.cs:2525
int height
DEPRECATED; legacy shape accessor height: use shape(2) instead.
Definition: Blob.cs:808
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
Definition: Blob.cs:1461
void scale_data(double df)
Scale the data by a scaling factor.
Definition: Blob.cs:1754
int width
DEPRECATED; legacy shape accessor width: use shape(3) instead.
Definition: Blob.cs:816
void add_scalar(double dfVal)
Adds a scalar value to the Blob.
Definition: Blob.cs:2779
T[] update_cpu_data()
Update the CPU data by transferring the GPU data over to the Host.
Definition: Blob.cs:1470
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
int offset(int n, int c=0, int h=0, int w=0)
Returns the flat offset given the number, channel, height and width.
Definition: Blob.cs:850
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
Definition: Blob.cs:792
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
void copy(int nCount, long hSrc, long hDst, int nSrcOffset=0, int nDstOffset=0, long hStream=-1, bool? bSrcHalfSizeOverride=null, bool? bDstHalfSizeOverride=null)
Copy data from one block of GPU memory to another.
Definition: CudaDnn.cs:6007
long CreateImageOp(int nNum, double dfBrightnessProb, double dfBrightnessDelta, double dfContrastProb, double dfContrastLower, double dfContrastUpper, double dfSaturationProb, double dfSaturationLower, double dfSaturationUpper, long lRandomSeed=0)
Create a new ImageOp used to perform image operations on the GPU.
Definition: CudaDnn.cs:3153
void DistortImage(long h, int nCount, int nNum, int nDim, long hX, long hY)
Distort an image using the ImageOp handle provided.
Definition: CudaDnn.cs:3188
void FreeImageOp(long h)
Free an image op, freeing up all GPU memory used.
Definition: CudaDnn.cs:3171
The PersistCaffe class is used to load and save weight files in the .caffemodel format.
Definition: PersistCaffe.cs:20
BlobProto LoadBlobProto(byte[] rg, int nFieldId)
The LoadBlobProto function loads a BlobProto from a proto buffer.
Applies common transformations to the input data, such as scaling, mirroring, subtracting the image m...
int TransformLabel(SimpleDatum sd)
When active (label_mapping.Active = true), transforms the label if mapped using the label and boost....
Datum UnTransform(Blob< T > blob, bool bIncludeMean=true)
Reverse the transformation made when calling Transform.
SimpleDatum ExpandImage(SimpleDatum d)
Expand the datum and adjust the AnnotationGroup.
int[] InferBlobShape(List< Datum > rgD, int[] rgShape)
Infers the shape the transformed blob will have when the transformation is applied to the data.
virtual void InitRand()
Initialize the underlying random number generator.
SimpleDatum ImageMean
Get/set the image mean.
List< int > InferBlobShape(SimpleDatum d)
Infers the shape the transformed blob will have when the transformation is applied to the data.
List< int > InferBlobShape(int nChannels, int nWidth, int nHeight)
Infers the shape the transformed blob will have with the given channel, width and height.
void SetRange(Blob< T > b)
Scales the data of a Blob to fit in a given range based on the DataTransformers parameters.
Tuple< double, double > LastRange
Returns the last min/max observed.
SimpleDatum DistortImage(SimpleDatum d)
Distort the SimpleDatum.
SimpleDatum CropImage(SimpleDatum d, NormalizedBBox bbox)
Crop the SimpleDatum according to the bbox.
SimpleDatum MaskImage(SimpleDatum d)
Masks out portions of the SimpleDatum.
T[] Transform(SimpleDatum d, out bool bMirror, NormalizedBBox crop_bbox=null)
Transform the data into an array of transformed values.
DataTransformer(CudaDnn< T > cuda, Log log, TransformationParameter p, Phase phase, int nC, int nH, int nW, SimpleDatum imgMean=null)
The DataTransformer constructor.
void Dispose()
Cleanup all resources used.
float[] MaskData(int[] rgShape, float[] rgData)
Mask out the data based on the shape of the specified SimpleDatum.
int[] InferBlobShape(SimpleDatum d, int[] rgShape)
Infers the shape the transformed blob will have when the transformation is applied to the data.
void Update(int nDataSize=0, SimpleDatum imgMean=null)
Resync the transformer with changes in its parameter.
void Transform(List< Datum > rgDatum, Blob< T > blobTransformed, CudaDnn< T > cuda, Log log)
Transforms a list of Datum and places the transformed data into a Blob.
TransformationParameter param
Returns the TransformationParameter used.
AnnotationGroupCollection TransformAnnotation(SimpleDatum d, NormalizedBBox crop_bbox, bool bMirror, bool bResize)
Transform the annotation data.
T[] Transform(SimpleDatum d, out AnnotationGroupCollection rgTransformedAnnoVec, out bool bMirror, bool bResize=true)
Transform the data into an array of transformed values.
SimpleDatum ExpandImage(SimpleDatum d, NormalizedBBox expand_bbox, float fExpandRatio)
Expand the SimpleDatum according to the bbox.
AnnotationGroupCollection Transform(SimpleDatum d, Blob< T > blob)
Transforms a Datum and places the data into a Blob.
void DistortImage(Blob< T > b)
Distort the images within a Blob.
AnnotationGroupCollection Transform(SimpleDatum d, Blob< T > blob, out bool bDoMirror)
Transforms a Datum and places the data into a Blob.
void Transform(List< SimpleDatum > rgDatum, Blob< T > blobTransformed, CudaDnn< T > cuda, Log log, bool bJustFill=false)
Transforms a list of Datum and places the transformed data into a Blob.
virtual int Rand(int n)
Generates a random integer from Uniform({0, 1, ..., n-1}).
void Transform(SimpleDatum[] rgDatum, Blob< T > blobTransformed, CudaDnn< T > cuda, Log log, bool bJustFill=false)
Transforms a list of Datum and places the transformed data into a Blob.
T[] Transform(SimpleDatum d)
Transform the data into an array of transformed values.
The ImageTransforms class provides several useful image transformation function used with SSD.
void InferNewSize(ResizeParameter p, int nOldWidth, int nOldHeight, out int nNewWidth, out int nNewHeight)
Infer the new shape based on the resize policy.
SimpleDatum ApplyResize(SimpleDatum sd, ResizeParameter p)
The ApplyResize method resizes the SimpleDatum containing an image to a newly resized image as specif...
SimpleDatum ApplyDistortEx(SimpleDatum sd, DistortionParameter p)
The ApplyDistortEx method applies the distortion policy to the simple datum.
SimpleDatum ApplyNoise(SimpleDatum sd, NoiseParameter p)
The ApplyNoise method applies the noise policy to the SimpleDatum.
NormalizedBBox UpdateBBoxByResizePolicy(ResizeParameter p, int nOldWidth, int nOldHeight, NormalizedBBox bbox1)
Update the BBox size based on the Resize policy.
The BlobProto contains the description of a blob.
Definition: BlobProto.cs:15
List< float > data
Get/set the data as a List of float.
Definition: BlobProto.cs:180
List< double > double_data
Get/set the data as a List of double.
Definition: BlobProto.cs:162
int MapLabel(int nLabel, int nBoost)
Queries the mapped label for a given label.
int boundary_bottom
Get/set the mask boundary bottom.
int boundary_left
Get/set the mask boundary left.
int boundary_right
Get/set the mask boundary right.
int boundary_top
Get/set the mask boundary top.
bool Active
When active, the parameter is used, otherwise it is ignored.
Stores parameters used to apply transformation to the data layer's data.
int? random_seed
Only used during testing.
COLOR_ORDER
Defines the color ordering used to transform the input data.
DistortionParameter distortion_param
Optionally, specifies the distortion policy, otherwise this is null.
string mean_file
Specifies the path to file containing the image mean in the proto buffer format of a BlobProto.
List< double > mean_value
If specified can be repeated once (would subtract it from all the channels or can be repeated the sam...
bool mirror
Specify if we want to randomly mirror the data.
NoiseParameter noise_param
Optionally, specifies the noise policy, otherwise this is null.
double forced_positive_range_max
Specifies whether or not to fit the data into a forced range of [0, forced_positive_range_max].
bool use_imagedb_mean
Specifies whether to subtract the mean image from the image database, subtract the mean values,...
SCALE_OPERATOR
Defines the type of scale operator to use (if any).
uint crop_size
Specify if we would like to randomly crop an image.
DataLabelMappingParameter label_mapping
Optionally, specifies the label mapping which defines how to map labels when calling the DataTransfor...
COLOR_ORDER color_order
Specifies the color ordering to use. Native Caffe models often use COLOR_ORDER.BGR,...
ExpansionParameter expansion_param
Optionally, specifies the expansion policy, otherwise this is null.
SCALE_OPERATOR? scale_operator
Get/set the scale operator used to apply the scale value to the data-mean or data result.
ResizeParameter resize_param
Optionally, specifies the resize policy, otherwise this is null.
MaskParameter mask_param
Optionally, specifies the image mask which defines the boundary area that is set to black on the imag...
EmitConstraint emit_constraint
Optionally, specifies the emit constraint on emitting annotation after transformation,...
double scale
For data pre-processing, we can do simple scaling and subtracting the data mean, if provided....
float contrast_lower
Get/set lower bound for random contrast factor (default = 0.5).
float brightness_delta
Get/set amount to add to the pixel values within [-delta,delta] (default = 0)
float saturation_prob
Get/set probability of adjusting the saturation (default = 0).
float saturation_lower
Get/set lower bound for random saturation factor (default = 0.5).
bool use_gpu
Get/set whether or not to use the GPU for the distortion operations (default = true).
long random_seed
Get/set the random seed (default = 0, only used when testing).
float saturation_upper
Get/set upper bound for random saturation factor (default = 1.5).
float brightness_prob
Get/set probability of adjusting the brightness (default = 0).
float contrast_prob
Get/set probability of adjusting the contrast (default = 0).
float contrast_upper
Get/set upper bound for random contrast factor (default = 1.5).
float max_expand_ratio
Get/set the ratio to expand the image.
float prob
Get/set probability of using this expansion policy.
uint height
Get/set the resizing height.
uint width
Get/set the resizing width.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12