MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
ImageDataLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading;
6using System.Diagnostics;
7using MyCaffe.basecode;
8using MyCaffe.db.image;
9using MyCaffe.param;
10using MyCaffe.common;
11using MyCaffe.data;
12using MyCaffe.fillers;
13using System.IO;
14using System.Drawing;
15
16namespace MyCaffe.layers
17{
24 {
/// <summary>
/// Random number generator used for shuffling, random skipping and random selection.
/// </summary>
CryptoRandom m_random = new CryptoRandom();
/// <summary>
/// The (image file, label) pairs read from the list file.
/// </summary>
List<Tuple<string, int>> m_rgLines = new List<Tuple<string, int>>();
/// <summary>
/// Index into m_rgLines of the next item to load.
/// </summary>
int m_nLinesId = 0;
/// <summary>
/// Specifies a first timer used to calculate the batch time.
/// Created eagerly so that the timing code can call Restart()/Stop() on it
/// without a null check; nothing else in this class instantiates it.
/// </summary>
protected Stopwatch m_swTimerBatch = new Stopwatch();
/// <summary>
/// Specifies a second timer used to calculate the transaction time.
/// Created eagerly for the same reason as m_swTimerBatch.
/// </summary>
protected Stopwatch m_swTimerTransaction = new Stopwatch();
/// <summary>
/// Specifies the read time (accumulated in milliseconds).
/// </summary>
protected double m_dfReadTime;
/// <summary>
/// Specifies the transaction time (accumulated in milliseconds).
/// </summary>
protected double m_dfTransTime;
/// <summary>
/// Host-side staging buffer that receives the transformed data of every item in a batch.
/// </summary>
private T[] m_rgTopData = null;
45
/// <summary>
/// The ImageDataLayer constructor.
/// </summary>
/// <param name="cuda">CudaDnn instance, forwarded to the base constructor.</param>
/// <param name="log">Log used for output, forwarded to the base constructor.</param>
/// <param name="p">LayerParameter describing this layer, forwarded to the base constructor.</param>
/// <param name="evtCancel">Cancel event, forwarded to the base constructor.</param>
public ImageDataLayer(CudaDnn<T> cuda, Log log, LayerParameter p, CancelEvent evtCancel)
    : base(cuda, log, p, null, evtCancel)   // the fourth base argument is intentionally passed as null.
{
    // Tag this instance with its layer type.
    m_type = LayerParameter.LayerType.IMAGE_DATA;
}
58
59
/// <summary>
/// Releases the resources used by the layer by delegating to the base implementation;
/// this class adds no clean-up work of its own.
/// </summary>
protected override void dispose()
{
    // Nothing layer-specific to release here.
    base.dispose();
}
65
/// <summary>
/// Returns the exact number of bottom blobs, which is 0: no bottom blobs are used by this layer.
/// </summary>
public override int ExactNumBottomBlobs
{
    get
    {
        return 0;
    }
}
73
/// <summary>
/// Returns the exact number of top blobs, which is 2 for this layer: data and label.
/// </summary>
public override int ExactNumTopBlobs
{
    get
    {
        return 2;
    }
}
81
/// <summary>
/// Shuffle the images so that they are loaded in a random order.
/// </summary>
/// <remarks>
/// Uses an in-place Fisher-Yates shuffle over m_rgLines, which runs in linear time.
/// Drawing a random element and removing it from the middle of the list on every
/// step costs a shift of the remaining elements each time, which becomes quadratic
/// for large file lists.
/// </remarks>
protected virtual void shuffleImages()
{
    // Walk from the back; position i receives a uniformly chosen element of [0, i].
    // Index 0 needs no draw because only a single candidate is left for it.
    for (int i = m_rgLines.Count - 1; i > 0; i--)
    {
        // m_random.Next(n) is used as an index in [0, n), so pass i + 1 to include i itself.
        int nSwap = m_random.Next(i + 1);

        if (nSwap == i)
            continue;

        Tuple<string, int> tmp = m_rgLines[i];
        m_rgLines[i] = m_rgLines[nSwap];
        m_rgLines[nSwap] = tmp;
    }
}
104
/// <summary>
/// Reads the list file named by data_param.source into m_rgLines.
/// </summary>
/// <remarks>
/// Each non-blank line is expected to be '&lt;file&gt;&lt;separator&gt;&lt;integer label&gt;' where the
/// separator is ' ', ',' or ';'. The line is split at the LAST separator character so
/// that file names containing spaces still work when ',' or ';' separates the label.
/// Trailing whitespace is ignored and whitespace-only lines are skipped.
/// The method does nothing when m_rgLines is already populated.
/// A failed check is reported through m_log; a non-numeric label makes int.Parse throw.
/// </remarks>
private void loadFileList()
{
    // Already loaded - keep the current (possibly shuffled) list.
    if (m_rgLines.Count > 0)
        return;

    // Read the file with filenames and labels.
    string strSource = m_param.data_param.source;
    m_log.WriteLine("INFO: Opening file '" + strSource + "'.");

    char[] rgSeparators = new char[] { ' ', ',', ';' };

    using (StreamReader sr = new StreamReader(strSource))
    {
        string strLine;

        while ((strLine = sr.ReadLine()) != null)
        {
            // Drop trailing whitespace so that it is not mistaken for the separator.
            strLine = strLine.TrimEnd();
            if (strLine.Length == 0)
                continue;

            // The label never contains a separator, so the last separator of any kind
            // is the one that splits the file name from the label.
            int nPos = strLine.LastIndexOfAny(rgSeparators);
            m_log.CHECK_GT(nPos, 0, "The separator character of ' ' or ',' or ';' could not be found!");

            string strFile = strLine.Substring(0, nPos);
            string strLabel = strLine.Substring(nPos + 1);
            int nLabel = int.Parse(strLabel);

            m_rgLines.Add(new Tuple<string, int>(strFile, nLabel));
        }
    }

    m_log.CHECK_GT(m_rgLines.Count, 0, "The file is empty!");
}
144
/// <summary>
/// Returns image_data_param.root_folder normalized to end with exactly one '\',
/// or an empty string when no root folder is configured.
/// </summary>
/// <returns>The root folder prefix that is prepended to each image file name.</returns>
private string getRootFolder()
{
    string strRoot = m_param.image_data_param.root_folder;

    if (!string.IsNullOrEmpty(strRoot))
    {
        // Strip any trailing slashes (either kind) before appending a single backslash.
        return strRoot.TrimEnd('\\', '/') + "\\";
    }

    return "";
}
152
/// <summary>
/// Pre-initializes m_src with an 'Internal' SourceDescriptor describing the image size.
/// </summary>
/// <remarks>
/// The width and height come from image_data_param.new_width / new_height. When both
/// are zero the file list is loaded and the first listed image is read to obtain the
/// actual dimensions. The channel count is 3 for color images and 1 otherwise.
/// </remarks>
/// <returns>Always returns true.</returns>
protected override bool setupSourceDescriptor()
{
    string strRoot = getRootFolder();
    bool bColor = m_param.image_data_param.is_color;
    int nHeight = (int)m_param.image_data_param.new_height;
    int nWidth = (int)m_param.image_data_param.new_width;

    int nChannels = 1;
    if (bColor)
        nChannels = 3;

    bool bSizeUnspecified = (nHeight == 0 && nWidth == 0);
    if (bSizeUnspecified)
    {
        // No explicit size given - probe the first image in the list for its native size.
        loadFileList();
        Datum first = loadImage(strRoot, m_rgLines[0], bColor, nHeight, nWidth);
        nHeight = first.height;
        nWidth = first.width;
    }

    m_src = new basecode.descriptors.SourceDescriptor(0, "Internal", nWidth, nHeight, nChannels, false, false);

    return true;
}
176
/// <summary>
/// Sets up the layer: loads the file list, picks the starting line index, reads one image
/// and uses it to shape colTop[0] (data), colTop[1] (label) and every prefetch batch.
/// </summary>
/// <param name="colBottom">Bottom blobs; not referenced by this method.</param>
/// <param name="colTop">Top blobs; colTop[0] is reshaped for data and colTop[1] for labels.</param>
182 protected override void DataLayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
183 {
184 int nBatchSize = (int)m_param.data_param.batch_size;
185 int nNewHeight = (int)m_param.image_data_param.new_height;
186 int nNewWidth = (int)m_param.image_data_param.new_width;
187 bool bIsColor = m_param.image_data_param.is_color;
188 string strRootFolder = getRootFolder();
189
// new_height and new_width must either both be 0 (keep the native image size) or both be > 0.
190 m_log.CHECK((nNewHeight == 0 && nNewWidth == 0) || (nNewHeight > 0 && nNewWidth > 0), "Current implementation requires new_height and new_width to be set at the same time.");
191
192 // Read the file with filenames and labels.
193 loadFileList();
194
195 // Randomly shuffle the images.
// NOTE(review): original lines 196 and 198 are missing from this listing - presumably the
// shuffle condition and the shuffle call; confirm against the real source file.
197 {
199 }
200 else if (m_param.image_data_param.rand_skip == 0)
201 {
// NOTE(review): original line 202 is missing from this listing.
// Warn when neither shuffling nor skipping is used and the solver rank is greater than 0.
203 if (layer_param != null && layer_param.solver_rank > 0)
204 m_log.WriteLine("WARNING: Shuffling or skipping recommended for multi-GPU.");
205 }
206
207 m_log.WriteLine("A total of " + m_rgLines.Count.ToString("N0") + " images.");
208
209 m_nLinesId = 0;
210 // Check if we would need to randomly skip a few data points.
// NOTE(review): original line 211 (the condition guarding this block) is missing from this
// listing - presumably a rand_skip > 0 test; confirm against the real source file.
212 {
213 int nSkip = m_random.Next((int)m_param.image_data_param.rand_skip);
214 m_log.WriteLine("Skipping first " + nSkip.ToString() + " data points.");
215 m_log.CHECK_GT(m_rgLines.Count, nSkip, "Not enough data points to skip.");
// Start reading at the randomly chosen offset.
216 m_nLinesId = nSkip;
217 }
218
219 // Read an image and use it to initialize the top blob.
220 Datum datum = loadImage(strRootFolder, m_rgLines[m_nLinesId], bIsColor, nNewHeight, nNewWidth);
221 // Use data_transformer to infer the expected blob shape from the image.
222 List<int> rgTopShape = m_transformer.InferBlobShape(datum);
223
224 // Reshape colTop[0] and prefetch data according to the batch size.
225 rgTopShape[0] = nBatchSize;
226 colTop[0].Reshape(rgTopShape);
227
// Give every prefetch batch's data blob the same shape as the top data blob.
228 for (int i = 0; i < m_rgPrefetch.Length; i++)
229 {
230 m_rgPrefetch[i].Data.Reshape(rgTopShape);
231 }
232
233 m_log.WriteLine("output data size: " + colTop[0].ToSizeString());
234
235 // label.
// One label value per item in the batch.
236 List<int> rgLabelShape = new List<int>() { nBatchSize };
237
238 colTop[1].Reshape(rgLabelShape);
239
// Give every prefetch batch's label blob the same shape as the top label blob.
240 for (int i = 0; i < m_rgPrefetch.Length; i++)
241 {
242 m_rgPrefetch[i].Label.Reshape(rgLabelShape);
243 }
244 }
245
/// <summary>
/// Loads a single image file from disk, optionally resizes it, and converts it into a Datum
/// carrying the label of the given item.
/// </summary>
/// <param name="strRootFolder">Folder prefix prepended to the item's file name (may be empty).</param>
/// <param name="item">The (file name, label) pair to load.</param>
/// <param name="bIsColor">When true the Datum is created with 3 channels, otherwise with 1.</param>
/// <param name="nNewHeight">Target height; resizing only occurs when both nNewHeight and nNewWidth are > 0.</param>
/// <param name="nNewWidth">Target width; resizing only occurs when both nNewHeight and nNewWidth are > 0.</param>
/// <returns>The Datum produced by ImageData.GetImageDataD (T is double) or ImageData.GetImageDataF (otherwise).</returns>
private Datum loadImage(string strRootFolder, Tuple<string, int> item, bool bIsColor, int nNewHeight, int nNewWidth)
{
    string strFile = strRootFolder + item.Item1;
    // Take the label from the item being loaded, NOT from m_rgLines[m_nLinesId]:
    // callers may pass an item at a different index (e.g. a randomly selected one),
    // and the label must always belong to the same entry as the file.
    int nLabel = item.Item2;
    m_log.CHECK(File.Exists(strFile), "Could not find the file '" + strFile + "'!");

    Bitmap bmp = new Bitmap(strFile);

    try
    {
        // Resize the image if needed.
        bool bResizeRequested = (nNewWidth > 0 && nNewHeight > 0);
        if (bResizeRequested && (bmp.Width != nNewWidth || bmp.Height != nNewHeight))
        {
            Bitmap bmpNew = ImageTools.ResizeImage(bmp, nNewWidth, nNewHeight);
            bmp.Dispose();
            bmp = bmpNew;
        }

        int nChannels = (bIsColor) ? 3 : 1;

        if (typeof(T) == typeof(double))
            return ImageData.GetImageDataD(bmp, nChannels, false, nLabel);

        return ImageData.GetImageDataF(bmp, nChannels, false, nLabel);
    }
    finally
    {
        // Always release the bitmap, even when resizing or conversion throws.
        bmp.Dispose();
    }
}
273
/// <summary>
/// Loads one batch: for each of batch_size items an image is read with loadImage, transformed
/// with m_transformer, copied into the m_rgTopData staging buffer, and its label stored;
/// the staged data and labels are pushed into batch.Data and batch.Label.
/// </summary>
/// <remarks>
/// NOTE(review): several original lines (292, 304, 313, 348, 357, 360, 373, 374) are missing from
/// this listing - presumably the display_timing and shuffle guards; confirm against the real
/// source file before relying on the control flow described in the comments below.
/// </remarks>
/// <param name="batch">The batch whose Data and Label blobs are filled.</param>
278 protected override void load_batch(Batch<T> batch)
279 {
280 m_log.CHECK(batch.Data.count() > 0, "There is no space allocated for data!");
281 int nBatchSize = (int)m_param.data_param.batch_size;
282 int nNewHeight = (int)m_param.image_data_param.new_height;
283 int nNewWidth = (int)m_param.image_data_param.new_width;
284 bool bIsColor = m_param.image_data_param.is_color;
285 string strRootFolder = getRootFolder();
286
// Host-side label buffer sized to the label blob's element count.
287 T[] rgTopLabel = null;
288 int nCount = batch.Label.count();
289 m_log.CHECK_GT(nCount, 0, "The label count cannot be zero!");
290 rgTopLabel = new T[nCount];
291
// NOTE(review): the guard for this block (original line 292) is not visible in this listing.
// Reset the batch timer and the accumulated read/transform times.
293 {
294 m_swTimerBatch.Restart();
295 m_dfReadTime = 0;
296 m_dfTransTime = 0;
297 }
298
299 Datum datum;
// Number of elements per item; computed from the first datum of the batch below.
300 int nDim = 0;
301
302 for (int i = 0; i < nBatchSize; i++)
303 {
// NOTE(review): original line 304 is not visible in this listing.
305 m_swTimerTransaction.Restart();
306
307 m_log.CHECK_GT(m_rgLines.Count, m_nLinesId, "The lines ID is too small!");
// Default to the sequential position; pick a random line when random selection is enabled.
308 int nIdx = m_nLinesId;
309 if (m_param.data_param.enable_random_selection.GetValueOrDefault(false))
310 nIdx = m_random.Next(m_rgLines.Count);
311 datum = loadImage(strRootFolder, m_rgLines[nIdx], bIsColor, nNewHeight, nNewWidth);
312
// NOTE(review): the guard for this block (original line 313) is not visible in this listing.
// Add the elapsed time to the read time, then restart the timer for the transform phase.
314 {
315 m_dfReadTime += m_swTimerTransaction.Elapsed.TotalMilliseconds;
316 m_swTimerTransaction.Restart();
317 }
318
319 if (i == 0)
320 {
321 // Reshape according to the first datum of each batch
322 // on single input batches allows for inputs of varying dimension.
323 // Use data transformer to infer the expected blob shape for datum.
324 List<int> rgTopShape = m_transformer.InferBlobShape(datum);
325
326 // Reshape batch according to the batch size.
327 rgTopShape[0] = nBatchSize;
328 batch.Data.Reshape(rgTopShape);
329
// nDim = product of all shape entries after index 0, i.e. the element count of one item.
330 nDim = 1;
331 for (int k = 1; k < rgTopShape.Count; k++)
332 {
333 nDim *= rgTopShape[k];
334 }
335
// (Re)allocate the staging buffer only when the required length changes.
336 int nTopLen = nDim * nBatchSize;
337 if (m_rgTopData == null || m_rgTopData.Length != nTopLen)
338 m_rgTopData = new T[nTopLen];
339 }
340
341 // Apply data transformations (mirror, scaling, crop, etc)
342 int nDimCount = nDim;
343 T[] rgTrans = m_transformer.Transform(datum);
// Copy this item's transformed values into slot i of the staging buffer.
344 Array.Copy(rgTrans, 0, m_rgTopData, nDim * i, nDimCount);
345
346 rgTopLabel[i] = (T)Convert.ChangeType(datum.Label, typeof(T));
347
// NOTE(review): original line 348 is not visible in this listing.
349 m_dfTransTime += m_swTimerTransaction.Elapsed.TotalMilliseconds;
350
// Abort without touching the batch when cancellation has been signaled.
351 if (m_evtCancel.WaitOne(0))
352 return;
353
// NOTE(review): by brace matching in this listing, the statements from here down to the
// wrap-around check still sit inside the per-item for loop - confirm against the real source.
354 batch.Data.SetCPUData(m_rgTopData);
355 batch.Label.SetCPUData(rgTopLabel);
356
// NOTE(review): the guard for this block (original line 357) and original line 360 are not
// visible in this listing.
358 {
359 m_swTimerBatch.Stop();
361 m_log.WriteLine("Prefetch batch: " + m_swTimerBatch.ElapsedMilliseconds.ToString() + " ms.", true);
362 m_log.WriteLine(" Read time: " + m_dfReadTime.ToString() + " ms.", true);
363 m_log.WriteLine("Transform time: " + m_dfTransTime.ToString() + " ms.", true);
364 }
365
366 // Go to the next iter.
367 m_nLinesId++;
368
369 // We have reached the end, restart from the first.
370 if (m_nLinesId == m_rgLines.Count)
371 {
372 m_nLinesId = 0;
// NOTE(review): original lines 373-374 are not visible in this listing - presumably an
// optional re-shuffle at the end of the list; confirm against the real source file.
375 }
376 }
377 }
378 }
379}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
Definition: CryptoRandom.cs:14
The Datum class is a simple wrapper to the SimpleDatum class to ensure compatibility with the origina...
Definition: Datum.cs:12
int height
Specifies the height of the data.
Definition: Datum.cs:123
int width
Specifies the width of the data.
Definition: Datum.cs:131
The ImageData class is a helper class used to convert between Datum, other raw data,...
Definition: ImageData.cs:14
static Datum GetImageDataF(Bitmap bmp, int nChannels, bool bDataIsReal, int nLabel, bool bUseLockBitmap=true, int[] rgFocusMap=null)
The GetImageDataF function converts a Bitmap into a Datum using the float type for real data.
Definition: ImageData.cs:181
static Datum GetImageDataD(Bitmap bmp, int nChannels, bool bDataIsReal, int nLabel, bool bUseLockBitmap=true, int[] rgFocusMap=null)
The GetImageDataD function converts a Bitmap into a Datum using the double type for real data.
Definition: ImageData.cs:44
The ImageTools class is a helper class used to manipulate image data.
Definition: ImageTools.cs:16
static Bitmap ResizeImage(Image image, int width, int height)
Resize the image to the specified width and height.
Definition: ImageTools.cs:39
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK(bool b, string str)
Test a flag for true.
Definition: Log.cs:227
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
int Label
Return the known label of the data.
The BlobCollection contains a list of Blobs.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
SourceDescriptor m_src
Specifies the SourceDescriptor of the data source.
DataTransformer< T > m_transformer
Specifies the DataTransformer used to transform each data item as it loaded.
The BasePrefetchingDataLayer is the base class for data Layers that pre-fetch data before feeding the...
CancelEvent m_evtCancel
Specifies the cancellation event for the internal thread.
Batch< T >[] m_rgPrefetch
Specifies the pre-fetch cache.
The Batch contains both the data and label Blobs of the batch.
Blob< T > Label
Returns the label Blob of the batch.
Blob< T > Data
Returns the data Blob of the batch.
The ImageDataLayer loads data from the image files located in the root directory specified....
Stopwatch m_swTimerTransaction
Specifies a second timer used to calculate the transaction time.
override int ExactNumTopBlobs
Specifies the exact number of top blobs as 2 for this layer: data, label.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int ExactNumBottomBlobs
No bottom blobs are used by this layer.
virtual void shuffleImages()
Shuffle the images so that they are loaded in a random order.
override bool setupSourceDescriptor()
Allows any derivative classes to pre-initialize the m_src which is used in LayerSetup before the Data...
override void load_batch(Batch< T > batch)
Load a batch of data in the background (this is run on an internal thread within the BasePrefetchingD...
Stopwatch m_swTimerBatch
Specifies a first timer used to calculate the batch time.
double m_dfTransTime
Specifies the transaction time.
ImageDataLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel)
The ImageDataLayer constructor.
double m_dfReadTime
Specifies the read time.
override void DataLayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the ImageDataLayer by starting up the pre-fetching.
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
LayerParameter layer_param
Returns the LayerParameter for this Layer.
Definition: Layer.cs:899
The LayerParameterEx class is used when sharing another Net to conserve GPU memory and extends the La...
Definition: Layer.cs:1750
virtual uint batch_size
Specifies the batch size.
bool display_timing
(optional, default = false) Specifies whether or not to display the timing of each image read.
bool? enable_random_selection
(optional, default = null) Specifies whether or not to randomly query images from the data source....
string source
When used with the DATA parameter, specifies the data 'source' within the database....
string root_folder
Specifies the folder containing the image files.
bool shuffle
Specifies whether or not the ImageLayer should shuffle the list of files at each epoch.
uint new_width
When > 0, specifies the new width of the images fed into the network (default = 0).
uint new_height
When > 0, specifies the new height of the images fed into the network (default = 0).
uint rand_skip
Specifies the amount for the image data layer to skip a few points to avoid all asynchronous sgd clie...
bool is_color
Specifies whether or not the image is color or gray-scale.
Specifies the base parameter for all layers.
int solver_rank
Returns the SolverRank of the Solver using this LayerParameter (if any).
DataParameter data_param
Returns the parameter set when initialized with LayerType.DATA
LayerType
Specifies the layer type.
ImageDataParameter image_data_param
Returns the parameter set when initialized with LayerType.IMAGE_DATA
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.db.image namespace contains all image database related classes.
Definition: Database.cs:18
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12