MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
MnistDataLoaderLite.cs
1// Copyright (c) 2018-2020 SignalPop LLC, and contributors. All rights reserved.
2// License: Apache 2.0
3// License: https://github.com/MyCaffe/MyCaffe/blob/master/LICENSE
4// Original Source: https://github.com/MyCaffe/MyCaffe/blob/master/MyCaffe.data/MnistDataLoaderLite.cs
5using System;
6using System.Collections.Generic;
7using System.Diagnostics;
8using System.IO;
9using System.IO.Compression;
10using System.Linq;
11
12namespace MyCaffe.data
13{
21 {
// Settings captured by the constructor.
private bool m_bEnableTrace;
private string m_strDataPath;

// Paths of the expanded .bin files; assigned by ExtractFiles.
private string m_strTestImagesBin;
private string m_strTestLabelsBin;
private string m_strTrainImagesBin;
private string m_strTrainLabelsBin;

// Image geometry; height and width are assigned by loadFile from the image file header.
private int m_nChannels = 1;
private int m_nHeight;
private int m_nWidth;

/// <summary>
/// The OnProgress event fires during the creation process to show the progress.
/// </summary>
public event EventHandler<ProgressArgs> OnProgress;

/// <summary>
/// The OnError event fires when an error occurs.
/// </summary>
public event EventHandler<ProgressArgs> OnError;
40
/// <summary>
/// The constructor.
/// </summary>
/// <param name="strDataPath">Folder that holds the MNIST .gz files; the expanded .bin files are written next to them.</param>
/// <param name="bEnableTrace">When true, progress and error messages are also written through Trace.</param>
public MnistDataLoaderLite(string strDataPath, bool bEnableTrace = false)
{
    m_strDataPath = strDataPath;
    m_bEnableTrace = bEnableTrace;
}
58
63 {
64 get { return m_strTestImagesBin; }
65 }
66
71 {
72 get { return m_strTestLabelsBin; }
73 }
74
79 {
80 get { return m_strTrainImagesBin; }
81 }
82
87 {
88 get { return m_strTrainLabelsBin; }
89 }
90
/// <summary>
/// Return the image channel count (should = 1 for black and white images).
/// </summary>
public int Channels
{
    get
    {
        return m_nChannels;
    }
}
98
/// <summary>
/// Return the image height. The value stays 0 until loadFile has read an image file header.
/// </summary>
public int Height
{
    get
    {
        return m_nHeight;
    }
}
106
/// <summary>
/// Return the image width. The value stays 0 until loadFile has read an image file header.
/// </summary>
public int Width
{
    get
    {
        return m_nWidth;
    }
}
114
/// <summary>
/// Extract the .gz files, expanding them to .bin files.
/// </summary>
/// <param name="strDstPath">Not read by this method; every file is expanded next to its .gz source in the data path.</param>
public void ExtractFiles(string strDstPath)
{
    Trace.WriteLine("Unpacking the files");

    // All four archives live directly under the data path.
    string strRoot = m_strDataPath.TrimEnd('\\');

    m_strTestImagesBin = expandFile(strRoot + "\\t10k-images-idx3-ubyte.gz");
    m_strTestLabelsBin = expandFile(strRoot + "\\t10k-labels-idx1-ubyte.gz");
    m_strTrainImagesBin = expandFile(strRoot + "\\train-images-idx3-ubyte.gz");
    m_strTrainLabelsBin = expandFile(strRoot + "\\train-labels-idx1-ubyte.gz");
}
127
/// <summary>
/// Decompress a .gz file into a sibling file named after it with ".bin" appended.
/// </summary>
/// <remarks>
/// An existing .bin file is reused without being decompressed again. Because of that
/// shortcut, a partially written .bin file must never be left behind: if decompression
/// fails, the incomplete output is deleted before the exception is rethrown.
/// </remarks>
/// <param name="strFile">Path of the .gz file to expand.</param>
/// <returns>Path of the expanded .bin file.</returns>
private string expandFile(string strFile)
{
    string strDstFile = strFile + ".bin";
    if (File.Exists(strDstFile))
        return strDstFile;

    try
    {
        // The source is opened first so that a missing .gz never creates an empty .bin.
        using (FileStream fs = File.OpenRead(strFile))
        using (FileStream fsBin = File.Create(strDstFile))
        using (GZipStream decompStrm = new GZipStream(fs, CompressionMode.Decompress))
        {
            decompStrm.CopyTo(fsBin);
        }
    }
    catch
    {
        // Best-effort cleanup; a failure here must not hide the original error.
        try
        {
            File.Delete(strDstFile);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }

        throw;
    }

    return strDstFile;
}
149
/// <summary>
/// Expand the MNIST .gz files and load the training and testing images into memory.
/// </summary>
/// <remarks>
/// Any exception is first reported through reportError (which raises OnError) and is then rethrown.
/// </remarks>
/// <param name="rgTrainingData">Receives one (pixel bytes, label) tuple per training image.</param>
/// <param name="rgTestingData">Receives one (pixel bytes, label) tuple per testing image.</param>
public void ExtractImages(out List<Tuple<byte[], int>> rgTrainingData, out List<Tuple<byte[], int>> rgTestingData)
{
    try
    {
        ExtractFiles(m_strDataPath);

        reportProgress(0, 0, "Creating MNIST images...");

        string strRoot = m_strDataPath.TrimEnd('\\');

        rgTrainingData = loadFile(m_strTrainImagesBin, m_strTrainLabelsBin, strRoot + "\\images_training");
        rgTestingData = loadFile(m_strTestImagesBin, m_strTestLabelsBin, strRoot + "\\images_testing");
    }
    catch (Exception excpt)
    {
        reportError(0, 0, excpt);

        // A bare rethrow keeps the original stack trace; 'throw excpt;' would reset it to this frame.
        throw;
    }
}
175
/// <summary>
/// Read an image file and its matching label file and return every image paired with its label.
/// </summary>
/// <remarks>
/// Also stores the row and column counts read from the image file header in m_nHeight and m_nWidth.
/// </remarks>
/// <param name="strImagesFile">Path of the expanded image .bin file.</param>
/// <param name="strLabelsFile">Path of the expanded label .bin file.</param>
/// <param name="strExportPath">Directory that is created when missing; this method writes nothing into it.</param>
/// <returns>One tuple per item: the raw pixel bytes (rows * cols) and the label value.</returns>
/// <exception cref="InvalidDataException">A magic number is wrong, or the item and label counts differ.</exception>
private List<Tuple<byte[], int>> loadFile(string strImagesFile, string strLabelsFile, string strExportPath)
{
    if (!Directory.Exists(strExportPath))
        Directory.CreateDirectory(strExportPath);

    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " loading " + strImagesFile + "...");

    List<Tuple<byte[], int>> rgData = new List<Tuple<byte[], int>>();
    BinaryFile image_file = null;
    BinaryFile label_file = null;

    try
    {
        // Both files are opened inside the try so that the image file is still
        // released when opening the label file fails.
        image_file = new BinaryFile(strImagesFile);
        label_file = new BinaryFile(strLabelsFile);

        // Verify the files by their leading magic numbers.
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
            throw new InvalidDataException("Incorrect image file magic.");

        if (magicLbl != 2049)
            throw new InvalidDataException("Incorrect label file magic.");

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
            throw new InvalidDataException("The number of items must be equal to the number of labels!");

        // The image header continues with the dimensions shared by every image.
        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();

        m_nHeight = (int)rows;
        m_nWidth = (int)cols;

        // Every image occupies the same number of bytes, so compute it once.
        int nImageSize = (int)(rows * cols);
        string strAction = "loading";

        reportProgress(0, (int)num_items, " " + strAction + " a total of " + num_items.ToString() + " items.");
        reportProgress(0, (int)num_items, " (with rows: " + rows.ToString() + ", cols: " + cols.ToString() + ")");

        sw.Start();

        for (int i = 0; i < num_items; i++)
        {
            byte[] rgPixels = image_file.ReadBytes(nImageSize);
            byte[] rgLabel = label_file.ReadBytes(1);

            rgData.Add(new Tuple<byte[], int>(rgPixels, rgLabel[0]));

            // Report progress at most about once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, (int)num_items, " " + strAction + " data...");
                sw.Restart();
            }
        }

        reportProgress((int)num_items, (int)num_items, " " + strAction + " completed.");
    }
    finally
    {
        if (image_file != null)
            image_file.Dispose();

        if (label_file != null)
            label_file.Dispose();
    }

    return rgData;
}
250
/// <summary>
/// Publish a progress message: written to Trace when tracing is enabled, and raised through OnProgress when it has subscribers.
/// </summary>
/// <param name="nIdx">Index of the current item.</param>
/// <param name="nTotal">Total number of items; 0 makes the traced percentage 0.</param>
/// <param name="strMsg">Message text.</param>
private void reportProgress(int nIdx, int nTotal, string strMsg)
{
    if (m_bEnableTrace)
    {
        double dfFraction = 0;

        if (nTotal != 0)
            dfFraction = (double)nIdx / (double)nTotal;

        Trace.WriteLine(string.Concat("(", dfFraction.ToString("P"), ") ", strMsg));
    }

    if (OnProgress == null)
        return;

    ProgressInfo info = new ProgressInfo(nIdx, nTotal, strMsg);
    OnProgress(this, new ProgressArgs(info));
}
262
/// <summary>
/// Publish an error: its message is written to Trace when tracing is enabled, and the exception is raised through OnError when it has subscribers.
/// </summary>
/// <param name="nIdx">Index of the item being processed when the error occurred.</param>
/// <param name="nTotal">Total number of items; 0 makes the traced percentage 0.</param>
/// <param name="err">The exception to report.</param>
private void reportError(int nIdx, int nTotal, Exception err)
{
    if (m_bEnableTrace)
    {
        double dfFraction = 0;

        if (nTotal != 0)
            dfFraction = (double)nIdx / (double)nTotal;

        Trace.WriteLine(string.Concat("(", dfFraction.ToString("P"), ") ERROR: ", err.Message));
    }

    if (OnError == null)
        return;

    ProgressInfo info = new ProgressInfo(nIdx, nTotal, "ERROR", err);
    OnError(this, new ProgressArgs(info));
}
274 }
275}
The BinaryFile class is used to manage binary files used by the MNIST dataset creator.
Definition: BinaryFile.cs:21
UInt32 ReadUInt32()
Reads in a UINT32 and performs an endian swap.
Definition: BinaryFile.cs:89
byte[] ReadBytes(int nCount)
Reads bytes from the file.
Definition: BinaryFile.cs:101
virtual void Dispose(bool disposing)
Dispose if disposing, or ignore if already disposed.
Definition: BinaryFile.cs:51
The MnistDataLoader is used to extract the MNIST dataset to disk and load the data into the training p...
int Channels
Return the image channel count (should = 1 for black and white images).
void ExtractImages(out List< Tuple< byte[], int > > rgTrainingData, out List< Tuple< byte[], int > > rgTestingData)
Extract the images from the .bin files and save to disk
void ExtractFiles(string strDstPath)
Extract the .gz files, expanding them to .bin files.
int Width
Return the image width.
MnistDataLoaderLite(string strDataPath, bool bEnableTrace=false)
The constructor.
string TrainLabelsBinFileName
Returns the train labels bin filename.
EventHandler< ProgressArgs > OnProgress
The OnProgress event fires during the creation process to show the progress.
string TrainImagesBinFileName
Returns the train images bin filename.
string TestLabelsBinFileName
Returns the test labels bin filename.
EventHandler< ProgressArgs > OnError
The OnError event fires when an error occurs.
int Height
Return the image height.
string TestImagesBinFileName
Returns the test images bin filename.
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16