MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
CiFar10DataLoader.cs
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.IO;
5using System.IO.Compression;
6using System.Linq;
7using System.Text;
8using System.Threading.Tasks;
9using MyCaffe.basecode;
10using MyCaffe.db.image;
11using System.Drawing;
13using System.Threading;
14
15namespace MyCaffe.data
16{
23 public class CiFar10DataLoader
24 {
25 List<SimpleDatum> m_rgImg = new List<SimpleDatum>();
27 DatasetFactory m_factory = new DatasetFactory();
28 CancelEvent m_evtCancel;
29 Log m_log;
30
34 public event EventHandler<ProgressArgs> OnProgress;
38 public event EventHandler<ProgressArgs> OnError;
42 public event EventHandler OnCompleted;
43
51 {
52 m_param = param;
53 m_log = log;
54 m_evtCancel = evtCancel;
55 m_evtCancel.Reset();
56 }
57
/// <summary>
/// Returns the fixed name of the dataset; the source names used by this loader
/// are built by appending a suffix to this name.
/// </summary>
private string dataset_name
{
    get
    {
        const string strName = "CIFAR-10";
        return strName;
    }
}
62
/// <summary>
/// Create the dataset and load it into the database.
/// </summary>
/// <remarks>
/// The five training batch files are loaded into the source named dataset_name + ".training" and
/// the test batch file into the source named dataset_name + ".testing".  After each source has been
/// loaded, the mean of the images collected in m_rgImg is calculated and saved for that source.
/// The OnCompleted event fires on every exit path (success, early 'false' return, or exception).
/// </remarks>
/// <param name="nCreatorID">Optionally, specifies the creator ID passed to the DatasetDescriptor (default = 0).</param>
/// <returns>Returns <i>true</i> when the dataset was created, or <i>false</i> as soon as loading one of the files returns false.</returns>
public bool LoadDatabase(int nCreatorID = 0)
{
    // No catch block: an exception propagates unchanged with its original stack trace
    // (the previous 'throw excpt;' reset the trace), and the finally still fires OnCompleted.
    try
    {
        int nIdx = 0;
        int nTotal = 50000;

        reportProgress(nIdx, 0, "Loading database " + dataset_name + "...");

        DatasetFactory factory = new DatasetFactory();

        // Remove any data left over from a previous load of the training source.
        string strTrainSrc = dataset_name + ".training";
        int nSrcId = factory.GetSourceID(strTrainSrc);
        if (nSrcId != 0)
            factory.DeleteSourceData(nSrcId);

        string[] rgstrTrainFiles = new string[]
        {
            m_param.DataBatchFile1,
            m_param.DataBatchFile2,
            m_param.DataBatchFile3,
            m_param.DataBatchFile4,
            m_param.DataBatchFile5
        };

        // nIdx is advanced by loadFile and carries over from one batch file to the next.
        foreach (string strTrainFile in rgstrTrainFiles)
        {
            if (!loadFile(strTrainFile, strTrainSrc, nTotal, ref nIdx, m_log))
                return false;
        }

        SourceDescriptor srcTrain = factory.LoadSource(strTrainSrc);
        saveMeanOfLoadedImages(srcTrain.ID);

        // Start over for the testing source so that its mean only covers the test images.
        m_rgImg = new List<SimpleDatum>();
        nIdx = 0;
        nTotal = 10000;

        string strTestSrc = dataset_name + ".testing";
        nSrcId = factory.GetSourceID(strTestSrc);
        if (nSrcId != 0)
            factory.DeleteSourceData(nSrcId);

        if (!loadFile(m_param.TestBatchFile, strTestSrc, nTotal, ref nIdx, m_log))
            return false;

        SourceDescriptor srcTest = factory.LoadSource(strTestSrc);
        saveMeanOfLoadedImages(srcTest.ID);

        DatasetDescriptor ds = new DatasetDescriptor(nCreatorID, dataset_name, null, null, srcTrain, srcTest, dataset_name, dataset_name + " Dataset");
        factory.AddDataset(ds);
        factory.UpdateDatasetCounts(ds.ID);

        return true;
    }
    finally
    {
        if (OnCompleted != null)
            OnCompleted(this, new EventArgs());
    }
}

/// <summary>
/// Calculates the mean of the images currently held in m_rgImg and saves it for the given source.
/// </summary>
/// <param name="nSrcId">Specifies the ID of the data source that the mean belongs to.</param>
private void saveMeanOfLoadedImages(int nSrcId)
{
    // The abort handle is never signalled; it only satisfies the CalculateMean signature.
    // Disposing it here releases the wait handle that was previously left for the finalizer.
    using (ManualResetEvent evtAbort = new ManualResetEvent(false))
    {
        SimpleDatum sdMean = SimpleDatum.CalculateMean(m_log, m_rgImg.ToArray(), new WaitHandle[] { evtAbort });
        m_factory.SaveImageMean(sdMean, true, nSrcId);
    }
}
133
/// <summary>
/// Handler for log output that forwards the logged message and its progress to the OnProgress event.
/// </summary>
/// <param name="sender">Specifies the sender of the event (not used).</param>
/// <param name="e">Specifies the log arguments supplying the Progress value and Message.</param>
private void Log_OnWriteLine(object sender, LogArg e)
{
    // The progress value is scaled to an integer position out of a total of 1000.
    const int nScale = 1000;
    int nPosition = (int)(e.Progress * nScale);

    reportProgress(nPosition, nScale, e.Message);
}
138
/// <summary>
/// Loads one batch file of 10,000 labeled 32x32 3-channel images into the named data source.
/// </summary>
/// <remarks>
/// Each record read from the file is one label byte followed by 3072 image bytes.  Every image is
/// written to the raw image cache of m_factory and also appended to m_rgImg.
/// </remarks>
/// <param name="strImagesFile">Specifies the path of the batch file to read.</param>
/// <param name="strSourceName">Specifies the name of the data source that receives the images.</param>
/// <param name="nTotal">Specifies the total number of images expected for the source; used for progress reporting and to decide when to update the source counts.</param>
/// <param name="nIdx">Specifies the running image index, which is incremented once per image loaded.</param>
/// <param name="log">Specifies the output log passed to the factory when opening the source.</param>
/// <returns>Returns <i>false</i> when the cancel event is signalled during loading, otherwise <i>true</i>.</returns>
/// <exception cref="EndOfStreamException">Thrown when the file ends before all 10,000 records have been read.</exception>
private bool loadFile(string strImagesFile, string strSourceName, int nTotal, ref int nIdx, Log log)
{
    const int nImagesPerFile = 10000;
    const int nImageBytes = 3 * 32 * 32;

    Stopwatch sw = new Stopwatch();
    int nStart = nIdx;

    reportProgress(nIdx, nTotal, " Source: " + strSourceName);
    reportProgress(nIdx, nTotal, " loading " + strImagesFile + "...");

    using (FileStream fs = new FileStream(strImagesFile, FileMode.Open, FileAccess.Read))
    using (BinaryReader br = new BinaryReader(fs))
    {
        int nSrcId = m_factory.AddSource(strSourceName, 3, 32, 32, false, 0, true);

        m_factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, log);

        // Only the first file of a source (index still at 0) clears the existing source data.
        if (nIdx == 0)
            m_factory.DeleteSourceData();

        sw.Start();

        for (int i = 0; i < nImagesPerFile; i++)
        {
            int nLabel = (int)br.ReadByte();
            byte[] rgImgBytes = br.ReadBytes(nImageBytes);

            // ReadBytes returns a shorter array at the end of the stream; fail with a clear
            // error instead of an index error while building the image.
            if (rgImgBytes.Length != nImageBytes)
                throw new EndOfStreamException("The file '" + strImagesFile + "' ended unexpectedly while reading image " + i.ToString() + ".");

            Datum d;

            // The bitmap is only an intermediate used to build the Datum, so release it right away
            // rather than leaving one undisposed bitmap per image.
            using (Bitmap img = createImage(rgImgBytes))
            {
                d = ImageData.GetImageDataD(img, 3, false, nLabel);
            }

            m_factory.PutRawImageCache(nIdx, d, 5);
            m_rgImg.Add(new SimpleDatum(d));

            nIdx++;

            // Throttle progress reporting to roughly once per second.
            if (sw.ElapsedMilliseconds > 1000)
            {
                reportProgress(nStart + i, nTotal, "loading " + strImagesFile + " " + i.ToString("N0") + " of 10,000...");
                sw.Restart();
            }

            if (m_evtCancel.WaitOne(0))
                return false;
        }

        m_factory.ClearImageCache(true);

        // The source counts are updated once the last expected image of the source has been loaded.
        if (nIdx == nTotal)
            m_factory.UpdateSourceCounts();
    }

    return true;
}
201
/// <summary>
/// Builds a 32x32 bitmap from a raw image buffer holding three consecutive 1024-byte planes
/// that are read as red, green and blue, each plane ordered row by row.
/// </summary>
/// <param name="rgImg">Specifies the raw image bytes; indexes 0 through 3071 are read.</param>
/// <returns>The new Bitmap is returned.</returns>
private Bitmap createImage(byte[] rgImg)
{
    const int nSize = 32;
    const int nPlaneLength = nSize * nSize;
    const int nGreenStart = nPlaneLength;
    const int nBlueStart = 2 * nPlaneLength;

    Bitmap bmp = new Bitmap(nSize, nSize);

    for (int nRow = 0; nRow < nSize; nRow++)
    {
        for (int nCol = 0; nCol < nSize; nCol++)
        {
            // Position of this pixel within each of the three color planes.
            int nPixel = (nRow * nSize) + nCol;

            byte bRed = rgImg[nPixel];
            byte bGreen = rgImg[nGreenStart + nPixel];
            byte bBlue = rgImg[nBlueStart + nPixel];

            bmp.SetPixel(nCol, nRow, Color.FromArgb(bRed, bGreen, bBlue));
        }
    }

    return bmp;
}
232
/// <summary>
/// Fires the OnProgress event, when it has subscribers, with the given position, total and message.
/// </summary>
/// <param name="nIdx">Specifies the current index.</param>
/// <param name="nTotal">Specifies the total that the index is measured against.</param>
/// <param name="strMsg">Specifies the progress message.</param>
private void reportProgress(int nIdx, int nTotal, string strMsg)
{
    if (OnProgress == null)
        return;

    ProgressInfo info = new ProgressInfo(nIdx, nTotal, strMsg);
    OnProgress(this, new ProgressArgs(info));
}
238
/// <summary>
/// Fires the OnError event, when it has subscribers, with the message "ERROR" and the given exception.
/// </summary>
/// <param name="nIdx">Specifies the current index at the time of the error.</param>
/// <param name="nTotal">Specifies the total that the index is measured against.</param>
/// <param name="err">Specifies the exception describing the error.</param>
private void reportError(int nIdx, int nTotal, Exception err)
{
    if (OnError == null)
        return;

    ProgressInfo info = new ProgressInfo(nIdx, nTotal, "ERROR", err);
    OnError(this, new ProgressArgs(info));
}
244 }
245}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
void Reset()
Resets the event clearing any signaled state.
Definition: CancelEvent.cs:279
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
The Datum class is a simple wrapper to the SimpleDatum class to ensure compatibility with the origina...
Definition: Datum.cs:12
The ImageData class is a helper class used to convert between Datum, other raw data,...
Definition: ImageData.cs:14
static Datum GetImageDataD(Bitmap bmp, int nChannels, bool bDataIsReal, int nLabel, bool bUseLockBitmap=true, int[] rgFocusMap=null)
The GetImageDataD function converts a Bitmap into a Datum using the double type for real data.
Definition: ImageData.cs:44
The LogArg is passed as an argument to the Log::OnWriteLine event.
Definition: EventArgs.cs:53
string Message
Returns the message logged.
Definition: EventArgs.cs:101
The Log class provides general output in text form.
Definition: Log.cs:13
double Progress
Returns the progress value.
Definition: EventArgs.cs:44
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
static SimpleDatum CalculateMean(Log log, SimpleDatum[] rgImg, WaitHandle[] rgAbort)
Calculate the mean of an array of SimpleDatum and return the mean as a new SimpleDatum.
int ID
Get/set the database ID of the item.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
The SourceDescriptor class contains all information describing a data source.
The CiFar10DataLoader is used to create the CIFAR-10 dataset and load it into the database managed by...
EventHandler< ProgressArgs > OnError
The OnError event fires when an error occurs.
bool LoadDatabase(int nCreatorID=0)
Create the dataset and load it into the database.
CiFar10DataLoader(CiFar10DataParameters param, Log log, CancelEvent evtCancel)
The constructor.
EventHandler OnCompleted
The OnCompleted event fires once the dataset creation has completed.
EventHandler< ProgressArgs > OnProgress
The OnProgress event fires during the creation process to show the progress.
Contains the dataset parameters used to create the CIFAR-10 dataset.
string DataBatchFile2
Specifies the second training dataset file 'data_batch_2.bin'
string TestBatchFile
Specifies the testing dataset file 'test_batch.bin'
string DataBatchFile3
Specifies the third training dataset file 'data_batch_3.bin'
string DataBatchFile1
Specifies the first training dataset file 'data_batch_1.bin'
string DataBatchFile4
Specifies the fourth training dataset file 'data_batch_4.bin'
string DataBatchFile5
Specifies the fifth training dataset file 'data_batch_5.bin'
The Database class manages the actual connection to the physical database using Entity Framework from...
Definition: Database.cs:23
FORCE_LOAD
Defines the force load type.
Definition: Database.cs:57
The DatasetFactory manages the connection to the Database object.
void PutRawImageCache(int nIdx, SimpleDatum sd, int nBackgroundWritingThreadCount=0, string strDescription=null, bool bActive=true, params ParameterData[] rgParams)
Add a SimpleDatum to the RawImage cache.
int GetSourceID(string strName)
Returns the ID of a data source given its name.
bool SaveImageMean(SimpleDatum sd, bool bUpdate, int nSrcId=0)
Save the SimpleDatum as a RawImageMean in the database.
void DeleteSourceData(int nSrcId=0)
Delete the data source data (images, means, results and parameters) from the database.
int AddSource(SourceDescriptor src, ConnectInfo ci=null, bool? bSaveImagesToFileOverride=null)
Adds a new data source to the database.
void UpdateSourceCounts()
Saves the label cache, updates the label counts from the database and then updates the source counts ...
int AddDataset(DatasetDescriptor ds, ConnectInfo ci=null, bool? bSaveImagesToFileOverride=null)
Adds or updates the training source, testing source, dataset creator and dataset to the database.
void ClearImageCache(bool bSave)
Clear the RawImage cache and optionally save the images.
SourceDescriptor LoadSource(string strSource)
Load the source descriptor from a data source name.
void UpdateDatasetCounts(int nDsId, ConnectInfo ci=null)
Updates the dataset counts, and training/testing source counts.
void Open(SourceDescriptor src, int nCacheMax=500, ConnectInfo ci=null)
Open a given data source.
The descriptors namespace contains all descriptor used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
Definition: BinaryFile.cs:16
The MyCaffe.db.image namespace contains all image database related classes.
Definition: Database.cs:18
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12