MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
DatasetEx.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading;
6using MyCaffe.basecode;
7using System.Diagnostics;
9
10namespace MyCaffe.db.image
11{
16 public class DatasetEx : IDisposable
17 {
// Random number generator supplied to the constructor; handed to every ImageSet created by loadImageset.
18 CryptoRandom m_random = null;
// Lock object guarding Initialize and Unload.
19 object m_syncObj = new object();
// Private DatasetFactory instance created from the factory passed to the constructor; used to open and read the data sources.
20 DatasetFactory m_factory = null;
// Descriptor of the dataset; assigned by Initialize when a non-null descriptor is given.
21 DatasetDescriptor m_ds = null;
// Image set built from the descriptor's TestingSource by Initialize.
22 ImageSet m_TestingImages = null;
// Image set built from the descriptor's TrainingSource by Initialize.
23 ImageSet m_TrainingImages = null;
// When true, the Find methods return the training image set in place of the testing image set.
24 bool m_bUseTrainingImagesForTesting = false;
// Index following the last testing image loaded; wrapped back to 0 by Initialize once it reaches the source image count.
25 int m_nLastTestingImageIdx = 0;
// Index following the last training image loaded; wrapped back to 0 by Initialize once it reaches the source image count.
26 int m_nLastTrainingImageIdx = 0;
// IDs of the users registered through the constructor and AddUser.
27 List<Guid> m_rgUsers = new List<Guid>();
// Dataset ID recorded by Clone before it negates the ID of a time re-organized copy; stays 0 otherwise.
28 int m_nOriginalDsId = 0;
29
// Event subscribed onto each ImageSet created by loadImageset (when at least one handler is attached).
33 public event EventHandler<CalculateImageMeanArgs> OnCalculateImageMean;
34
/// <summary>
/// The DatasetEx constructor.
/// </summary>
/// <param name="user">Specifies the ID of the initial user; Guid.Empty registers no user.</param>
/// <param name="factory">Specifies the DatasetFactory from which this dataset constructs its own factory instance.</param>
/// <param name="random">Specifies the random number generator later passed to the image sets.</param>
public DatasetEx(Guid user, DatasetFactory factory, CryptoRandom random)
{
    m_random = random;

    // Only a real (non-empty) ID counts as a user.
    bool bRegisterUser = !Guid.Empty.Equals(user);
    if (bRegisterUser)
    {
        m_rgUsers.Add(user);
    }

    // The dataset works on its own factory instance, built from the one supplied.
    m_factory = new DatasetFactory(factory);
}
50
/// <summary>
/// Resets the testing image set first and then the training image set.
/// </summary>
public void Reset()
{
    ImageSet[] rgSets = new ImageSet[] { m_TestingImages, m_TrainingImages };

    foreach (ImageSet imgSet in rgSets)
    {
        imgSet.Reset();
    }
}
59
/// <summary>
/// Adds a user of the dataset.
/// </summary>
/// <param name="user">Specifies the ID of the user to add (no duplicate check is made).</param>
/// <returns>The number of registered users after the add.</returns>
public int AddUser(Guid user)
{
    List<Guid> rgUsers = m_rgUsers;

    rgUsers.Add(user);

    return rgUsers.Count;
}
70
/// <summary>
/// Removes a user of the dataset.
/// </summary>
/// <param name="user">Specifies the ID of the user to remove; the first matching entry (if any) is removed.</param>
/// <returns>The number of registered users remaining after the removal.</returns>
public int RemoveUser(Guid user)
{
    List<Guid> rgUsers = m_rgUsers;

    rgUsers.Remove(user);

    return rgUsers.Count;
}
81
/// <summary>
/// Initialize the DatasetEx by loading the training and testing data sources into memory.
/// </summary>
/// <param name="ds">Specifies the dataset descriptor to load. When null, the previously set descriptor is re-used and loading continues from the last image indexes (load-next mode).</param>
/// <param name="rgAbort">Specifies the wait handles polled (without blocking) to cancel the load.</param>
/// <param name="nPadW">Specifies the width padding passed through to the datum loader.</param>
/// <param name="nPadH">Specifies the height padding passed through to the datum loader.</param>
/// <param name="log">Optionally specifies the Log used for status output; may be null.</param>
/// <param name="loadMethod">Specifies the image loading method.</param>
/// <param name="nImageDbLoadLimit">Specifies the load limit; values above 0 are only accepted together with LOAD_ALL.</param>
/// <param name="bSkipMeanCheck">When true, the image mean is neither queried nor calculated.</param>
/// <returns>Returns true when both sources were loaded, or false when an abort handle was signaled.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="ds"/> is null and no descriptor was set by an earlier call.</exception>
/// <exception cref="Exception">Thrown when a load limit is combined with a load method other than LOAD_ALL.</exception>
public bool Initialize(DatasetDescriptor ds, WaitHandle[] rgAbort, int nPadW = 0, int nPadH = 0, Log log = null, DB_LOAD_METHOD loadMethod = DB_LOAD_METHOD.LOAD_ALL, int nImageDbLoadLimit = 0, bool bSkipMeanCheck = false)
{
    lock (m_syncObj)
    {
        if (loadMethod != DB_LOAD_METHOD.LOAD_ALL && nImageDbLoadLimit > 0)
            throw new Exception("Currently the load-limit only works with the LOAD_ALL image loading method.");

        SimpleDatum imgMean = null;

        // A null descriptor means 'continue loading the current dataset'.
        bool bLoadNext = (ds == null);

        if (ds != null)
            m_ds = ds;

        // Without any descriptor there is nothing to load; fail with a clear error instead of a NullReferenceException.
        if (m_ds == null)
            throw new ArgumentNullException("ds", "A dataset descriptor must be specified the first time Initialize is called.");

        if (m_ds.TrainingSource.Width == -1 || m_ds.TrainingSource.Height == -1)
        {
            // The log is optional (defaults to null), so it must be checked before use.
            if (log != null)
                log.WriteLine("WARNING: Cannot create a mean image for data sources that contain variable sized images. The mean check will be skipped.");

            bSkipMeanCheck = true;
        }

        m_TrainingImages = loadImageset("Training", m_ds.TrainingSource, rgAbort, ref imgMean, out m_nLastTrainingImageIdx, nPadW, nPadH, log, loadMethod, nImageDbLoadLimit, m_nLastTrainingImageIdx, bLoadNext, bSkipMeanCheck);
        // Wrap around once the end of the source has been reached.
        if (m_nLastTrainingImageIdx >= m_ds.TrainingSource.ImageCount)
            m_nLastTrainingImageIdx = 0;

        if (EventWaitHandle.WaitAny(rgAbort, 0) != EventWaitHandle.WaitTimeout)
            return false;

        // The image mean found (or calculated) for the training source is re-used for the testing source.
        m_TestingImages = loadImageset("Testing", m_ds.TestingSource, rgAbort, ref imgMean, out m_nLastTestingImageIdx, nPadW, nPadH, log, loadMethod, nImageDbLoadLimit, m_nLastTestingImageIdx, bLoadNext, bSkipMeanCheck);
        if (m_nLastTestingImageIdx >= m_ds.TestingSource.ImageCount)
            m_nLastTestingImageIdx = 0;

        if (EventWaitHandle.WaitAny(rgAbort, 0) != EventWaitHandle.WaitTimeout)
            return false;

        return true;
    }
}
129
/// <summary>
/// Copy the DatasetEx and its contents.
/// </summary>
/// <param name="bReOrganizeByTime">
/// When true, the images of both sets are combined, ordered by Description and then TimeStamp,
/// and re-distributed so that the first images fill the training set and the remainder fill the
/// testing set. The dataset ID and source IDs of the copy are negated and the original dataset ID is recorded.
/// </param>
/// <returns>The new DatasetEx copy.</returns>
/// <exception cref="Exception">Thrown when re-organizing and the image sets are empty or not fully loaded.</exception>
public DatasetEx Clone(bool bReOrganizeByTime = false)
{
    DatasetEx ds = new DatasetEx(Guid.Empty, m_factory, m_random);

    foreach (Guid g in m_rgUsers)
    {
        ds.m_rgUsers.Add(g);
    }

    ds.m_ds = new DatasetDescriptor(m_ds);
    ds.m_TestingImages = m_TestingImages.Clone();
    ds.m_TrainingImages = m_TrainingImages.Clone();
    ds.m_bUseTrainingImagesForTesting = m_bUseTrainingImagesForTesting;

    if (bReOrganizeByTime)
    {
        // Remember the real ID, then mark the copy with a negated ID.
        ds.m_nOriginalDsId = ds.DatasetID;
        ds.DatasetID *= -1;

        int nTestingCount = ds.m_TestingImages.Count;
        int nTrainingCount = ds.m_TrainingImages.Count;

        List<SimpleDatum> rgSd = new List<SimpleDatum>();

        rgSd.AddRange(ds.m_TestingImages.Images);
        rgSd.AddRange(ds.m_TrainingImages.Images);

        // Every slot must hold a loaded image: the ordering below dereferences each entry,
        // so a partially loaded set would otherwise fail with a NullReferenceException.
        if (rgSd.Count == 0 || rgSd.Any(p => p == null))
            throw new Exception("You must first load 'all' images with LOAD_ALL or LOAD_FROM_SERVICE!");

        rgSd = rgSd.OrderBy(p => p.Description).ThenBy(p => p.TimeStamp).ToList();

        // The first images (in sorted order) become the training set...
        for (int i = 0; i < nTrainingCount; i++)
        {
            ds.m_TrainingImages.Images[i] = rgSd[i];
            ds.m_TrainingImages.Images[i].Index = i;
        }

        // ...and the remaining images become the testing set.
        for (int i = 0; i < nTestingCount; i++)
        {
            ds.m_TestingImages.Images[i] = rgSd[i + nTrainingCount];
            ds.m_TestingImages.Images[i].Index = i;
        }

        ds.m_TrainingImages.Source.ID *= -1;
        ds.m_TestingImages.Source.ID *= -1;
        ds.Descriptor.TrainingSource.ID *= -1;
        ds.Descriptor.TestingSource.ID *= -1;

        // The label sets must be rebuilt to reflect the new image distribution.
        ds.m_TrainingImages.ReloadLabelSets();
        ds.m_TestingImages.ReloadLabelSets();
    }

    return ds;
}
191
/// <summary>
/// Relabels both the testing and training image sets using the label mapping collection.
/// </summary>
/// <param name="col">Specifies the label mapping collection applied to each image set.</param>
public void Relabel(LabelMappingCollection col)
{
    m_TestingImages.Relabel(col);
    m_TrainingImages.Relabel(col);
}
201
/// <summary>
/// Reloads the label sets of the training image set first and then of the testing image set.
/// </summary>
public void ReloadLabelSets()
{
    ImageSet[] rgSets = new ImageSet[] { m_TrainingImages, m_TestingImages };

    foreach (ImageSet imgSet in rgSets)
    {
        imgSet.ReloadLabelSets();
    }
}
210
/// <summary>
/// Get/set whether or not to use the training images when testing.
/// </summary>
public bool UseTrainingImagesForTesting
{
    get { return m_bUseTrainingImagesForTesting; }
    set { m_bUseTrainingImagesForTesting = value; }
}
219
/// <summary>
/// Opens a data source, loads its images into a new ImageSet (unless an on-demand
/// load method is in effect) and attaches the image mean to the set.
/// </summary>
/// <param name="strType">Specifies the source type name used in log messages (e.g. "Training").</param>
/// <param name="src">Specifies the data source to load.</param>
/// <param name="rgAbort">Specifies the wait handles polled (without blocking) to cancel the load.</param>
/// <param name="imgMean">Specifies the image mean to use; when null (and the mean check is not skipped) it is loaded from the database or calculated and saved, and returned here.</param>
/// <param name="nLastImageIdx">Returns the index following the last image index requested.</param>
/// <param name="nPadW">Specifies the width padding passed through to the datum loader.</param>
/// <param name="nPadH">Specifies the height padding passed through to the datum loader.</param>
/// <param name="log">Optionally specifies the Log used for status output; may be null.</param>
/// <param name="loadMethod">Specifies the requested load method; it may be changed internally when no image mean or no images exist.</param>
/// <param name="nImageDbLoadLimit">Specifies the maximum number of images to load; values of 0 or less load the full image count.</param>
/// <param name="nImageDbLoadLimitStartIdx">Specifies the index of the first image to load.</param>
/// <param name="bLoadNext">When true and an on-demand method is in effect, the last image index is advanced by the load limit.</param>
/// <param name="bSkipMeanCheck">When true, the image mean is neither queried nor calculated.</param>
/// <returns>The loaded ImageSet, or null when an abort handle was signaled during loading.</returns>
private ImageSet loadImageset(string strType, SourceDescriptor src, WaitHandle[] rgAbort, ref SimpleDatum imgMean, out int nLastImageIdx, int nPadW = 0, int nPadH = 0, Log log = null, DB_LOAD_METHOD loadMethod = DB_LOAD_METHOD.LOAD_ALL, int nImageDbLoadLimit = 0, int nImageDbLoadLimitStartIdx = 0, bool bLoadNext = false, bool bSkipMeanCheck = false)
{
    try
    {
        RawImageMean imgMeanRaw = null;

        m_factory.Open(src);
        nLastImageIdx = nImageDbLoadLimitStartIdx;

        // Load methods other than LOAD_ALL rely on a mean already stored in the database;
        // when none exists, fall back to LOAD_ALL so the mean can be calculated.
        if (loadMethod != DB_LOAD_METHOD.LOAD_ALL && !bSkipMeanCheck)
        {
            if (imgMean == null)
            {
                imgMeanRaw = m_factory.GetRawImageMean();
                if (imgMeanRaw == null)
                {
                    if (log != null)
                        log.WriteLine("WARNING: No image mean exists in the database, changing image database load from " + loadMethod.ToString() + " to " + DB_LOAD_METHOD.LOAD_ALL.ToString());

                    loadMethod = DB_LOAD_METHOD.LOAD_ALL;
                }
            }
        }

        int nCount = src.ImageCount;
        if (nCount == 0)
        {
            if (log != null)
                log.WriteLine("WARNING: Could not find any images with " + strType + " Source = '" + src.Name + "'. If this is a training dataset, you will need to enable the 'UseTrainingSrcForTesting' setting.");

            if (loadMethod != DB_LOAD_METHOD.LOAD_ON_DEMAND && loadMethod != DB_LOAD_METHOD.LOAD_ON_DEMAND_NOCACHE)
            {
                if (log != null)
                    log.WriteLine("Because there are no images in this set, the image loading method has been changed to LOAD_ON_DEMAND for this dataset.");

                loadMethod = DB_LOAD_METHOD.LOAD_ON_DEMAND;
            }
        }

        ImageSet imgset = new ImageSet(m_factory, src, loadMethod, nImageDbLoadLimit, m_random);

        if (log != null && nCount > 0)
            log.WriteLine("Loading '" + src.Name + "' - " + nCount.ToString("N0") + " images.");

        if (OnCalculateImageMean != null)
            imgset.OnCalculateImageMean += OnCalculateImageMean;

        if (loadMethod != DB_LOAD_METHOD.LOAD_ON_DEMAND && loadMethod != DB_LOAD_METHOD.LOAD_ON_DEMAND_NOCACHE)
        {
            int nBatchSize = getLoadBatchSize(src.Height * src.Width);
            Stopwatch sw = new Stopwatch();

            if (nImageDbLoadLimit <= 0)
                nImageDbLoadLimit = nCount;

            List<int> rgIdx = (nCount == 0) ? new List<int>() : getIndexList(nImageDbLoadLimitStartIdx, nImageDbLoadLimit);
            int nIdx = 0;

            sw.Start();

            while (nIdx < rgIdx.Count)
            {
                int nImageIdx = rgIdx[nIdx];
                int nImageCount = Math.Min(rgIdx.Count - nIdx, nBatchSize);

                List<RawImage> rgImg = m_factory.GetRawImagesAt(nImageIdx, nImageCount);

                for (int j = 0; j < rgImg.Count; j++)
                {
                    SimpleDatum sd1 = m_factory.LoadDatum(rgImg[j], nPadW, nPadH);
                    imgset.Add(nIdx + j, sd1);

                    // Report progress and poll for an abort roughly once per second.
                    if (sw.Elapsed.TotalMilliseconds > 1000)
                    {
                        if (log != null)
                        {
                            double dfPct = (double)(nIdx + j) / (double)nCount;
                            log.Progress = dfPct;
                            log.WriteLine("image loading at " + dfPct.ToString("P") + "...");
                        }

                        sw.Restart();

                        if (EventWaitHandle.WaitAny(rgAbort, 0) != EventWaitHandle.WaitTimeout)
                            return null;
                    }
                }

                nIdx += rgImg.Count;

                // An empty batch means no further progress can be made; always stop here,
                // otherwise the loop would spin forever on the same index.
                if (rgImg.Count == 0)
                {
                    if (log != null && nIdx < nCount)
                        log.WriteLine("WARNING: Loaded " + nIdx.ToString("N0") + " images, yet " + (nCount - nIdx).ToString("N0") + " images are unaccounted for. You may need to reindex the dataset.");

                    break;
                }
            }

            if (log != null)
                log.Progress = 0;

            if (rgIdx.Count > 0)
                nLastImageIdx = rgIdx[rgIdx.Count - 1] + 1;
        }
        else if (bLoadNext)
        {
            nLastImageIdx += nImageDbLoadLimit;
        }

        if (imgMean == null && !bSkipMeanCheck)
        {
            if (imgMeanRaw == null)
                imgMeanRaw = m_factory.GetRawImageMean();

            if (imgMeanRaw != null)
                imgMean = m_factory.LoadDatum(imgMeanRaw, nPadW, nPadH);
            else
            {
                if (log != null)
                    log.WriteLine("Calculating mean...");

                // No stored mean exists: calculate it from the image set and save it to the database.
                imgMean = imgset.GetImageMean(log, rgAbort);
                m_factory.PutRawImageMean(imgMean, true);
            }
        }

        if (imgMean != null)
            imgset.SetImageMean(imgMean);

        imgset.CompleteLoad(nLastImageIdx);

        return imgset;
    }
    finally
    {
        // The source is always closed, including on abort and on error.
        m_factory.Close();
    }
}

/// <summary>
/// Returns the number of images to query per database batch; larger images use smaller batches.
/// </summary>
/// <param name="nImageSize">Specifies the image size as height x width.</param>
/// <returns>The batch size to use.</returns>
private static int getLoadBatchSize(int nImageSize)
{
    if (nImageSize > 60000)
        return 5000;

    if (nImageSize > 20000)
        return 7500;

    if (nImageSize > 3000)
        return 10000;

    return 20000;
}
366
/// <summary>
/// Builds the list of consecutive indexes nStartIdx, nStartIdx + 1, ... containing nCount entries.
/// </summary>
/// <param name="nStartIdx">Specifies the first index of the list.</param>
/// <param name="nCount">Specifies the number of indexes to create; values of 0 or less give an empty list.</param>
/// <returns>The list of indexes.</returns>
private List<int> getIndexList(int nStartIdx, int nCount)
{
    List<int> rgResult = new List<int>();
    int nNext = nStartIdx;

    for (int nRemaining = nCount; nRemaining > 0; nRemaining--)
    {
        rgResult.Add(nNext);
        nNext++;
    }

    return rgResult;
}
378
/// <summary>
/// Saves the image mean in a SimpleDatum to the database.
/// </summary>
/// <param name="nSrcId">Specifies the ID of the data source; it must match the testing or training image set.</param>
/// <param name="sd">Specifies the image mean data to save.</param>
/// <param name="bUpdate">Specifies the update flag passed through to the factory.</param>
/// <returns>False when the source ID belongs to neither image set, otherwise the result returned by the factory.</returns>
public bool SaveImageMean(int nSrcId, SimpleDatum sd, bool bUpdate)
{
    bool bKnownSource = (m_TestingImages.SourceID == nSrcId || m_TrainingImages.SourceID == nSrcId);

    if (bKnownSource)
    {
        return m_factory.SaveImageMean(sd, bUpdate, nSrcId);
    }

    return false;
}
394
/// <summary>
/// Query the image mean for a data source.
/// </summary>
/// <param name="nSrcId">Specifies the ID of the data source; it must match the testing or training image set.</param>
/// <returns>Null when the source ID belongs to neither image set, otherwise the image mean returned by the factory.</returns>
public SimpleDatum QueryImageMean(int nSrcId)
{
    bool bKnownSource = (m_TestingImages.SourceID == nSrcId || m_TrainingImages.SourceID == nSrcId);

    if (bKnownSource)
    {
        return m_factory.QueryImageMean(nSrcId);
    }

    return null;
}
408
/// <summary>
/// Unloads the images of the testing image set and then of the training image set,
/// while holding the same lock used by Initialize.
/// </summary>
public void Unload()
{
    lock (m_syncObj)
    {
        ImageSet[] rgSets = new ImageSet[] { m_TestingImages, m_TrainingImages };

        foreach (ImageSet imgSet in rgSets)
        {
            imgSet.Unload();
        }
    }
}
420
/// <summary>
/// Returns the fraction of images loaded for training, testing and both combined.
/// </summary>
/// <remarks>
/// Each value is a plain floating point division of loaded count by total count, so a
/// total of zero with zero loaded yields NaN for that value.
/// </remarks>
/// <param name="dfTraining">Returns the loaded count divided by the total count of the training image set.</param>
/// <param name="dfTesting">Returns the loaded count divided by the total count of the testing image set.</param>
/// <returns>The combined loaded count divided by the combined total count.</returns>
public double GetPercentageLoaded(out double dfTraining, out double dfTesting)
{
    double dfTrainTotal = m_TrainingImages.GetTotalCount();
    double dfTrainLoaded = m_TrainingImages.GetLoadedCount();
    double dfTestTotal = m_TestingImages.GetTotalCount();
    double dfTestLoaded = m_TestingImages.GetLoadedCount();

    dfTraining = dfTrainLoaded / dfTrainTotal;
    dfTesting = dfTestLoaded / dfTestTotal;

    // Sum as integers first (as before), then divide as doubles.
    int nLoadedAll = (int)dfTrainLoaded + (int)dfTestLoaded;
    int nImagesAll = (int)dfTrainTotal + (int)dfTestTotal;

    return (double)nLoadedAll / (double)nImagesAll;
}
442
/// <summary>
/// Releases all resources used: drops the descriptor reference and disposes the testing
/// image set, the training image set and the factory (in that order), clearing each field.
/// </summary>
/// <param name="bDisposing">Not consulted by this implementation.</param>
protected virtual void Dispose(bool bDisposing)
{
    m_ds = null;

    ImageSet testing = m_TestingImages;
    if (testing != null)
    {
        testing.Dispose();
        m_TestingImages = null;
    }

    ImageSet training = m_TrainingImages;
    if (training != null)
    {
        training.Dispose();
        m_TrainingImages = null;
    }

    DatasetFactory factory = m_factory;
    if (factory != null)
    {
        factory.Dispose();
        m_factory = null;
    }
}
469
/// <summary>
/// Releases all resources used.
/// </summary>
public void Dispose()
{
    Dispose(true);

    // Standard dispose pattern for an unsealed class: once disposed explicitly, a
    // finalizer (should a derived class declare one) no longer needs to run.
    GC.SuppressFinalize(this);
}
477
/// <summary>
/// Returns the ImageSet corresponding to a data source ID.
/// </summary>
/// <param name="nSourceID">Specifies the ID of the data source to find.</param>
/// <returns>
/// The matching image set, or null when neither set matches. A match on the testing set
/// returns the training set instead when training images are used for testing.
/// </returns>
public ImageSet Find(int nSourceID)
{
    // The testing set is checked first; the training set is only examined when it does not match.
    if (m_TestingImages.SourceID != nSourceID)
    {
        return (m_TrainingImages.SourceID == nSourceID) ? m_TrainingImages : null;
    }

    return m_bUseTrainingImagesForTesting ? m_TrainingImages : m_TestingImages;
}
500
/// <summary>
/// Returns the ImageSet corresponding to a data source name.
/// </summary>
/// <param name="strSource">Specifies the name of the data source to find.</param>
/// <returns>
/// The matching image set, or null when neither set matches. A match on the testing set
/// returns the training set instead when training images are used for testing.
/// </returns>
public ImageSet Find(string strSource)
{
    // The testing set is checked first; the training set is only examined when it does not match.
    if (m_TestingImages.SourceName != strSource)
    {
        return (m_TrainingImages.SourceName == strSource) ? m_TrainingImages : null;
    }

    return m_bUseTrainingImagesForTesting ? m_TrainingImages : m_TestingImages;
}
523
/// <summary>
/// Returns the dataset descriptor of the dataset managed by the DatasetEx object.
/// </summary>
public DatasetDescriptor Descriptor
{
    get { return m_ds; }
}
531
/// <summary>
/// Get/set the ID stored in the dataset descriptor.
/// </summary>
public int DatasetID
{
    get
    {
        return m_ds.ID;
    }
    set
    {
        m_ds.ID = value;
    }
}
540
/// <summary>
/// Returns the original DatasetID if this is a cloned re-organized dataset, otherwise 0 is returned.
/// </summary>
public int OriginalDatasetID
{
    get { return m_nOriginalDsId; }
}
548
/// <summary>
/// Returns the name stored in the dataset descriptor.
/// </summary>
public string DatasetName
{
    get
    {
        return m_ds.Name;
    }
}
556 }
557}
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
Definition: CryptoRandom.cs:14
The LabelMappingCollection manages a collection of LabelMapping's.
Definition: LabelMapping.cs:15
The Log class provides general output in text form.
Definition: Log.cs:13
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
SimpleDatum Add(SimpleDatum d)
Creates a new SimpleDatum and adds another SimpleDatum to it.
int ID
Get/set the database ID of the item.
string Name
Get/set the name of the item.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
SourceDescriptor TrainingSource
Get/set the training data source.
SourceDescriptor TestingSource
Get/set the testing data source.
The SourceDescriptor class contains all information describing a data source.
bool IsRealData
Returns whether or not each data point represents a real or integer number. Integer numbers are u...
int Height
Returns the height of each data item in the data source.
int Width
Returns the width of each data item in the data source.
int ImageCount
Returns the number of images within this data source.
The DatasetEx class provides the in-memory dataset functionality that is used by the image database t...
Definition: DatasetEx.cs:17
int DatasetID
Returns the dataset ID of the dataset managed by the DatasetEx object.
Definition: DatasetEx.cs:536
void Dispose()
Releases all resources used.
Definition: DatasetEx.cs:473
void ReloadLabelSets()
Reloads both the training and testing label sets.
Definition: DatasetEx.cs:205
bool Initialize(DatasetDescriptor ds, WaitHandle[] rgAbort, int nPadW=0, int nPadH=0, Log log=null, DB_LOAD_METHOD loadMethod=DB_LOAD_METHOD.LOAD_ALL, int nImageDbLoadLimit=0, bool bSkipMeanCheck=false)
Initialize the DatasetEx by loading the training and testing data sources into memory.
Definition: DatasetEx.cs:94
ImageSet Find(string strSource)
Returns the ImageSet corresponding to a data source name.
Definition: DatasetEx.cs:506
DatasetEx Clone(bool bReOrganizeByTime=false)
Copy the DatasetEx and its contents.
Definition: DatasetEx.cs:135
DatasetEx(Guid user, DatasetFactory factory, CryptoRandom random)
The DatasetEx constructor.
Definition: DatasetEx.cs:41
string DatasetName
Returns the dataset name of the dataset managed by the DatasetEx object.
Definition: DatasetEx.cs:553
void Reset()
Reset the image indexes for both the training and testing image sets.
Definition: DatasetEx.cs:54
virtual void Dispose(bool bDisposing)
Releases all resources used.
Definition: DatasetEx.cs:447
SimpleDatum QueryImageMean(int nSrcId)
Query the image mean for a data source.
Definition: DatasetEx.cs:400
int OriginalDatasetID
Returns the original DatasetID if this is a cloned re-organized dataset, otherwise 0 is returned.
Definition: DatasetEx.cs:545
void Unload()
Unload the images of the training and testing image sets.
Definition: DatasetEx.cs:412
ImageSet Find(int nSourceID)
Returns the ImageSet corresponding to a data source ID.
Definition: DatasetEx.cs:483
bool UseTrainingImagesForTesting
Get/set whether or not to use the training images when testing.
Definition: DatasetEx.cs:215
DatasetDescriptor Descriptor
Returns the dataset descriptor of the dataset managed by the DatasetEx object.
Definition: DatasetEx.cs:528
int RemoveUser(Guid user)
Remove a user of the dataset.
Definition: DatasetEx.cs:76
bool SaveImageMean(int nSrcId, SimpleDatum sd, bool bUpdate)
Saves the image mean in a SimpleDatum to the database.
Definition: DatasetEx.cs:386
EventHandler< CalculateImageMeanArgs > OnCalculateImageMean
The OnCalculateImageMean event is passed to each image set and fires each time the image set needs to ...
Definition: DatasetEx.cs:33
void Relabel(LabelMappingCollection col)
Relabels both the testing and training image sets using the label mapping collection.
Definition: DatasetEx.cs:196
int AddUser(Guid user)
Adds a user of the dataset.
Definition: DatasetEx.cs:65
double GetPercentageLoaded(out double dfTraining, out double dfTesting)
Returns the total percentage of images loaded for testing, training and combined.
Definition: DatasetEx.cs:427
The DatasetFactory manages the connection to the Database object.
SimpleDatum QueryImageMean(int nSrcId=0)
Return the SimpleDatum for the image mean from the open data source.
bool SaveImageMean(SimpleDatum sd, bool bUpdate, int nSrcId=0)
Save the SimpleDatum as a RawImageMean in the database.
RawImageMean GetRawImageMean()
Return the RawImageMean for the open data source.
List< RawImage > GetRawImagesAt(int nImageIdx, int nImageCount, int nSrcId=0, string strDescription=null)
Returns a list of RawImages from the database for a data source.
int PutRawImageMean(SimpleDatum sd, bool bUpdate, ConnectInfo ci=null)
Save the SimpleDatum as a RawImageMean in the database for the open data source.
void Close()
Close the current data source used.
void Dispose()
Releases all resources used.
void Open(SourceDescriptor src, int nCacheMax=500, ConnectInfo ci=null)
Open a given data source.
SimpleDatum LoadDatum(int nImageId, int nChannels, bool bDataIsReal, int nLabel, int nSrcId=0)
Loads a new SimpleDatum from a RawImage ID.
The ImageSet class contains the list of images for a data source as well as a list of LabelSets that m...
Definition: Imageset.cs:17
ImageSet Clone()
Returns a copy of the ImageSet.
Definition: Imageset.cs:220
string SourceName
Returns the data source name of the image set.
Definition: Imageset.cs:410
virtual void Dispose(bool bDisposing)
Releases the resources used.
Definition: Imageset.cs:89
int GetLoadedCount()
Returns the number of images loaded.
Definition: Imageset.cs:885
int SourceID
Returns the data source ID of the image set.
Definition: Imageset.cs:402
void Relabel(LabelMappingCollection col)
Applies the label mapping to the image set.
Definition: Imageset.cs:206
void Reset()
Resets the indexes and limited loaded images (if used).
Definition: Imageset.cs:294
int Count
Returns the number of images in the image set.
Definition: Imageset.cs:426
void ReloadLabelSets()
Reload the label sets.
Definition: Imageset.cs:343
void Unload()
Unload all images in the image set.
Definition: Imageset.cs:863
int GetTotalCount()
Returns the total number of images.
Definition: Imageset.cs:894
The descriptors namespace contains all descriptors used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
DB_LOAD_METHOD
Defines how to load the items into the in-memory database.
Definition: Interfaces.cs:154
The MyCaffe.db.image namespace contains all image database related classes.
Definition: Database.cs:18
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12