MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
HDF5DataLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6using MyCaffe.common;
7using MyCaffe.param;
8using MyCaffe.fillers;
9using System.IO;
10
11namespace MyCaffe.layers.hdf5
12{
18 public class HDF5DataLayer<T> : Layer<T>
19 {
/// <summary>HDF5 file names read from the 'source' list file during LayerSetUp.</summary>
private List<string> m_rgstrFileNames = new List<string>();
/// <summary>Number of entries in m_rgstrFileNames.</summary>
private int m_nNumFiles = 0;
/// <summary>Index into m_rgFilePermutation of the file currently loaded.</summary>
private int m_nCurrentFile = 0;
/// <summary>Index into m_rgDataPermutation of the next row to output.</summary>
private int m_nCurrentRow = 0;
/// <summary>Blobs holding the datasets of the currently loaded file, one per top.</summary>
private BlobCollection<T> m_colHdfBlobs = new BlobCollection<T>();
/// <summary>Order in which the rows of the currently loaded file are visited.</summary>
private List<int> m_rgDataPermutation = new List<int>();
/// <summary>Order in which the files are visited.</summary>
private List<int> m_rgFilePermutation = new List<int>();
/// <summary>Running count of items passed over by Next; compared against the solver rank in Skip.</summary>
private ulong m_lOffset = 0;
28
/// <summary>
/// The HDF5DataLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter describing the layer; its hdf5_data_param is read during setup and forward.</param>
public HDF5DataLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    // NOTE(review): the constructor signature and this statement were missing from the listing
    // and have been restored; m_type is later used in the LayerSetUp warning message.
    // Confirm the layer type value against the LayerParameter.LayerType enum.
    m_type = LayerParameter.LayerType.HDF5_DATA;
}
49
/// <summary>
/// Release all internal blobs.
/// </summary>
protected override void dispose()
{
    // Free the blobs holding the currently loaded HDF5 data before the base class cleans up.
    m_colHdfBlobs.Dispose();

    base.dispose();
}
58
/// <summary>
/// Load the data from an HDF5 file.
/// </summary>
/// <remarks>
/// One dataset is loaded per top name into the internal blob collection (blobs are created on the
/// first call and re-used afterwards), the first-axis size of all datasets is verified to match,
/// and the row permutation is rebuilt (and shuffled when the shuffle parameter is set).
/// </remarks>
/// <param name="strFile">Specifies the HDF5 file to load.</param>
protected virtual void LoadHDF5FileData(string strFile)
{
    m_log.WriteLine("Loading HDF5 file: '" + strFile + "'");
    HDF5<T> hdf5 = new HDF5<T>(m_cuda, m_log, strFile);

    int nTopCount = m_param.top.Count;

    try
    {
        for (int i = 0; i < nTopCount; i++)
        {
            Blob<T> blob;

            // Create the holding blobs on the first load, re-use them on later loads.
            if (m_colHdfBlobs.Count < nTopCount)
            {
                blob = new Blob<T>(m_cuda, m_log, false);
                m_colHdfBlobs.Add(blob);
            }
            else
            {
                blob = m_colHdfBlobs[i];
            }

            blob.Name = m_param.top[i];

            // Allow reshape here, as we are loading data not params.
            hdf5.load_nd_dataset(blob, m_param.top[i], true);
        }
    }
    finally
    {
        // Always release the file, even when a dataset fails to load.
        hdf5.Dispose();
    }

    // MinTopBlobs=1 guarantees at least one top blob
    m_log.CHECK_GE(m_colHdfBlobs[0].num_axes, 1, "Input must have at least 1 axis.");
    int nNum = m_colHdfBlobs[0].shape(0);

    for (int i = 1; i < nTopCount; i++)
    {
        m_log.CHECK_EQ(m_colHdfBlobs[i].shape(0), nNum, "The 'num' on all blobs must be equal.");
    }

    // Default to identity permutation.
    m_rgDataPermutation = Enumerable.Range(0, nNum).ToList();

    // Shuffle if needed
    if (m_param.hdf5_data_param.shuffle)
    {
        m_rgDataPermutation = Utility.RandomShuffle(m_rgDataPermutation);
        m_log.WriteLine("Successfully loaded " + nNum.ToString() + " rows (shuffled).");
    }
    else
    {
        m_log.WriteLine("Successfully loaded " + nNum.ToString() + " rows.");
    }
}
118
/// <summary>
/// Returns the exact number of bottom (input) Blobs, which is 0 for this layer.
/// </summary>
public override int ExactNumBottomBlobs
{
    get
    {
        return 0;
    }
}
126
/// <summary>
/// Returns the minimum number of required top (output) Blobs, which is 1 for this layer.
/// </summary>
public override int MinTopBlobs
{
    get
    {
        return 1;
    }
}
134
/// <summary>
/// Setup the layer.
/// </summary>
/// <remarks>
/// Reads the list of HDF5 file names from the 'source' file, builds the file visiting order
/// (shuffled when the shuffle parameter is set), loads the first file and shapes each top blob
/// like its loaded dataset with the first axis replaced by the batch size.
/// </remarks>
/// <param name="colBottom">Not used by this layer.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs to shape.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // Transformation parameters are not applied since HDF5 is totally generic; only warn about them.
    if (m_param.transform_param != null)
        m_log.WriteLine("WARNING: " + m_type.ToString() + " does not transform data.");

    // Read the source to parse the filenames.
    m_log.WriteLine("Loading list of HDF5 file names from: " + m_param.hdf5_data_param.source);
    m_rgstrFileNames = Utility.LoadTextLines(m_param.hdf5_data_param.source, m_log, true);
    m_nNumFiles = m_rgstrFileNames.Count;
    m_nCurrentFile = 0;

    m_log.WriteLine("Number of HDF5 files: " + m_nNumFiles.ToString());
    m_log.CHECK_GE(m_nNumFiles, 1, "Must have at least one HDF5 filename listed in '" + m_param.hdf5_data_param.source + "'!");

    // Default to identity permutation.
    m_rgFilePermutation = Enumerable.Range(0, m_nNumFiles).ToList();

    // Shuffle if needed.
    if (m_param.hdf5_data_param.shuffle)
        m_rgFilePermutation = Utility.RandomShuffle(m_rgFilePermutation);

    // Load the first HDF5 file and initialize the line counter.
    LoadHDF5FileData(m_rgstrFileNames[m_rgFilePermutation[m_nCurrentFile]]);
    m_nCurrentRow = 0;

    // Reshape the blobs.
    int nBatchSize = (int)m_param.hdf5_data_param.batch_size;
    int nTopSize = m_param.top.Count;

    for (int i = 0; i < nTopSize; i++)
    {
        // Work on a copy so that the shape of the loaded data blob is left untouched.
        List<int> rgTopShape = Utility.Clone<int>(m_colHdfBlobs[i].shape());
        rgTopShape[0] = nBatchSize;
        colTop[i].Reshape(rgTopShape);
    }
}
182
/// <summary>
/// Called to see if the current data item should be skipped.
/// </summary>
/// <returns>Returns <i>true</i> when the current item should be skipped, <i>false</i> when it should be used.</returns>
protected bool Skip()
{
    ulong lSolverCount = (ulong)m_param.solver_count;
    ulong lSolverRank = (ulong)m_param.solver_rank;

    // The item is kept when the running offset, modulo the solver count, matches this solver's rank.
    bool bMatchesRank = (m_lOffset % lSolverCount) == lSolverRank;
    if (bMatchesRank)
        return false;

    // In test mode, only rank 0 runs, so avoid skipping.
    return m_param.phase != Phase.TEST;
}
196
/// <summary>
/// Advance to the next data item.
/// </summary>
/// <remarks>
/// When the last row of the current file has been passed, the next file is loaded (only when more
/// than one file is listed), wrapping around to the first file after the last one. The file and
/// row orders are re-shuffled only when the shuffle parameter is set.
/// </remarks>
protected void Next()
{
    m_nCurrentRow++;

    if (m_nCurrentRow == m_colHdfBlobs[0].shape(0))
    {
        // With a single file the already loaded data is simply re-used.
        if (m_nNumFiles > 1)
        {
            m_nCurrentFile++;

            if (m_nCurrentFile == m_nNumFiles)
            {
                m_nCurrentFile = 0;

                if (m_param.hdf5_data_param.shuffle)
                    m_rgFilePermutation = Utility.RandomShuffle(m_rgFilePermutation);

                m_log.WriteLine("Looping around to first file...");
            }

            LoadHDF5FileData(m_rgstrFileNames[m_rgFilePermutation[m_nCurrentFile]]);
        }

        m_nCurrentRow = 0;

        if (m_param.hdf5_data_param.shuffle)
            m_rgDataPermutation = Utility.RandomShuffle(m_rgDataPermutation);
    }

    // Count every item passed over, whether it was used or skipped.
    m_lOffset++;
}
229
/// <summary>
/// Intentionally does nothing; the top blobs are shaped in LayerSetUp.
/// </summary>
/// <param name="colBottom">Not used.</param>
/// <param name="colTop">Not used.</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // Nothing to do.
}
238
/// <summary>
/// Run the Forward computation, which fills the data into the top (output) Blobs.
/// </summary>
/// <remarks>
/// For each item of the batch, the row selected by the data permutation is copied from every
/// internal HDF5 blob into the matching top blob at the item's position.
/// </remarks>
/// <param name="colBottom">Not used by this layer.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs to fill.</param>
protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    int nBatchSize = (int)m_param.hdf5_data_param.batch_size;

    for (int nItem = 0; nItem < nBatchSize; nItem++)
    {
        // Step over the items that Skip() reports as not belonging to this solver.
        while (Skip())
        {
            Next();
        }

        for (int nTop = 0; nTop < m_param.top.Count; nTop++)
        {
            Blob<T> blobTop = colTop[nTop];

            // Number of elements in a single item of this top.
            int nDataDim = blobTop.count() / blobTop.shape(0);
            int nSrcOffset = m_rgDataPermutation[m_nCurrentRow] * nDataDim;
            int nDstOffset = nItem * nDataDim;

            m_cuda.copy(nDataDim, m_colHdfBlobs[nTop].gpu_data, blobTop.mutable_gpu_data, nSrcOffset, nDstOffset);
        }

        Next();
    }
}
267
/// <summary>
/// Intentionally does nothing; this layer performs no backward computation.
/// </summary>
/// <param name="colTop">Not used.</param>
/// <param name="rgbPropagateDown">Not used.</param>
/// <param name="colBottom">Not used.</param>
protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
{
    // Nothing to do.
}
272 }
273}
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
Definition: Log.cs:287
The Utility class provides general utility functions.
Definition: Utility.cs:35
static List< int > RandomShuffle(List< int > rg, int? nSeed=null)
Randomly shuffle the entries in the specified list.
Definition: Utility.cs:1021
static List< string > LoadTextLines(string strFile, Log log=null, bool bPrependPath=true)
Load each line of a text file and return the contents as a list.
Definition: Utility.cs:1054
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resources used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The HDF5DataLayer loads data from files in the HDF5 data format. This layer is initialized with the M...
HDF5DataLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The HDF5DataLayer constructor.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Not implemented - data Layers do not perform backward.
override void dispose()
Release all internal blobs.
override int ExactNumBottomBlobs
Returns 0, as data layers have no bottom (input) Blobs.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Data layers have no bottoms, so reshaping is trivial.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: data
virtual void LoadHDF5FileData(string strFile)
Load the data from an HDF5 file.
bool Skip()
Called to see if the current data item should be skipped.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation, which fills the data into the top (output) Blobs.
void Next()
Advance to the next data item.
The HDF5 object provides HDF5 dataset support to the HDF5DataLayer.
Definition: HDF5.cs:33
void Dispose()
Release all resources used.
Definition: HDF5.cs:325
void load_nd_dataset(Blob< T > blob, string strDatasetName, bool bReshape=false, int nMinDim=1, int nMaxDim=int.MaxValue, H5GroupId id=null, bool bAllowSingleItems=false)
Creates a new dataset from an HDF5 data file.
Definition: HDF5.cs:170
string source
Specifies the data source.
virtual uint batch_size
Specifies the batch size.
bool shuffle
Specifies whether or not to shuffle the data.
Specifies the base parameter for all layers.
List< string > top
Specifies the active top connections (in the bottom, out the top)
int solver_count
Returns the number of Solvers participating in a multi-GPU session for which the Solver using this La...
int solver_rank
Returns the SolverRank of the Solver using this LayerParameter (if any).
HDF5DataParameter hdf5_data_param
Returns the parameter set when initialized with LayerType.HDF5_DATA
TransformationParameter transform_param
Returns the parameter set when initialized with LayerType.TRANSFORM
Phase phase
Specifies the Phase for which this LayerParameter is run.
LayerType
Specifies the layer type.
override string ToString()
Returns a string representation of the LayerParameter.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.hdf5 namespace contains all HDF5 related layers.
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12