MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
WAVReader.cs
1using System;
2using System.Collections.Generic;
3using System.IO;
4using System.Linq;
5using System.Runtime.InteropServices;
6using System.Text;
7using System.Threading.Tasks;
8
13namespace MyCaffe.db.stream
14{
/// <summary>
/// The WaveFormatExtensible structure describes the extended set of information of a WAV file.
/// </summary>
/// <remarks>
/// The fields are laid out sequentially, so their declaration order must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct WaveFormatExtensible
{
    /// <summary>
    /// Specifies the AudioFormat where PCM = 1 for Linear quantization.
    /// </summary>
    public ushort wFormatTag;

    /// <summary>
    /// Specifies the number of channels in the data where Mono = 1 and Stereo = 2.
    /// </summary>
    public ushort nChannels;

    /// <summary>
    /// Specifies the sample rate (e.g. 8000, 44100, etc.)
    /// </summary>
    public uint nSamplesPerSec;

    /// <summary>
    /// Specifies the average byte rate per second (nSamplesPerSec * Channels * BitsPerSample / 8)
    /// </summary>
    public uint nAvgBytesPerSec;

    /// <summary>
    /// Specifies the block alignment (Channels * BitsPerSample / 8)
    /// </summary>
    public ushort nBlockAlign;

    /// <summary>
    /// Specifies the number of bits per sample (8, 16, 32, etc.)
    /// </summary>
    public ushort wBitsPerSample;

    /// <summary>
    /// Specifies the extra parameter size.
    /// </summary>
    public ushort cbSize;

    /// <summary>
    /// Specifies the valid bits per sample.
    /// </summary>
    public ushort wValidBitsPerSample;

    /// <summary>
    /// Specifies the channel mask.
    /// </summary>
    public uint dwChannelMask;

    /// <summary>
    /// Specifies the sub format GUID.
    /// </summary>
    public Guid SubFormat;
}
65
/// <summary>
/// The WaveFormat structure describes the header information of a WAV file.
/// </summary>
/// <remarks>
/// The fields are laid out sequentially, so their declaration order must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct WaveFormat
{
    /// <summary>
    /// Specifies the AudioFormat where PCM = 1 for Linear quantization.
    /// </summary>
    public ushort wFormatTag;

    /// <summary>
    /// Specifies the number of channels in the data where Mono = 1 and Stereo = 2.
    /// </summary>
    public ushort nChannels;

    /// <summary>
    /// Specifies the sample rate (e.g. 8000, 44100, etc.)
    /// </summary>
    public uint nSamplesPerSec;

    /// <summary>
    /// Specifies the average byte rate per second (nSamplesPerSec * Channels * BitsPerSample / 8)
    /// </summary>
    public uint nAvgBytesPerSec;

    /// <summary>
    /// Specifies the block alignment (Channels * BitsPerSample / 8)
    /// </summary>
    public ushort nBlockAlign;

    /// <summary>
    /// Specifies the number of bits per sample (8, 16, 32, etc.)
    /// </summary>
    public ushort wBitsPerSample;
}
100
/// <summary>
/// The WAVReader is an extension of the BinaryReader and is used to read WAV files.
/// </summary>
/// <remarks>
/// The reader walks the RIFF chunks of the stream, loading the 'fmt ' chunk into a
/// <see cref="WaveFormat"/>, collecting the fields of a 'LIST'/'INFO' chunk, and remembering
/// where the 'data' chunk lives.  Parsing stops at the 'data' chunk.  The stream must support
/// seeking and reporting its length, since both are used while parsing.
/// </remarks>
public class WAVReader : BinaryReader
{
    Stream m_stream;
    WaveFormat m_format = new WaveFormat();
    Dictionary<string, List<string>> m_rgInfo = new Dictionary<string, List<string>>();
    long m_lDataPos;
    int m_nDataSize;
    List<double[]> m_rgrgSamples;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="stream">Specifies the stream containing the WAV file content.</param>
    public WAVReader(Stream stream) : base(stream)
    {
        m_stream = stream;
    }

    /// <summary>
    /// Returns the WAV file header information.
    /// </summary>
    public WaveFormat Format
    {
        get { return m_format; }
    }

    /// <summary>
    /// Returns the samples of the WAV file, one array per channel.  The value is null until
    /// <see cref="ReadToEnd"/> has read the audio content.
    /// </summary>
    public List<double[]> Samples
    {
        get { return m_rgrgSamples; }
    }

    /// <summary>
    /// Returns the WAV file header information in a key=value format.
    /// </summary>
    public Dictionary<string, List<string>> ExtraInformation
    {
        get { return m_rgInfo; }
    }

    /// <summary>
    /// Reads the WAV file data.
    /// </summary>
    /// <param name="bReadHeaderOnly">When true, only the chunk headers are parsed and the audio samples are not loaded.</param>
    /// <returns>Returns true on success, or false when the stream is not a RIFF/WAVE stream or the audio content cannot be read.</returns>
    /// <exception cref="NotImplementedException">Thrown when the bits per sample is not 8, 16, 24 or 32.</exception>
    public bool ReadToEnd(bool bReadHeaderOnly = false)
    {
        if (!readContent())
            return false;

        if (bReadHeaderOnly)
            return true;

        return readAudioContent();
    }

    /// <summary>
    /// Returns the number of samples (data size divided by the block alignment), or 0 when
    /// no block alignment has been read yet.
    /// </summary>
    public int SampleCount
    {
        get
        {
            // nBlockAlign stays 0 until a usable 'fmt ' chunk is read; avoid dividing by zero.
            if (m_format.nBlockAlign == 0)
                return 0;

            return m_nDataSize / m_format.nBlockAlign;
        }
    }

    /// <summary>
    /// Loads the samples of the 'data' chunk into one array per channel.
    /// </summary>
    /// <returns>Returns true on success, or false when no block alignment is known.</returns>
    private bool readAudioContent()
    {
        // Without a block alignment the sample count cannot be computed.
        if (m_format.nBlockAlign == 0)
            return false;

        m_stream.Seek(m_lDataPos, SeekOrigin.Begin);

        int nChannels = m_format.nChannels;
        int nSamples = m_nDataSize / m_format.nBlockAlign;

        m_rgrgSamples = new List<double[]>(nChannels);
        for (int i = 0; i < nChannels; i++)
        {
            m_rgrgSamples.Add(new double[nSamples]);
        }

        // Samples are interleaved: one value per channel for each sample index.
        for (int s = 0; s < nSamples; s++)
        {
            for (int ch = 0; ch < nChannels; ch++)
            {
                // The value is deliberately narrowed to float precision before it is stored,
                // which keeps the stored values identical to what this reader always produced.
                m_rgrgSamples[ch][s] = (float)readSample();
            }
        }

        return true;
    }

    /// <summary>
    /// Reads a single sample at the current position and scales it by the magnitude of
    /// its bit depth.
    /// </summary>
    /// <returns>The scaled sample value.</returns>
    /// <exception cref="NotImplementedException">Thrown when the bits per sample is not 8, 16, 24 or 32.</exception>
    private double readSample()
    {
        switch (m_format.wBitsPerSample)
        {
            // 8-bit unsigned, re-centered around zero.
            case 8:
                return (ReadByte() - 0x80) / (double)0x80;

            // 16-bit signed, little-endian.
            case 16:
                return ReadInt16() / (double)0x8000;

            // 24-bit signed, little-endian.
            case 24:
                {
                    int b1 = ReadByte();
                    int b2 = ReadByte();
                    int b3 = ReadByte();
                    // Place the three bytes in the upper part of the int, then shift back
                    // down arithmetically so that the sign bit is extended.
                    int v = ((b3 << 24) | (b2 << 16) | (b1 << 8)) >> 8;
                    return v / (double)0x800000;
                }

            // 32-bit signed, little-endian.
            case 32:
                return ReadInt32() / (double)0x80000000;

            default:
                throw new NotImplementedException("The bits per sample of " + m_format.wBitsPerSample.ToString() + " is not supported.");
        }
    }

    /// <summary>
    /// Parses the chunk structure of the stream.
    /// </summary>
    /// <returns>Returns true when the stream is a RIFF/WAVE stream.</returns>
    private bool readContent()
    {
        return readRiff();
    }

    /// <summary>
    /// Reads the RIFF header and then every chunk up to the 'data' chunk or the end of the stream.
    /// </summary>
    /// <returns>Returns true when the header identifies a RIFF/WAVE stream, false otherwise (including a truncated header).</returns>
    private bool readRiff()
    {
        string strRiff = readID();
        if (strRiff == null)
            return false;

        long lRiffSize;
        if (!tryReadSize(out lRiffSize))
            return false;

        string strType = readID();

        if (strRiff != "RIFF" || strType != "WAVE")
            return false;

        // readChunk returns true once parsing should stop.
        while (!readChunk())
        {
        }

        return true;
    }

    /// <summary>
    /// Reads a four character identifier, first skipping any bytes outside of the
    /// range [' ', 127] (such as chunk padding bytes).
    /// </summary>
    /// <returns>The identifier, or null when the end of the stream is reached first.</returns>
    private string readID()
    {
        int c1 = 0;

        while (c1 < (int)' ' || c1 > 127)
        {
            c1 = m_stream.ReadByte();
            if (c1 == -1)
                return null;
        }

        char[] rgch = new char[4];
        rgch[0] = (char)c1;

        for (int i = 1; i < rgch.Length; i++)
        {
            int c = m_stream.ReadByte();
            // A truncated identifier is treated as the end of the stream instead of
            // turning the -1 end marker into a character.
            if (c == -1)
                return null;

            rgch[i] = (char)c;
        }

        return new string(rgch);
    }

    /// <summary>
    /// Reads a 32-bit unsigned size field when at least four bytes remain in the stream.
    /// </summary>
    /// <param name="lSize">Receives the size read, or 0 when the size could not be read.</param>
    /// <returns>Returns true when the size was read, false when the stream is truncated.</returns>
    private bool tryReadSize(out long lSize)
    {
        lSize = 0;

        if (m_stream.Length - m_stream.Position < 4)
            return false;

        // Read as unsigned so that a large size never becomes a negative value.
        lSize = ReadUInt32();
        return true;
    }

    /// <summary>
    /// Moves the stream forward to the given position; never moves it backward.
    /// </summary>
    /// <param name="lPos">Specifies the absolute position to move to.</param>
    private void seekForward(long lPos)
    {
        if (m_stream.Position < lPos)
            m_stream.Seek(lPos, SeekOrigin.Begin);
    }

    /// <summary>
    /// Reads the next chunk of the stream.
    /// </summary>
    /// <returns>Returns true when parsing should stop (the 'data' chunk was found or the stream ended), false to continue.</returns>
    private bool readChunk()
    {
        if (m_stream.Position >= m_stream.Length)
            return true;

        string strID = readID();
        if (strID == null)
            return true;

        long lSize;
        if (!tryReadSize(out lSize))
            return true;

        long lPos = m_stream.Position;

        // Clamp the declared size to what the stream actually holds (and to the int range
        // used by the rest of the reader).
        long lRemaining = m_stream.Length - lPos;
        if (lSize > lRemaining)
            lSize = lRemaining;
        if (lSize > int.MaxValue)
            lSize = int.MaxValue;

        int nSize = (int)lSize;
        long lEnd = lPos + nSize;

        switch (strID)
        {
            case "fmt ":
                readFmt(nSize);
                // Skip whatever readFmt did not consume (e.g. the extension bytes of an
                // extensible format) so that it is not mistaken for the next chunk.
                seekForward(lEnd);
                break;

            case "LIST":
                // Only 'INFO' lists are consumed here; for any other list type the stream
                // is left just after the type so that its sub-chunks are read as chunks.
                if (readID() == "INFO")
                {
                    readListInfo(nSize);
                    seekForward(lEnd);
                }
                break;

            case "data":
                m_lDataPos = lPos;
                m_nDataSize = nSize;
                return true;

            default:
                seekForward(lEnd);
                break;
        }

        return false;
    }

    /// <summary>
    /// Reads the 'fmt ' chunk into the format structure when the chunk is large enough to hold it.
    /// </summary>
    /// <param name="nSize">Specifies the size of the chunk in bytes.</param>
    private void readFmt(int nSize)
    {
        int nStructSize = Marshal.SizeOf(typeof(WaveFormat));

        if (nSize >= nStructSize)
        {
            byte[] rgData = ReadBytes(nStructSize);
            m_format = ByteArrayToStructure<WaveFormat>(rgData);
        }
    }

    /// <summary>
    /// Reads the fields of a 'LIST'/'INFO' chunk into the extra information dictionary.
    /// The stream is expected to be positioned just after the 'INFO' list type.
    /// </summary>
    /// <param name="nSize">Specifies the size of the LIST chunk in bytes, including the four byte list type.</param>
    private void readListInfo(int nSize)
    {
        // The four byte list type has already been consumed by the caller.
        long lEnd = m_stream.Position + nSize - 4;

        // A field needs at least an identifier and a size (8 bytes); anything shorter
        // left at the end of the list is padding.
        while (lEnd - m_stream.Position >= 8)
        {
            string strField = readID();
            if (strField == null)
                return;

            long lFieldSize;
            if (!tryReadSize(out lFieldSize))
                return;

            // Never read field data beyond the end of the list.
            long lAvailable = lEnd - m_stream.Position;
            if (lFieldSize > lAvailable)
                lFieldSize = lAvailable;

            if (lFieldSize <= 0)
                continue;

            byte[] rgData = ReadBytes((int)lFieldSize);
            string strVal = Encoding.UTF8.GetString(rgData);

            // Field values are cut at the first NUL character.
            int nIdx = strVal.IndexOf((char)0);
            if (nIdx != -1)
                strVal = strVal.Substring(0, nIdx);

            strVal = strVal.Trim();

            List<string> rgValues;
            if (!m_rgInfo.TryGetValue(strField, out rgValues))
            {
                rgValues = new List<string>();
                m_rgInfo.Add(strField, rgValues);
            }

            rgValues.Add(strVal);
        }
    }

    /// <summary>
    /// Converts a byte array to a structure.
    /// </summary>
    /// <typeparam name="T">Specifies the type of the structure to create.</typeparam>
    /// <param name="bytes">Specifies the bytes holding the structure content.</param>
    /// <returns>The structure filled from the bytes.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="bytes"/> holds fewer bytes than the structure requires.</exception>
    protected static T ByteArrayToStructure<T>(byte[] bytes) where T : struct
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes");

        // Reading a structure from a shorter buffer would read past the end of the array.
        if (bytes.Length < Marshal.SizeOf(typeof(T)))
            throw new ArgumentException("The byte array is too small to hold the structure.", "bytes");

        GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);

        try
        {
            return (T)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(T));
        }
        finally
        {
            // Always release the pinned handle, even when the conversion throws.
            handle.Free();
        }
    }
}
385}
The WAVReader is an extension of the BinaryReader and is used to read WAV files.
Definition: WAVReader.cs:105
static T ByteArrayToStructure< T >(byte[] bytes)
Converts a byte array to a structure.
Definition: WAVReader.cs:376
Dictionary< string, List< string > > ExtraInformation
Returns the WAV file header information in a key=value format.
Definition: WAVReader.cs:142
int SampleCount
Returns the number of samples.
Definition: WAVReader.cs:169
WaveFormat Format
Returns the WAV file header information.
Definition: WAVReader.cs:126
bool ReadToEnd(bool bReadHeaderOnly=false)
Reads the WAV file data.
Definition: WAVReader.cs:151
List< double[]> Samples
Returns the frequency samples of the WAV file.
Definition: WAVReader.cs:134
WAVReader(Stream stream)
The constructor.
Definition: WAVReader.cs:117
The MyCaffe.db.stream namespace contains all data streaming related classes.
The WaveFormatExtensible structure describes the extended set of information of a WAV file.
Definition: WAVReader.cs:23
ushort wValidBitsPerSample
Specifies the valid bits per sample.
Definition: WAVReader.cs:55
uint nAvgBytesPerSec
Specifies the average byte rate per second (nSamplesPerSec * Channels * BitsPerSample / 8)
Definition: WAVReader.cs:39
ushort cbSize
Specifies the extra parameter size.
Definition: WAVReader.cs:51
uint dwChannelMask
Specifies the channel mask.
Definition: WAVReader.cs:59
ushort wBitsPerSample
Specifies the number of bits per sample (8, 16, 32, etc.)
Definition: WAVReader.cs:47
ushort nBlockAlign
Specifies the block alignment (Channels * BitsPerSample / 8)
Definition: WAVReader.cs:43
ushort wFormatTag
Specifies the AudioFormat where PCM = 1 for Linear quantization.
Definition: WAVReader.cs:27
Guid SubFormat
Specifies the sub format GUID.
Definition: WAVReader.cs:63
ushort nChannels
Specifies the number of channels in the data where Mono = 1 and Stereo = 2.
Definition: WAVReader.cs:31
uint nSamplesPerSec
Specifies the sample rate (e.g. 8000, 44100, etc.)
Definition: WAVReader.cs:35
The WaveFormat structure describes the header information of a WAV file.
Definition: WAVReader.cs:74
ushort nBlockAlign
Specifies the block alignment (Channels * BitsPerSample / 8)
Definition: WAVReader.cs:94
uint nAvgBytesPerSec
Specifies the average byte rate per second (nSamplesPerSec * Channels * BitsPerSample / 8)
Definition: WAVReader.cs:90
uint nSamplesPerSec
Specifies the sample rate (e.g. 8000, 44100, etc.)
Definition: WAVReader.cs:86
ushort nChannels
Specifies the number of channels in the data where Mono = 1 and Stereo = 2.
Definition: WAVReader.cs:82
ushort wBitsPerSample
Specifies the number of bits per sample (8, 16, 32, etc.)
Definition: WAVReader.cs:98
ushort wFormatTag
Specifies the AudioFormat where PCM = 1 for Linear quantization.
Definition: WAVReader.cs:78