MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
SyncedMemory.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using MyCaffe.basecode;
6
7namespace MyCaffe.common
8{
17 public class SyncedMemory<T> : IDisposable
18 {
        Log m_log;                      // Output log (may be null; see constructor).
        CudaDnn<T> m_cuda;              // Low-level Cuda connection used for all GPU operations.
        int m_nDeviceID = -1;           // Device on which the GPU memory was allocated (-1 = none).
        long m_lCapacity = 0;           // Total allocated GPU capacity, in items.
        long m_lCount = 0;              // Current number of valid items (<= m_lCapacity).
        long m_hGpuData = 0;            // Handle to the GPU memory (0 = not allocated).
        T[] m_rgCpuData = null;         // Host-side copy of the data, filled by update_cpu_data().
        bool m_bOwnData = true;         // True when we own m_hGpuData; false when it is a memory pointer.
        bool m_bHalfSize = false;       // True when using half-sized memory (float base type only).
        object m_tag = null;            // User data associated with this instance.
29
38 public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0, object tag = null, bool bUseHalfSize = false)
39 {
40 if (bUseHalfSize && typeof(T) != typeof(float))
41 {
42 bUseHalfSize = false;
43
44 if (log != null)
45 log.WriteLine("Half size disabled for non 'float' basetypes!");
46 }
47
48 m_bHalfSize = bUseHalfSize;
49 m_cuda = cuda;
50 m_log = log;
51 m_tag = tag;
52
53 if (lCapacity > 0)
54 {
55 m_nDeviceID = m_cuda.GetDeviceID();
56 m_hGpuData = m_cuda.AllocMemory(lCapacity);
57 m_lCapacity = lCapacity;
58 m_lCount = lCapacity;
59 }
60 }
61
62 private void free()
63 {
64 if (m_hGpuData != 0)
65 {
66 check_device();
67 if (m_bOwnData)
68 m_cuda.FreeMemory(m_hGpuData);
69 else
70 m_cuda.FreeMemoryPointer(m_hGpuData);
71 }
72 }
73
77 public void Dispose()
78 {
79 free();
80 m_hGpuData = 0;
81 m_nDeviceID = -1;
82 m_lCapacity = 0;
83 m_lCount = 0;
84 m_rgCpuData = null;
85 }
86
92 public void Allocate(long lCount, bool bUseHalfSize = false)
93 {
94 if (bUseHalfSize && typeof(T) != typeof(float))
95 {
96 bUseHalfSize = false;
97
98 if (m_log != null)
99 m_log.WriteLine("Half size disabled for non 'float' basetypes!");
100 }
101
102 free();
103 m_nDeviceID = m_cuda.GetDeviceID();
104 m_bHalfSize = bUseHalfSize;
105 m_hGpuData = m_cuda.AllocMemory(lCount, m_bHalfSize);
106 m_lCapacity = lCount;
107 m_lCount = 0;
108 m_bOwnData = true;
109 return;
110 }
111
117 public void Allocate(T[] rg, bool bUseHalfSize = false)
118 {
119 if (bUseHalfSize && typeof(T) != typeof(float))
120 {
121 bUseHalfSize = false;
122
123 if (m_log != null)
124 m_log.WriteLine("Half size disabled for non 'float' basetypes!");
125 }
126
127 free();
128 m_nDeviceID = m_cuda.GetDeviceID();
129 m_bHalfSize = bUseHalfSize;
130 m_hGpuData = m_cuda.AllocMemory(rg, 0, m_bHalfSize);
131 m_lCapacity = rg.Length;
132 m_lCount = rg.Length;
133 m_bOwnData = true;
134 check_device();
135
136 return;
137 }
138
142 public void Zero()
143 {
144 if (m_lCount > 0)
145 {
146 check_device();
147 m_cuda.set((int)m_lCount, m_hGpuData, 0.0);
148 }
149 }
150
154 public void ZeroAll()
155 {
156 if (m_lCapacity > 0)
157 {
158 check_device();
159 m_cuda.set((int)m_lCapacity, m_hGpuData, 0.0);
160 }
161 }
162
167 public void Set(double dfVal)
168 {
169 if (m_lCount > 0)
170 {
171 check_device();
172 m_cuda.set((int)m_lCount, m_hGpuData, dfVal);
173 }
174 }
175
        /// <summary>
        /// Set a specific item at a given index to a value.
        /// </summary>
        /// <param name="nIdx">Specifies the index of the item to set.</param>
        /// <param name="fVal">Specifies the value to set.</param>
        public void SetAt(int nIdx, T fVal)
        {
            check_device();
            // NOTE(review): nIdx is not range-checked here — presumably validated by
            // the low-level set(); confirm against CudaDnn.set().
            m_cuda.set((int)m_lCount, m_hGpuData, fVal, nIdx);
        }
186
192 public T GetAt(int nIdx)
193 {
194 check_device();
195 T[] rg = m_cuda.get((int)m_lCount, m_hGpuData, nIdx);
196 return rg[0];
197 }
198
        /// <summary>
        /// Copy another SyncedMemory into this one, re-allocating this memory if needed.
        /// </summary>
        /// <param name="src">Specifies the source SyncedMemory (when null, this Count is set to 0 and nothing is copied).</param>
        /// <param name="hDstHostBuffer">Optionally, specifies a host buffer handle used to stage copies across kernels; pass the returned handle back in on later calls to reuse it.</param>
        /// <returns>The host buffer handle used (unchanged, newly allocated, or 0 when none was needed) is returned.</returns>
        public long Copy(SyncedMemory<T> src, long hDstHostBuffer = 0)
        {
            // A null source just empties this memory (capacity is retained).
            if (src == null)
            {
                m_lCount = 0;
                return hDstHostBuffer;
            }

            // Grow the GPU allocation when the source holds more items than we can store.
            if (m_lCapacity < src.m_lCount)
                Allocate(src.m_lCount);

            m_lCount = src.m_lCount;

            if (m_lCount > 0)
            {
                if (m_cuda.KernelHandle == src.m_cuda.KernelHandle)
                {
                    // Same low-level kernel: a direct device-to-device copy suffices.
                    check_device();
                    m_cuda.copy((int)m_lCount, src.m_hGpuData, m_hGpuData);
                }
                else
                {
                    // Different kernels: stage the copy through a host buffer,
                    // allocating one (or growing the caller's) when too small.
                    if (hDstHostBuffer == 0)
                    {
                        hDstHostBuffer = m_cuda.AllocHostBuffer(m_lCount);
                    }
                    else
                    {
                        long lCount = m_cuda.GetHostBufferCapacity(hDstHostBuffer);
                        if (lCount < m_lCount)
                        {
                            m_cuda.FreeHostBuffer(hDstHostBuffer);
                            hDstHostBuffer = m_cuda.AllocHostBuffer(m_lCount);
                        }
                    }

                    src.m_cuda.KernelCopy((int)m_lCount, src.m_hGpuData, 0, m_cuda.KernelHandle, m_hGpuData, 0, hDstHostBuffer, m_cuda.KernelHandle);
                }
            }

            return hDstHostBuffer;
        }
247
253 {
254 SyncedMemory<T> dst = new SyncedMemory<T>(m_cuda, m_log, m_lCapacity, null, m_bHalfSize);
255
256 if (m_lCount > 0)
257 dst.Copy(this);
258
259 return dst;
260 }
261
        /// <summary>
        /// Returns whether or not the sync memory is half-sized memory.
        /// </summary>
        public bool HalfSize
        {
            get { return m_bHalfSize; }
        }

        /// <summary>
        /// Get/set data associated with the synced memory.
        /// </summary>
        public object Tag
        {
            get { return m_tag; }
            set { m_tag = value; }
        }

        /// <summary>
        /// Returns the Device ID on which the GPU memory of this SyncedMemory was allocated.
        /// </summary>
        public int DeviceID
        {
            get { return m_nDeviceID; }
        }

        /// <summary>
        /// Returns the total amount of GPU memory (in items) held by this SyncedMemory.
        /// </summary>
        public long Capacity
        {
            get { return m_lCapacity; }
        }

        /// <summary>
        /// Get/set the current count of items in this SyncedMemory.  Note, the Count may be less than the Capacity.
        /// </summary>
        public long Count
        {
            get { return m_lCount; }
            set { m_lCount= value; }
        }

        /// <summary>
        /// Returns the handle to the GPU memory.
        /// </summary>
        public long gpu_data
        {
            get { return m_hGpuData; }
        }
311
        /// <summary>
        /// Creates a new Memory Pointer within the low-level CudaDnnDLL that uses an
        /// already allocated block of GPU memory, and uses it as this instance's data.
        /// </summary>
        /// <param name="hData">Specifies the handle to the existing GPU memory.</param>
        /// <param name="lCount">Specifies the number of items the pointer covers.</param>
        /// <param name="lOffset">Specifies the offset (in items) into the existing memory.</param>
        public void set_gpu_data(long hData, long lCount, long lOffset)
        {
            free();
            m_hGpuData = m_cuda.CreateMemoryPointer(hData, lOffset, lCount);
            m_lCapacity = lCount;
            m_lCount = lCount;
            m_bOwnData = false;     // Borrowed memory: freed via FreeMemoryPointer, not FreeMemory.
            check_device();
        }
328
336 {
337 get
338 {
339 check_device();
340 return m_hGpuData;
341 }
342// set { m_hGpuData = value; }
343 }
344
        /// <summary>
        /// Returns the data on the CPU that has already been transferred from GPU to CPU
        /// (via update_cpu_data(); may be null if never transferred).
        /// </summary>
        public T[] cpu_data
        {
            get { return m_rgCpuData; }
        }
352
359 public void SetData(T[] rgData, int nCount, bool bSetCount = true)
360 {
361 if (nCount == -1)
362 nCount = rgData.Length;
363
364 if (nCount > m_lCapacity || m_hGpuData == 0)
365 {
366 bSetCount = true;
367 Allocate(nCount);
368 }
369
370 m_cuda.SetMemory(m_hGpuData, rgData, 0, nCount);
371
372 if (bSetCount)
373 m_lCount = nCount;
374
375 check_device();
376 }
377
386 {
387 get { return update_cpu_data(); }
388 set
389 {
390 check_device();
391 if (value.Length > m_lCapacity || m_hGpuData == 0)
392 {
393 Allocate(value);
394 }
395 else
396 {
397 m_cuda.SetMemory(m_hGpuData, value);
398 m_lCount = value.Length;
399 check_device();
400 }
401 }
402 }
403
409 public T[] update_cpu_data(long lCount = -1)
410 {
411 if (lCount >= 0)
412 {
413 if (lCount > m_lCapacity)
414 throw new ArgumentOutOfRangeException();
415
416 m_lCount = lCount;
417 }
418
419 check_device();
420
421 if (m_lCount == 0)
422 m_rgCpuData = new List<T>().ToArray();
423 else
424 m_rgCpuData = m_cuda.GetMemory(m_hGpuData, m_lCount);
425
426 return m_rgCpuData;
427 }
428
        /// <summary>
        /// Sets the host data locally only.  This does not place the data on the GPU -
        /// call async_gpu_push() to move it to the GPU.
        /// </summary>
        /// <param name="rg">Specifies the host data to hold.</param>
        public void set_cpu_data_locally(T[] rg)
        {
            m_rgCpuData = rg;
        }
437
446 public void async_gpu_push(long hStream, T[] rg)
447 {
448 check_device();
449 if (m_hGpuData == 0)
450 {
451 m_hGpuData = m_cuda.AllocMemory(rg, hStream);
452 m_lCapacity = rg.Length;
453 }
454 else
455 {
456 m_cuda.SetMemory(m_hGpuData, rg, hStream);
457 }
458
459 m_lCount = rg.Length;
460 check_device();
461 }
462
463 private void check_device()
464 {
465#if DEBUG
466 if (m_lCount > 0)
467 {
468 int nDeviceId = m_cuda.GetDeviceID();
469 m_log.CHECK_EQ(nDeviceId, m_nDeviceID, "The current device DOES'NT match the device for which the memory was allocated!");
470 }
471#endif
472 }
473 }
474}
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
T[] GetMemory(long hMem, long lCount=-1)
Retrieves the GPU memory as an array of type 'T'
Definition: CudaDnn.cs:2700
int GetDeviceID()
Returns the current device id set within Cuda.
Definition: CudaDnn.cs:2013
void copy(int nCount, long hSrc, long hDst, int nSrcOffset=0, int nDstOffset=0, long hStream=-1, bool? bSrcHalfSizeOverride=null, bool? bDstHalfSizeOverride=null)
Copy data from one block of GPU memory to another.
Definition: CudaDnn.cs:6007
void FreeMemory(long hMem)
Free previously allocated GPU memory.
Definition: CudaDnn.cs:2517
long CreateMemoryPointer(long hData, long lOffset, long lCount)
Creates a memory pointer into an already existing block of GPU memory.
Definition: CudaDnn.cs:3028
void SetMemory(long hMem, List< double > rg)
Copies a list of doubles into a block of already allocated GPU memory.
Definition: CudaDnn.cs:2734
long AllocMemory(List< double > rg)
Allocate a block of GPU memory and copy a list of doubles to it.
Definition: CudaDnn.cs:2291
long AllocHostBuffer(long lCapacity)
Allocate a block of host memory with a specified capacity.
Definition: CudaDnn.cs:2581
long GetHostBufferCapacity(long hMem)
Returns the host memory capacity.
Definition: CudaDnn.cs:2621
void FreeHostBuffer(long hMem)
Free previously allocated host memory.
Definition: CudaDnn.cs:2602
void set(int nCount, long hHandle, double fVal, int nIdx=-1)
Set the values of GPU memory to a specified value of type
Definition: CudaDnn.cs:5897
long KernelHandle
Returns the Low-Level kernel handle used for this instance. Each Low-Level kernel maintains its own s...
Definition: CudaDnn.cs:1812
void FreeMemoryPointer(long hData)
Frees a memory pointer.
Definition: CudaDnn.cs:3046
T[] get(int nCount, long hHandle, int nIdx=-1)
Queries the GPU memory by copying it into an array of type 'T'.
Definition: CudaDnn.cs:5985
The SyncedMemory manages the low-level connection between the GPU and host memory.
Definition: SyncedMemory.cs:18
bool HalfSize
Returns whether or not the sync memory is half-sized memory.
void Allocate(T[] rg, bool bUseHalfSize=false)
Allocate a number of items and copy the given array into the memory on the GPU.
void Zero()
Set all items in the GPU memory up to the Count, to zero.
void Set(double dfVal)
Set all items up to Count to a given value.
void async_gpu_push(long hStream, T[] rg)
Pushes the host data, previously set with set_cpu_data_locally(), to the GPU.
long gpu_data
Returns the handle to the GPU memory.
object Tag
Get/set data associated with the synced memory.
void set_gpu_data(long hData, long lCount, long lOffset)
Copies a new Memory Pointer within the low-level CudaDnnDLL where a Memory Pointer uses another alrea...
T[] mutable_cpu_data
Get/set the mutable host data.
long Copy(SyncedMemory< T > src, long hDstHostBuffer=0)
Copy another SyncedMemory into this one.
void Dispose()
Releases all GPU and host resources used.
Definition: SyncedMemory.cs:77
SyncedMemory(CudaDnn< T > cuda, Log log, long lCapacity=0, object tag=null, bool bUseHalfSize=false)
The SyncedMemory constructor.
Definition: SyncedMemory.cs:38
void SetData(T[] rgData, int nCount, bool bSetCount=true)
Sets the array of host data on the GPU and re-allocates the GPU memory if needed.
void Allocate(long lCount, bool bUseHalfSize=false)
Allocate a number of items in GPU memory and save the handle.
Definition: SyncedMemory.cs:92
long Capacity
Returns the total amount of GPU memory held by this SyncedMemory.
SyncedMemory< T > Clone()
Copy this SyncedMemory.
T GetAt(int nIdx)
Return a value at a given index.
long mutable_gpu_data
Returns the mutable handle to GPU data.
T[] update_cpu_data(long lCount=-1)
Updates the host data by copying the GPU data to the host data.
T[] cpu_data
Returns the data on the CPU that has already been transferred from GPU to CPU.
void SetAt(int nIdx, T fVal)
Set a specific item at a given index to a value.
void ZeroAll()
Set all items in the GPU memory up to the Capacity, to zero.
void set_cpu_data_locally(T[] rg)
This does not place the data on the GPU - call async_gpu_push() to move it to the GPU.
int DeviceID
Returns the Device ID on which the GPU memory of this SyncedMemory was allocated.
long Count
Returns the current count of items in this SyncedMemory. Note, the Count may be less than the Capacit...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12