using System.Collections.Generic;

using System.Threading.Tasks;

            m_lCount = total_size(root_solver.net.learnable_parameters);

            for (int i = 0; i < rgParam.Count; i++)

                nSize += (long)rgParam[i].count();
 
            m_cuda.SetDeviceID(nDeviceID);

            for (int i = 0; i < rgBlobs.Count; i++)

                int nCount = rgBlobs[i].count();

                        m_cuda.copy(nCount, rgBlobs[i].data.gpu_data, hBuffer, 0, (int)lOffset);

                        rgBlobs[i].data.set_gpu_data(hBuffer, nCount, lOffset);

                    case Op.replace_gpu_diff:

                        if (rgBlobs[i].DiffExists)

                            rgBlobs[i].diff.set_gpu_data(hBuffer, nCount, lOffset);

            m_log.CHECK_EQ(lTotalSize - m_lExtra, (lOffset == 0) ? 1 : lOffset, "The total memory doesn't match.");
 
        ManualResetEvent m_evtGradientsReady = new ManualResetEvent(false);

        List<ManualResetEvent> m_rgGradientReady = new List<ManualResetEvent>();

            : base(cuda, log, root_solver, nDeviceID)

            m_rgGradientReady = rgGradientReadyEvents;

            if (rgGradientReadyEvents != null && rgGradientReadyEvents.Count > 0)

                m_evtGradientsReady = rgGradientReadyEvents[root_solver.solver_rank];

            m_solver = root_solver;

                m_cuda.SynchronizeStream();

                m_evtGradientsReady.Set();

                while (!WaitHandle.WaitAll(m_rgGradientReady.ToArray(), 250))

                m_evtGradientsReady.Reset();
 
        public void Run(List<int> rgGpus, int nIterationOverride = -1)

            List<long> rghNccl = new List<long>();

            Guid guid = Guid.NewGuid();

            m_rgGradientReady = new List<ManualResetEvent>();

            for (int i = 0; i < rgGpus.Count; i++)

                m_rgGradientReady.Add(new ManualResetEvent(false));

            m_cuda.NcclInitializeSingleProcess(rghNccl.ToArray());

            m_hNccl = rghNccl[0];

            m_evtGradientsReady = m_rgGradientReady[0];

            List<WaitHandle> rgWaitAllInit = new List<WaitHandle>();

            List<Worker<T>> rgWorkers = new List<common.Worker<T>>();

            ManualResetEvent evtAllCreated = new ManualResetEvent(false);
 
            for (int i = 1; i < rghNccl.Count; i++)

                List<WaitHandle> rgWait = new List<WaitHandle>();

                rgWait.AddRange(m_solver.CancelEvent.Handles);

                int nWait = WaitHandle.WaitAny(rgWait.ToArray());

                if (nWait < rgWait.Count - 2)

                if (nWait == rgWait.Count - 2)

                    if (info.Error != null)

                        throw new Exception("Error starting the solver.");

                rgWorkers.Add(worker);

            while (!WaitHandle.WaitAll(rgWaitAllInit.ToArray(), 250))

            m_cuda.SynchronizeDevice();

            m_solver.Solve(nIterationOverride);

            for (int i = 0; i < rgWorkers.Count; i++)

                rgWorkers[i].StopInternalThread();
 
        this.DoWork += Worker_DoWork;

                Log log = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());

                log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");

                m_cuda.SynchronizeDevice();

                List<WaitHandle> rgWait = new List<WaitHandle>();

                int nWait = WaitHandle.WaitAny(rgWait.ToArray());

                if (nWait < rgWait.Count - 1)

                solver.Step(nIterations);

            catch (Exception excpt)
 
        string m_strCudaPath;

        int m_nIterationOverride;

        ManualResetEvent m_evtInitialized = new ManualResetEvent(false);

        ManualResetEvent m_evtStarted = new ManualResetEvent(false);

        ManualResetEvent m_evtAllCreated = new ManualResetEvent(false);

        AutoResetEvent m_evtError = new AutoResetEvent(false);

        List<ManualResetEvent> m_rgGradientReadyEvents = null;

        Exception m_error = null;

        public SolverInfo(Solver<T> rank0, long hSrcKernel, long hSrcNccl, int nSolverRank, int nIterationOverride, string strCudaPath, List<ManualResetEvent> rgGradientReadyEvents, ManualResetEvent evtAllCreated)
 
            m_strCudaPath = strCudaPath;

            m_hSrcKernel = hSrcKernel;

            m_hSrcNccl = hSrcNccl;

            m_nSolverRank = nSolverRank;

            m_nIterationOverride = nIterationOverride;

            m_rgGradientReadyEvents = rgGradientReadyEvents;

            m_evtAllCreated = evtAllCreated;

            get { return m_rank0; }

            get { return m_strCudaPath; }

            get { return m_nIterationOverride; }

            get { return m_hSrcKernel; }

            get { return m_hSrcNccl; }

            get { return m_nSolverRank; }

            get { return m_evtInitialized; }

            get { return m_evtStarted; }

            get { return m_evtAllCreated; }

            get { return m_rgGradientReadyEvents; }

            get { return m_error; }

            set { m_error = value; }

            get { return m_evtError; }
 
WaitHandle[] Handles
Returns the internal wait handles of the CancelEvent.
 
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
 
The Log class provides general output in text form.
 
bool Enable
Enables/disables the Log. When disabled, the Log does not output any data.
 
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
 
The ActionStateArgs are sent to the DoWork event when fired from the InternalThreadEntry.
 
object Arg
Returns the user supplied argument.
 
int DeviceID
Returns the Device ID of the device to use in the thread.
 
The BlobCollection contains a list of Blobs.
 
int Count
Returns the number of items in the collection.
 
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
 
The GPUParams contains the connection to the low-level Cuda, and the stream associated with this inst...
 
long m_hStream
The handle to the Cuda stream used for synchronization.
 
void Configure(Solver< T > solver)
Configure the GPU Params by copying the Solver training Net parameters into the data and diff buffers...
 
GPUParams(CudaDnn< T > cuda, Log log, Solver< T > root_solver, int nDeviceID)
The GPUParams constructor.
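
As a rough illustration of how the constructor and Configure fit together (a hedged sketch; the cuda, log and rootSolver objects are assumed to already exist, and device ID 0 is arbitrary):

    // Minimal sketch, not the library's own setup code.
    GPUParams<float> gpuParams = new GPUParams<float>(cuda, log, rootSolver, 0);
    // Copy the training Net's learnable parameters into the contiguous data/diff buffers.
    gpuParams.Configure(rootSolver);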
 
void apply_buffers(BlobCollection< T > rgBlobs, long hBuffer, long lTotalSize, Op op)
Transfer between the data/diff buffers and a collection of Blobs (e.g. the learnable parameters).
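
The source fragments near the top of this page show the core of this transfer; a simplified sketch of the copy case, using only the member names that appear above (rgBlobs, hBuffer, m_cuda), is:

    // Simplified sketch: copy each blob's GPU data into one contiguous buffer
    // at a running offset (mirrors the excerpts above; not the full method).
    long lOffset = 0;
    for (int i = 0; i < rgBlobs.Count; i++)
    {
        int nCount = rgBlobs[i].count();
        m_cuda.copy(nCount, rgBlobs[i].data.gpu_data, hBuffer, 0, (int)lOffset);
        lOffset += nCount;
    }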
 
Log m_log
The Log used for output.
 
void SynchronizeStream()
Synchronize with the Cuda stream.
 
void Dispose()
Release all GPU and Host resources used.
 
CudaDnn< T > m_cuda
The instance of CudaDnn that provides the connection to Cuda.
 
The GradientsReadyArgs is sent to the Solver::OnGradientsReady event which fires at the end of each S...
 
The InternalThread manages an internal thread used for Parallel and data collection operations.
 
void StartInternalThread(CudaDnn< T > cuda, Log log, int nDeviceID=0, object arg=null, int nInitialDelay=0)
Starts running the internal thread function which then calls the DoWork event.
 
EventHandler< ActionStateArgs< T > > DoWork
The DoWork event is the working thread function.
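
A hedged sketch of the handler shape expected by DoWork (the handler name OnDoWork is illustrative; Arg and DeviceID are the ActionStateArgs members listed above):

    // Illustrative DoWork handler; the SolverInfo cast assumes the caller passed
    // a SolverInfo<float> as the 'arg' of StartInternalThread.
    void OnDoWork(object sender, ActionStateArgs<float> e)
    {
        SolverInfo<float> info = e.Arg as SolverInfo<float>; // user supplied argument
        int nDeviceID = e.DeviceID;                          // device to use in this thread
        // ... perform the per-GPU work on nDeviceID here ...
    }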
 
The NCCL class manages the multi-GPU operations using the low-level NCCL functionality provided by th...
 
new void Dispose()
Release all GPU and Host resources used.
 
NCCL(CudaDnn< T > cuda, Log log, Solver< T > root_solver, int nDeviceID, long hNccl, List< ManualResetEvent > rgGradientReadyEvents)
The NCCL constructor.
 
void Run(List< int > rgGpus, int nIterationOverride=-1)
Run the root Solver and coordinate with all other Solvers participating in the multi-GPU training.
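
A hedged sketch of driving two GPUs with this class (the cuda, log and rootSolver objects are assumed to exist; passing 0 for hNccl and null for the gradient-ready events is an assumption about the root instance, not confirmed by this page):

    // Sketch only: constructor arguments follow the signature above.
    NCCL<float> nccl = new NCCL<float>(cuda, log, rootSolver, 0, 0, null);
    nccl.Run(new List<int>() { 0, 1 });   // train on GPUs 0 and 1 with no iteration override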
 
void Broadcast()
Broadcast the data to all other solvers participating in the multi-GPU session.
 
The Params contains the base parameters used in multi-GPU training.
 
long data
Returns the handle to the GPU memory containing the Net parameters.
 
Params(Solver< T > root_solver)
The Param constructor.
 
long m_lExtra
Size of the padding added to the memory buffers.

long count
Returns the size of the buffers (in items).

long m_lCount
Size of the buffers (in items).
 
long diff
Returns the handle to the GPU memory containing the Net gradients.
 
long m_hDiff
Handle to GPU memory containing the Net gradient.
 
long m_hData
Handle to GPU memory containing the Net parameters.
 
int m_nDeviceID
The Device ID.
 
The SolverInfo defines the user supplied arguments passed to each Worker.
 
int IterationOverride
Returns the training iteration override to use.
 
AutoResetEvent ErrorEvent
Returns the event that is set when an error occurs.
 
Exception Error
Returns the error (if any) that occurred when running the solver thread.
 
SolverInfo(Solver< T > rank0, long hSrcKernel, long hSrcNccl, int nSolverRank, int nIterationOverride, string strCudaPath, List< ManualResetEvent > rgGradientReadyEvents, ManualResetEvent evtAllCreated)
The SolverInfo constructor.
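
A hedged sketch of packaging the per-worker arguments and handing them to a Worker thread (the rank0 solver, kernel/NCCL handles, events and GPU list are assumed to exist in the caller):

    // Sketch only: the SolverInfo travels to the worker as the 'arg' of StartInternalThread
    // and arrives in the DoWork handler as e.Arg.
    SolverInfo<float> info = new SolverInfo<float>(rank0, hSrcKernel, hSrcNccl, nRank,
        -1, null, rgGradientReadyEvents, evtAllCreated);
    Worker<float> worker = new Worker<float>();
    worker.StartInternalThread(cuda, log, rgGpus[nRank], info);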
 
string CudaPath
Returns the file path to the low-level CudaDnnDll.DLL file to use. Note, when null or empty,...
 
ManualResetEvent StartedEvent
Returns the event that is set after the Worker has started running.
 
long KernelHandle
Returns a handle to the kernel where the NCCL for this Solver was created (typically this is the kern...
 
Solver< T > Rank0
Returns the rank 0 Solver that will run in the Worker.
 
List< ManualResetEvent > GradientReadyEvents
Returns the events that are set after the gradients of each Worker's Solver are ready.
 
long NcclHandle
Returns the handle to the NCCL instance for this Solver (typically this is created on the kernel that...
 
ManualResetEvent AllCreatedEvent
Returns the event that is set after all Workers have been created.
 
ManualResetEvent InitializedEvent
Returns the event that is set after the Worker has completed initializing.
 
int SolverRank
Returns the rank of this Solver.
 
The Worker manages a non-root Solver, where each Worker operates on a different GPU.
 
Worker()
The Worker constructor.
 
The SolverParameter is a parameter for the solver, specifying the train and test networks.
 
int max_iter
The maximum number of iterations.
 
SolverParameter Clone()
Creates a new copy of the SolverParameter.
 
int device_id
The device id that will be used when run on the GPU.
 
SolverType type
Specifies the solver type.
 
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
 
void Dispose()
Discards the resources (GPU and Host) used by this Solver.
 
static SGDSolver< T > Create(CudaDnn< T > cuda, Log log, ProjectEx p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXDatabaseBase db, IXPersist< T > persist, int nSolverCount=1, int nSolverRank=0, Net< T > shareNet=null, onGetWorkspace getws=null, onSetWorkspace setws=null)
Create a new Solver based on the project containing the SolverParameter.
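
A hedged sketch of the factory call with only the required arguments (the project, cancel/snapshot/test events, database and persistence objects are assumed to exist; optional arguments keep their defaults):

    // Sketch only: creates the solver from the project's SolverParameter, then trains.
    SGDSolver<float> solver = SGDSolver<float>.Create(cuda, log, project, evtCancel,
        evtForceSnapshot, evtForceTest, db, persist);
    solver.Solve();   // runs up to the SolverParameter's max_iter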
 
int iter
Returns the current training iteration.
 
SolverParameter.SolverType type
Returns the type of solver.
 
Net< T > net
Returns the main training Net.
 
int solver_count
Returns the solver count in a multi-GPU session.
 
CancelEvent CancelEvent
Returns the cancel event which when set cancels the current operation run by the Solver.
 
SolverParameter parameter
Returns the SolverParameter used.
 
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
 
EventHandler< GradientsReadyArgs > OnGradientsReady
The OnGradientsReady event fires after the gradients of a Solver are ready for distribution to other ...
 
int solver_rank
Returns this Solver's rank in a multi-GPU session.
 
IXDatabaseBase Database
Returns the in-memory MyCaffeDatabase used.
 
virtual void Solve(int nIterationOverride=-1, byte[] rgWeights=null, byte[] rgState=null, TRAIN_STEP step=TRAIN_STEP.NONE)
The main entry point of the solver. By default, iter will be zero. Pass in a non-zero iter number ...
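
A hedged sketch of resuming training from a prior snapshot with this entry point (rgWeights and rgState are assumed to have been loaded elsewhere, e.g. from a saved snapshot):

    // Sketch only: -1 keeps the solver's own iteration count.
    solver.Solve(-1, rgWeights, rgState);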
 
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
 
The MyCaffe.common namespace contains common MyCaffe classes.
 
DEVINIT
Specifies the initialization flags used when initializing CUDA.
 
NCCL_REDUCTION_OP
Specifies the reduction operation to use with 'Nickel' NCCL.
 
The MyCaffe.param namespace contains parameters used to create models.
 
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
 
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...