MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
SGDSolver.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.IO;
using MyCaffe.basecode;
using MyCaffe.db.image;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.solvers
{
    /// <summary>
    /// Stochastic Gradient Descent solver with momentum that updates the weights using a linear
    /// combination of the negative gradient and the previous weight update.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    public class SGDSolver<T> : Solver<T>
    {
        /// <summary>
        /// History maintains the historical momentum data.
        /// </summary>
        protected BlobCollection<T> m_colHistory = new BlobCollection<T>();
        // protected BlobCollection<T> m_colUpdate = new BlobCollection<T>(); // not used in GPU version
        /// <summary>
        /// Temp maintains update related data and is not needed in snapshots.
        /// </summary>
        protected BlobCollection<T> m_colTemp = new BlobCollection<T>();

        /// <summary>
        /// The SGDSolver constructor.
        /// </summary>
        public SGDSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXDatabaseBase db, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
            : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, db, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
        {
            PreSolve();
        }

        /// <summary>
        /// Releases all resources (GPU and Host) used by the Solver.
        /// </summary>
        protected override void dispose()
        {
            if (m_colHistory != null)
            {
                m_colHistory.Dispose();
                m_colHistory = null;
            }

            if (m_colTemp != null)
            {
                m_colTemp.Dispose();
                m_colTemp = null;
            }

            base.dispose();
        }

        /// <summary>
        /// Returns the history BlobCollection containing historical momentum data.
        /// </summary>
        public BlobCollection<T> history
        {
            get { return m_colHistory; }
        }

        /// <summary>
        /// Runs the pre-solve which prepares the Solver to start Solving.
        /// </summary>
        public void PreSolve()
        {
            BlobCollection<T> colNetParams = m_net.learnable_parameters;
            m_colHistory.Clear(true);
            // m_colUpdate.Clear(true);
            m_colTemp.Clear(true);

            for (int i = 0; i < colNetParams.Count; i++)
            {
                List<int> rgShape = colNetParams[i].shape();

                m_colHistory.Add(new Blob<T>(m_cuda, m_log, rgShape, false)); // diff never used
                // m_colUpdate.Add(new Blob<T>(m_cuda, m_log, rgShape, false));
                m_colTemp.Add(new Blob<T>(m_cuda, m_log, rgShape, false)); // diff never used
            }
        }

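        // Note (illustrative): PreSolve allocates one momentum 'history' blob and one
        // scratch 'temp' blob per learnable parameter, each shaped exactly like that
        // parameter. For a hypothetical net with a 32x3x5x5 conv kernel and a 32-element
        // bias, m_colHistory would hold two blobs shaped { 32, 3, 5, 5 } and { 32 },
        // giving every weight its own momentum slot that persists across iterations
        // (and, via the snapshot methods below, across restarts).
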
        /// <summary>
        /// Return the current learning rate.
        /// </summary>
        /// <remarks>
        /// The currently implemented learning rate policies are as follows:
        ///   - fixed: always return base_lr.
        ///   - step: return base_lr * gamma ^ floor(iter / stepsize)
        ///   - exp: return base_lr * gamma ^ iter
        ///   - inv: return base_lr * (1 + gamma * iter) ^ (-power)
        ///   - multistep: similar to step, but allows non-uniform steps defined by stepvalue.
        ///   - poly: polynomial decay reaching zero at max_iter:
        ///       return base_lr * (1 - iter / max_iter) ^ power
        ///   - sigmoid: sigmoidal decay:
        ///       return base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))
        /// where base_lr, gamma, stepsize, stepvalue, max_iter and power are defined in the
        /// SolverParameter, and iter is the current iteration.
        /// </remarks>
        /// <param name="nIterationOverride">Optionally, specifies an iteration override, or -1 (the default) to use the current iteration.</param>
        /// <returns>The learning rate is returned.</returns>
        public double GetLearningRate(int nIterationOverride = -1)
        {
            double dfRate = 0;

            if (nIterationOverride == -1)
                nIterationOverride = m_nIter;

            switch (m_param.lr_policy)
            {
                case "fixed":
                    dfRate = m_param.base_lr;
                    break;

                case "step":
                    m_log.CHECK_GT(m_param.stepsize, 0, "The stepsize must be greater than 0.");
                    m_nCurrentStep = nIterationOverride / m_param.stepsize;
                    m_log.CHECK_GE(m_param.gamma, 0, "The gamma must be greater than or equal to 0.");
                    dfRate = m_param.base_lr * Math.Pow(m_param.gamma, m_nCurrentStep);
                    break;

                case "exp":
                    m_log.CHECK_GE(m_param.gamma, 0, "The gamma must be greater than or equal to 0.");
                    dfRate = m_param.base_lr * Math.Pow(m_param.gamma, nIterationOverride);
                    break;

                case "inv":
                    m_log.CHECK_GE(m_param.gamma, 0, "The gamma must be greater than or equal to 0.");
                    dfRate = m_param.base_lr * Math.Pow(1.0 + m_param.gamma * nIterationOverride, -1.0 * m_param.power);
                    break;

                case "multistep":
                    if (m_nCurrentStep < m_param.stepvalue.Count && nIterationOverride >= m_param.stepvalue[m_nCurrentStep])
                    {
                        m_nCurrentStep++;
                        m_log.WriteLine("MultiStep Status: Iteration " + nIterationOverride.ToString() + ", step = " + m_nCurrentStep.ToString());
                    }
                    m_log.CHECK_GE(m_param.gamma, 0, "The gamma must be greater than or equal to 0.");
                    dfRate = m_param.base_lr * Math.Pow(m_param.gamma, m_nCurrentStep);
                    break;

                case "poly":
                    dfRate = m_param.base_lr * Math.Pow(1.0 - ((double)nIterationOverride / (double)m_param.max_iter), m_param.power);
                    break;

                case "sigmoid":
                    m_log.CHECK_GE(m_param.gamma, 0, "The gamma must be greater than or equal to 0.");
                    m_log.CHECK_GT(m_param.stepsize, 0, "The stepsize must be greater than 0.");
                    dfRate = m_param.base_lr * (1.0 / (1.0 + Math.Exp(-1.0 * m_param.gamma * (nIterationOverride - m_param.stepsize))));
                    break;

                default:
                    m_log.FAIL("Unknown learning rate policy: " + m_param.lr_policy);
                    break;
            }

            return dfRate;
        }

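        // Worked example (illustrative values, not defaults): with lr_policy = "step",
        // base_lr = 0.01, gamma = 0.1 and stepsize = 100000, the schedule is
        //   iter      0..99999  -> 0.01 * 0.1^0 = 0.01
        //   iter 100000..199999 -> 0.01 * 0.1^1 = 0.001
        //   iter 200000..299999 -> 0.01 * 0.1^2 = 0.0001
        // i.e. the rate drops by a factor of gamma every stepsize iterations.
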
        /// <summary>
        /// Compute the update values and apply them to the training Net.
        /// </summary>
        /// <param name="nIterationOverride">Optionally, specifies an iteration override, or -1 (the default) to use the current iteration.</param>
        /// <returns>The learning rate used is returned.</returns>
        public override double ApplyUpdate(int nIterationOverride = -1)
        {
            double dfRate = GetLearningRate(nIterationOverride);

            if (LearningRateOverride > 0)
                dfRate = LearningRateOverride;

            if (m_param.display > 0 && (m_nIter % m_param.display) == 0)
            {
                string strOut = "Iteration " + m_nIter.ToString() + ", lr = " + dfRate.ToString() + ", Loss = " + m_dfSmoothedLoss.ToString();
                if (m_dfIterAccuracy.HasValue)
                    strOut += ", Iter Accuracy = " + m_dfIterAccuracy.Value.ToString() + " (" + m_dfIterAccuracy.Value.ToString("P3") + ")";

                m_log.WriteLine(strOut);
            }

            ClipGradients();

            for (int i = 0; i < m_net.learnable_parameters.Count; i++)
            {
                Normalize(i);
                Regularize(i);
                ComputeUpdateValue(i, dfRate, nIterationOverride);
            }

            m_net.Update();

            // Increment the internal iter_ counter -- its value should always indicate
            // the number of times the weights have been updated.
            m_nIter++;

            return dfRate;
        }

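        // Per-iteration pipeline (a sketch of the order above, assuming the standard
        // Caffe-style training loop in which the base Solver runs forward/backward to
        // fill the parameter diffs before calling ApplyUpdate):
        //   1. ClipGradients      - rescale all diffs if their global L2 norm is too large.
        //   2. Normalize(i)       - divide each diff by iter_size when gradients accumulate.
        //   3. Regularize(i)      - add the weight-decay term to each diff.
        //   4. ComputeUpdateValue - fold each diff into the momentum history.
        //   5. m_net.Update()     - subtract each final diff from its parameter data.
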
        /// <summary>
        /// Restore the state of the Solver.
        /// </summary>
        /// <param name="rgState">Specifies the state bytes to restore.</param>
        protected override void RestoreSolverState(byte[] rgState)
        {
            SolverState state = m_persist.LoadSolverState(rgState);

            m_nIter = state.iter;
            m_nCurrentStep = state.current_step;

            m_log.CHECK_EQ(state.history.Count, m_colHistory.Count, "Incorrect length of state history blobs.");
            m_log.WriteLine("SGDSolver: restoring state history.");

            for (int i = 0; i < m_colHistory.Count; i++)
            {
                m_colHistory[i].FromProto(state.history[i]);
            }
        }

        /// <summary>
        /// Take a snapshot of the Solver state.
        /// </summary>
        /// <returns>The state bytes are returned.</returns>
        protected override byte[] SnapshotSolverState()
        {
            SolverState state = new SolverState();
            state.iter = m_nIter;
            state.current_step = m_nCurrentStep;

            foreach (Blob<T> blob in m_colHistory)
            {
                state.history.Add(blob.ToProto());
            }

            return m_persist.SaveSolverState(state);
        }

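        // Note: the snapshot intentionally includes the momentum history, not just the
        // iteration counters. If training resumed with a zeroed history, the first updates
        // after a restore would behave as if momentum were 0 until the history re-filled,
        // causing a small but avoidable jump in the effective step size.
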
        /// <summary>
        /// Normalize a learnable Blob of the training Net.
        /// </summary>
        /// <param name="param_id">Specifies the index of the learnable parameter Blob.</param>
        public virtual void Normalize(int param_id)
        {
            if (m_param.iter_size == 1)
                return;

            // Scale gradient to counterbalance accumulation.
            BlobCollection<T> colNetParams = m_net.learnable_parameters;

            if (!colNetParams[param_id].DiffExists)
                return;

            double dfAccumNormalization = 1.0 / m_param.iter_size;
            m_cuda.scal(colNetParams[param_id].count(), dfAccumNormalization, colNetParams[param_id].mutable_gpu_diff);
        }

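        // Example (illustrative): with iter_size = 4 and batch_size = 16, gradients from
        // four forward/backward passes (an effective batch of 64) accumulate in each diff,
        // so scal(count, 1/4, diff) rescales the accumulated sum back to the average
        // gradient before the update is computed.
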
        /// <summary>
        /// Regularize a learnable Blob of the training Net.
        /// </summary>
        /// <param name="param_id">Specifies the index of the learnable parameter Blob.</param>
        public virtual void Regularize(int param_id)
        {
            BlobCollection<T> colNetParams = m_net.learnable_parameters;

            if (!colNetParams[param_id].DiffExists)
                return;

            List<double?> rgNetParamWeightDecay = m_net.params_weight_decay;
            double dfWeightDecay = m_param.weight_decay;
            double dfLocalDecay = dfWeightDecay * rgNetParamWeightDecay[param_id].GetValueOrDefault(0);

            if (dfLocalDecay > 0)
            {
                switch (m_param.regularization_type)
                {
                    case "L2":
                        // add weight decay
                        m_cuda.axpy(colNetParams[param_id].count(), dfLocalDecay, colNetParams[param_id].gpu_data, colNetParams[param_id].mutable_gpu_diff);
                        break;

                    case "L1":
                        m_cuda.sign(colNetParams[param_id].count(), colNetParams[param_id].gpu_data, m_colTemp[param_id].mutable_gpu_data);
                        m_cuda.axpy(colNetParams[param_id].count(), dfLocalDecay, m_colTemp[param_id].gpu_data, colNetParams[param_id].mutable_gpu_diff);
                        break;
                }
            }
        }

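        // Regularization math (illustrative): both branches add the derivative of the
        // penalty term to the existing gradient via axpy(n, a, x, y) => y += a * x:
        //   L2: penalty (decay/2) * ||w||^2  ->  diff += decay * w
        //   L1: penalty  decay    * |w|      ->  diff += decay * sign(w)
        // For example, with weight_decay = 0.0005 and a per-parameter decay multiplier
        // of 1, L2 adds 0.0005 * w[i] to each gradient element.
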
        /// <summary>
        /// Compute the SGD update value that will be applied to a learnable Blob in the training Net.
        /// </summary>
        /// <param name="param_id">Specifies the index of the learnable parameter Blob.</param>
        /// <param name="dfRate">Specifies the learning rate.</param>
        /// <param name="nIterationOverride">Optionally, specifies an iteration override, or -1 (the default) to use the current iteration.</param>
        public virtual void ComputeUpdateValue(int param_id, double dfRate, int nIterationOverride = -1)
        {
            BlobCollection<T> colNetParams = m_net.learnable_parameters;

            if (!colNetParams[param_id].DiffExists)
                return;

            List<double?> net_params_lr = m_net.params_lr;
            T fMomentum = Utility.ConvertVal<T>(m_param.momentum);
            T fLocalRate = Utility.ConvertVal<T>(dfRate * net_params_lr[param_id].GetValueOrDefault(0));

            // Compute the update to history, then copy it to the parameter diff.
            if (m_colHistory != null)
                m_cuda.sgd_update(colNetParams[param_id].count(), colNetParams[param_id].mutable_gpu_diff, m_colHistory[param_id].mutable_gpu_data, fMomentum, fLocalRate);
        }

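        // Momentum update (a sketch of the classic Caffe SGD rule that sgd_update is
        // expected to implement; the kernel itself lives in the low-level CUDA DLL):
        //   history = momentum * history + local_rate * diff
        //   diff    = history
        // so that the subsequent m_net.Update() applies
        //   W(t+1) = W(t) - (momentum * V(t) + local_rate * grad)
        // i.e. a linear combination of the negative gradient and the previous weight update,
        // matching the class description above.
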
        /// <summary>
        /// Clip the gradients of all learnable Blobs in the training Net.
        /// </summary>
        public virtual void ClipGradients()
        {
            double dfClipGradients = m_param.clip_gradients;

            if (dfClipGradients < 0)
                return;

            BlobCollection<T> colNetParams = m_net.learnable_parameters;
            double dfSumsqDiff = 0;

            for (int i = 0; i < colNetParams.Count; i++)
            {
                if (colNetParams[i].DiffExists)
                    dfSumsqDiff += Utility.ConvertVal<T>(colNetParams[i].sumsq_diff());
            }

            double dfL2NormDiff = Math.Sqrt(dfSumsqDiff);

            if (dfL2NormDiff > dfClipGradients)
            {
                double dfScaleFactor = dfClipGradients / dfL2NormDiff;

                if (m_param.enable_clip_gradient_status)
                    m_log.WriteLine("Gradient clipping: scaling down gradients (L2 norm " + dfL2NormDiff.ToString() + " > " + dfClipGradients.ToString() + ") by scale factor " + dfScaleFactor.ToString());

                for (int i = 0; i < colNetParams.Count; i++)
                {
                    if (colNetParams[i].DiffExists)
                        colNetParams[i].scale_diff(Utility.ConvertVal<T>(dfScaleFactor));
                }
            }
        }
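
        // Clipping arithmetic (illustrative): with clip_gradients = 10 and a global
        // gradient L2 norm of 20, every diff is scaled by 10 / 20 = 0.5, so the update
        // keeps its direction but its overall magnitude is capped at the clip threshold.
        // Note the norm is computed over ALL learnable parameters jointly, not per-blob.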
    }
}