MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
TrainerPG.cs
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Drawing;
6using System.Linq;
7using System.Text;
8using System.Threading;
9using System.Threading.Tasks;
10using MyCaffe.basecode;
11using MyCaffe.common;
12using MyCaffe.fillers;
13using MyCaffe.layers;
14using MyCaffe.param;
15using MyCaffe.solvers;
16
18{
/// <summary>
/// The TrainerPG implements a Policy Gradient trainer, optionally running multiple
/// agent threads in parallel with a shared optimizer that merges their updates.
/// </summary>
/// <typeparam name="T">Specifies the base type (float or double).</typeparam>
public class TrainerPG<T> : IxTrainerRL, IDisposable
{
    IxTrainerCallback m_icallback;
    CryptoRandom m_random = new CryptoRandom();
    MyCaffeControl<T> m_mycaffe;
    PropertySet m_properties;
    int m_nThreads = 1;
    List<int> m_rgGpuID = new List<int>();
    Optimizer<T> m_optimizer = null;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="mycaffe">Specifies the MyCaffeControl used for training.</param>
    /// <param name="properties">Specifies the property set containing the trainer settings.</param>
    /// <param name="random">Specifies the random number generator to use.</param>
    /// <param name="icallback">Specifies the callback used for external communication.</param>
    public TrainerPG(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
    {
        m_icallback = icallback;
        m_mycaffe = mycaffe;
        m_properties = properties;
        m_random = random;

        m_nThreads = m_properties.GetPropertyAsInt("Threads", 1);
        m_rgGpuID.Add(m_mycaffe.Cuda.GetDeviceID());

        // When running multi-threaded, an optional 'GPUIDs' list assigns a device to each agent.
        string strGpuID = m_properties.GetProperty("GPUIDs", false);
        if (strGpuID != null && m_nThreads > 1)
        {
            int nDeviceCount = m_mycaffe.Cuda.GetDeviceCount();

            m_rgGpuID.Clear();

            foreach (string strID in strGpuID.Split(','))
            {
                int nDevId = int.Parse(strID);

                if (nDevId < 0 || nDevId >= nDeviceCount)
                    throw new Exception("Invalid device ID - value must be within the range [0," + (nDeviceCount - 1).ToString() + "].");

                m_rgGpuID.Add(nDevId);
            }
        }
    }

    /// <summary>
    /// Release all resources used.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Initialize the trainer by resetting the cancel event and firing the OnInitialize callback.
    /// </summary>
    /// <returns>Returns <i>true</i>.</returns>
    public bool Initialize()
    {
        m_mycaffe.CancelEvent.Reset();
        m_icallback.OnInitialize(new InitializeArgs(m_mycaffe));
        return true;
    }

    // Waits 'nWait' ms in 250 ms increments, firing the OnWait callback each increment.
    private void wait(int nWait)
    {
        const int nWaitInc = 250;

        for (int nTotalWait = 0; nTotalWait < nWait; nTotalWait += nWaitInc)
        {
            m_icallback.OnWait(new WaitArgs(nWaitInc));
        }
    }

    /// <summary>
    /// Shutdown the trainer, canceling any outstanding work and firing OnShutdown.
    /// </summary>
    /// <param name="nWait">Specifies the maximum time to wait (in ms) for work to stop.</param>
    /// <returns>Returns <i>true</i>.</returns>
    public bool Shutdown(int nWait)
    {
        if (m_mycaffe != null)
        {
            m_mycaffe.CancelEvent.Set();
            wait(nWait);
        }

        m_icallback.OnShutdown();

        return true;
    }

    /// <summary>
    /// Run a single cycle on the environment after the delay.
    /// </summary>
    /// <param name="nDelay">Specifies the delay (in ms) to wait before running.</param>
    /// <returns>The results of the run are returned, with a probability of 1.0 on the selected action.</returns>
    public ResultCollection RunOne(int nDelay = 1000)
    {
        m_mycaffe.CancelEvent.Reset();

        Agent<T> agent = new Agent<T>(0, m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN, 0, 1);
        Tuple<int, int> res = agent.Run(nDelay);

        // Mark the selected action (Item1) with probability 1 among Item2 actions.
        List<Result> rgActions = Enumerable.Range(0, res.Item2)
                                           .Select(i => new Result(i, (i == res.Item1) ? 1.0 : 0.0))
                                           .ToList();

        agent.Dispose();

        return new ResultCollection(rgActions, LayerParameter.LayerType.SOFTMAX);
    }

    /// <summary>
    /// Run a set of iterations and return the raw results.
    /// </summary>
    /// <param name="nN">Specifies the number of samples to run.</param>
    /// <param name="runProp">Optionally specifies properties to use when running.</param>
    /// <param name="type">Returns the data type contained in the byte stream.</param>
    /// <returns>The results of the run are returned in the native format.</returns>
    public byte[] Run(int nN, PropertySet runProp, out string type)
    {
        m_mycaffe.CancelEvent.Reset();

        Agent<T> agent = new Agent<T>(0, m_icallback, m_mycaffe, m_properties, m_random, Phase.RUN, 0, 1);
        byte[] rgResults = agent.Run(nN, out type);
        agent.Dispose();

        return rgResults;
    }

    /// <summary>
    /// Run the test cycle - currently this implementation runs the TEST phase on a single agent.
    /// </summary>
    /// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run.</param>
    /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
    /// <returns>Returns <i>true</i>.</returns>
    public bool Test(int nN, ITERATOR_TYPE type)
    {
        int nDelay = 1000;

        // Turn off the num-skip to run at normal speed.
        string strProp = m_properties.ToString();
        strProp += "EnableNumSkip=False;";
        PropertySet properties = new PropertySet(strProp);

        m_mycaffe.CancelEvent.Reset();

        Agent<T> agent = new Agent<T>(0, m_icallback, m_mycaffe, properties, m_random, Phase.TRAIN, 0, 1);
        agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);
        agent.Dispose();

        Shutdown(nDelay);

        return true;
    }

    /// <summary>
    /// Train the network using the training technique implemented by this trainer.
    /// </summary>
    /// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run.</param>
    /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
    /// <param name="step">Specifies the stepping mode to use (when debugging).</param>
    /// <returns>Returns <i>false</i>.</returns>
    public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
    {
        List<Agent<T>> rgAgents = new List<Agent<T>>();

        m_mycaffe.CancelEvent.Reset();

        // A shared optimizer is only needed when several agent threads must merge gradients.
        if (m_nThreads > 1)
            m_optimizer = new Optimizer<T>(m_mycaffe);

        for (int i = 0; i < m_nThreads; i++)
        {
            // Assign GPUs to agent threads round-robin.
            int nGpuID = m_rgGpuID[i % m_rgGpuID.Count];

            Agent<T> agent = new Agent<T>(i, m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN, nGpuID, m_nThreads);
            agent.OnApplyUpdates += Agent_OnApplyUpdates;
            rgAgents.Add(agent);
        }

        if (m_optimizer != null)
            m_optimizer.Start(new WorkerStartArgs(0, Phase.TRAIN, nN, type, step));

        WorkerStartArgs args = new WorkerStartArgs(1, Phase.TRAIN, nN, type, step);

        foreach (Agent<T> agent in rgAgents)
            agent.Start(args);

        // Block until the training session is canceled.
        while (!m_mycaffe.CancelEvent.WaitOne(250))
        {
        }

        foreach (Agent<T> agent in rgAgents)
        {
            agent.Stop(1000);
            agent.Dispose();
        }

        if (m_optimizer != null)
        {
            m_optimizer.Stop(1000);
            m_optimizer.Dispose();
            m_optimizer = null;
        }

        Shutdown(3000);

        return false;
    }

    // Routes an agent's gradient update request to the shared optimizer (if any).
    private void Agent_OnApplyUpdates(object sender, ApplyUpdateArgs<T> e)
    {
        if (m_optimizer != null)
            m_optimizer.ApplyUpdates(e.MyCaffeWorker, e.Iteration);
    }
}
255
260 {
261 int m_nCycleDelay;
262 Phase m_phase;
263 int m_nN;
264 ITERATOR_TYPE m_type;
265 TRAIN_STEP m_step = TRAIN_STEP.NONE;
266
/// <summary>
/// The constructor.
/// </summary>
/// <param name="nCycleDelay">Specifies the cycle delay (in ms) used between work cycles.</param>
/// <param name="phase">Specifies the phase on which to run.</param>
/// <param name="nN">Specifies the number of iterations (based on the iterator type) to run.</param>
/// <param name="type">Specifies the iterator type.</param>
/// <param name="step">Specifies the training stepping mode (used when debugging).</param>
public WorkerStartArgs(int nCycleDelay, Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    m_step = step;
    m_type = type;
    m_nN = nN;
    m_phase = phase;
    m_nCycleDelay = nCycleDelay;
}
283
288 {
289 get { return m_step; }
290 }
291
/// <summary>
/// Returns the cycle delay (in ms) used between work cycles.
/// </summary>
public int CycleDelay => m_nCycleDelay;
299
304 {
305 get { return m_phase; }
306 }
307
/// <summary>
/// Returns the number of iterations (based on the iterator type) to run.
/// </summary>
public int N => m_nN;
315
320 {
321 get { return m_type; }
322 }
323 }
324
/// <summary>
/// The Worker is the base class of all worker threads used by the trainer.
/// </summary>
class Worker
{
    /// <summary>
    /// Specifies the index of the worker.
    /// </summary>
    protected int m_nIndex = -1;
    /// <summary>
    /// Specifies the cancel event used to stop the worker.
    /// </summary>
    protected AutoResetEvent m_evtCancel = new AutoResetEvent(false);
    /// <summary>
    /// Specifies the done event signaled when the worker completes its work.
    /// </summary>
    protected ManualResetEvent m_evtDone = new ManualResetEvent(false);
    /// <summary>
    /// Specifies the task running the worker thread.
    /// </summary>
    protected Task m_workTask = null;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="nIdx">Specifies the index of the worker.</param>
    public Worker(int nIdx)
    {
        m_nIndex = nIdx;
    }

    /// <summary>
    /// Overridden by derived classes to perform the actual work.
    /// </summary>
    /// <param name="arg">Specifies the WorkerStartArgs passed to Start.</param>
    protected virtual void doWork(object arg)
    {
    }

    /// <summary>
    /// Start the worker thread (has no effect if the worker is already running).
    /// </summary>
    /// <param name="args">Specifies the start arguments.</param>
    public void Start(WorkerStartArgs args)
    {
        if (m_workTask != null)
            return;

        m_workTask = Task.Factory.StartNew(new Action<object>(doWork), args);
    }

    /// <summary>
    /// Signal the worker to stop and wait up to 'nWait' ms for it to complete.
    /// </summary>
    /// <param name="nWait">Specifies the maximum time to wait (in ms).</param>
    public void Stop(int nWait)
    {
        m_evtCancel.Set();
        m_workTask = null;
        m_evtDone.WaitOne(nWait);
    }
}
385
/// <summary>
/// The Optimizer runs on its own thread and applies worker gradients to the primary
/// MyCaffe instance, copying the updated weights back to the requesting worker.
/// </summary>
/// <typeparam name="T">Specifies the base type (float or double).</typeparam>
class Optimizer<T> : Worker, IDisposable
{
    MyCaffeControl<T> m_mycaffePrimary;
    MyCaffeControl<T> m_mycaffeWorker;
    int m_nIteration;
    double m_dfLearningRate;
    AutoResetEvent m_evtApplyUpdates = new AutoResetEvent(false);
    ManualResetEvent m_evtDoneApplying = new ManualResetEvent(false);
    object m_syncObj = new object();

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="mycaffePrimary">Specifies the primary MyCaffe instance that owns the weights.</param>
    public Optimizer(MyCaffeControl<T> mycaffePrimary)
        : base(0)
    {
        m_mycaffePrimary = mycaffePrimary;
    }

    /// <summary>
    /// Release all resources used.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// The worker loop: wait for an apply-updates request (or cancel), merge the worker's
    /// gradients into the primary instance, and hand the new weights back to the worker.
    /// </summary>
    /// <param name="arg">Specifies the WorkerStartArgs passed to Start.</param>
    protected override void doWork(object arg)
    {
        WorkerStartArgs args = arg as WorkerStartArgs;

        m_mycaffePrimary.Cuda.SetDeviceID();

        // Index 0 = apply-updates request; any other index = cancel.
        List<WaitHandle> rgWait = new List<WaitHandle> { m_evtApplyUpdates };
        rgWait.AddRange(m_mycaffePrimary.CancelEvent.Handles);

        int nWait = WaitHandle.WaitAny(rgWait.ToArray());

        while (nWait == 0)
        {
            if (args.Step != TRAIN_STEP.FORWARD)
            {
                m_mycaffePrimary.CopyGradientsFrom(m_mycaffeWorker);
                // Silence the log while applying the update to avoid noisy per-step output.
                m_mycaffePrimary.Log.Enable = false;
                m_dfLearningRate = m_mycaffePrimary.ApplyUpdate(m_nIteration);
                m_mycaffePrimary.Log.Enable = true;
                m_mycaffeWorker.CopyWeightsFrom(m_mycaffePrimary);
            }

            m_evtDoneApplying.Set();

            nWait = WaitHandle.WaitAny(rgWait.ToArray());

            // When single-stepping, only service one request.
            if (args.Step != TRAIN_STEP.NONE)
                break;
        }
    }

    /// <summary>
    /// Request that the optimizer thread apply the worker's gradients, blocking until done.
    /// </summary>
    /// <param name="mycaffeWorker">Specifies the worker whose gradients are to be applied.</param>
    /// <param name="nIteration">Specifies the iteration of the update.</param>
    /// <returns>The learning rate used is returned, or 0 when canceled.</returns>
    public double ApplyUpdates(MyCaffeControl<T> mycaffeWorker, int nIteration)
    {
        lock (m_syncObj)
        {
            m_mycaffeWorker = mycaffeWorker;
            m_nIteration = nIteration;

            m_evtDoneApplying.Reset();
            m_evtApplyUpdates.Set();

            // Index 0 = done applying; any other index = cancel.
            List<WaitHandle> rgWait = new List<WaitHandle> { m_evtDoneApplying };
            rgWait.AddRange(m_mycaffePrimary.CancelEvent.Handles);

            if (WaitHandle.WaitAny(rgWait.ToArray()) != 0)
                return 0;

            return m_dfLearningRate;
        }
    }
}
484
/// <summary>
/// The Agent interacts with the environment to build episodes and drives the Brain
/// to train on them using policy gradients.
/// </summary>
/// <typeparam name="T">Specifies the base type (float or double).</typeparam>
class Agent<T> : Worker, IDisposable
{
    IxTrainerCallback m_icallback;
    Brain<T> m_brain;
    PropertySet m_properties;
    CryptoRandom m_random;
    float m_fGamma;
    bool m_bAllowDiscountReset = false;
    bool m_bUseRawInput = false;
    int m_nEpsSteps = 0;
    double m_dfEpsStart = 0;
    double m_dfEpsEnd = 0;
    double m_dfExplorationRate = 0;
    int m_nEpisodeBatchSize = 1;
    double m_dfEpisodeElitePercentile = 1;
    static object m_syncObj = new object();
    bool m_bShowActionProb = false;
    bool m_bVerbose = false;

    /// <summary>
    /// The OnApplyUpdates event fires when the Agent needs its gradient updates applied.
    /// </summary>
    public event EventHandler<ApplyUpdateArgs<T>> OnApplyUpdates;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="nIdx">Specifies the index of this agent.</param>
    /// <param name="icallback">Specifies the callback used for external communication.</param>
    /// <param name="mycaffe">Specifies the MyCaffeControl used.</param>
    /// <param name="properties">Specifies the property set containing the agent settings.</param>
    /// <param name="random">Specifies the random number generator used.</param>
    /// <param name="phase">Specifies the phase under which to run.</param>
    /// <param name="nGpuID">Specifies the GPU on which this agent runs.</param>
    /// <param name="nThreadCount">Specifies the total number of agent threads.</param>
    public Agent(int nIdx, IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
        : base(nIdx)
    {
        m_icallback = icallback;
        m_brain = new Brain<T>(mycaffe, properties, random, phase, nGpuID, nThreadCount);
        m_brain.OnApplyUpdate += brain_OnApplyUpdate;
        m_properties = properties;
        m_random = random;

        m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", 0.99);
        m_bAllowDiscountReset = properties.GetPropertyAsBool("AllowDiscountReset", false);
        m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", false);
        m_nEpsSteps = properties.GetPropertyAsInt("EpsSteps", 0);
        m_dfEpsStart = properties.GetPropertyAsDouble("EpsStart", 0);
        m_dfEpsEnd = properties.GetPropertyAsDouble("EpsEnd", 0);
        m_nEpisodeBatchSize = m_properties.GetPropertyAsInt("EpisodeBatchSize", 1);
        m_dfEpisodeElitePercentile = properties.GetPropertyAsDouble("EpisodeElitePercent", 1.0);
        m_bShowActionProb = properties.GetPropertyAsBool("ShowActionProb", false);
        m_bVerbose = properties.GetPropertyAsBool("Verbose", false);

        if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
            throw new Exception("The 'EpsStart' is out of range - please specify a real number in the range [0,1]");

        if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
            throw new Exception("The 'EpsEnd' is out of range - please specify a real number in the range [0,1]");

        if (m_dfEpsEnd > m_dfEpsStart)
            throw new Exception("The 'EpsEnd' must be less than the 'EpsStart' value.");
    }

    // Forwards the Brain's apply-update request to anyone listening on this agent.
    private void brain_OnApplyUpdate(object sender, ApplyUpdateArgs<T> e)
    {
        if (OnApplyUpdates != null)
            OnApplyUpdates(sender, e);
    }

    /// <summary>
    /// Release all resources used.
    /// </summary>
    public void Dispose()
    {
        if (m_brain != null)
        {
            m_brain.Dispose();
            m_brain = null;
        }
    }

    /// <summary>
    /// The worker thread: create the brain (serialized across agents) and run the training loop.
    /// </summary>
    /// <param name="arg">Specifies the WorkerStartArgs passed to Start.</param>
    protected override void doWork(object arg)
    {
        try
        {
            WorkerStartArgs args = arg as WorkerStartArgs;

            // Brain creation is serialized across all agent threads.
            lock (m_syncObj)
            {
                m_brain.Create();
            }

            m_evtDone.Reset();
            m_evtCancel.Reset();
            Run(args.Phase, args.N, args.IterationType, args.Step);
            m_evtDone.Set();
        }
        catch (Exception excpt)
        {
            m_brain.OutputLog.WriteError(excpt);
        }

        m_brain.Cancel.Set();
    }

    // Returns the linearly annealed exploration rate for the given episode.
    private double getEpsilon(int nEpisode)
    {
        if (m_nEpsSteps == 0)
            return 0;

        if (nEpisode >= m_nEpsSteps)
            return m_dfEpsEnd;

        return m_dfEpsStart + nEpisode * (m_dfEpsEnd - m_dfEpsStart) / m_nEpsSteps;
    }

    // Queries the environment (via the callback) for the next state.
    private StateBase getData(Phase phase, int nIdx, int nAction, bool? bResetOverride = null)
    {
        GetDataArgs args = m_brain.getDataArgs(phase, nIdx, nAction, bResetOverride);
        m_icallback.OnGetData(args);
        return args.State;
    }

    // Selects an action: either a random (exploration) action, or the brain's sampled action.
    private int getAction(int nEpisode, SimpleDatum sd, SimpleDatum sdClip, int nActionCount, TRAIN_STEP step, out float[] rgfAprob)
    {
        if (step == TRAIN_STEP.NONE)
        {
            m_dfExplorationRate = getEpsilon(nEpisode);

            if (m_dfExplorationRate > 0 && m_random.NextDouble() < m_dfExplorationRate)
            {
                // Explore: pick a uniformly random action with probability 1.
                rgfAprob = new float[nActionCount];
                int nAction = m_random.Next(nActionCount);
                rgfAprob[nAction] = 1.0f;
                return nAction;
            }
        }

        return m_brain.act(sd, sdClip, out rgfAprob);
    }

    // Reports progress via the callback and returns the (possibly updated) episode count.
    private int updateStatus(int nIteration, int nEpisodeCount, double dfRunningReward, double dfRewardSum, double dfLoss, double dfLearningRate)
    {
        GetStatusArgs args = new GetStatusArgs(m_nIndex, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, m_dfExplorationRate, 0, dfLoss, dfLearningRate);
        m_icallback.OnUpdateStatus(args);
        return args.NewFrameCount;
    }

    /// <summary>
    /// Run a single action on the environment after the delay.
    /// </summary>
    /// <param name="nDelay">Specifies the delay (in ms) to wait before getting the state.</param>
    /// <returns>A tuple of the selected action and the total action count is returned.</returns>
    public Tuple<int, int> Run(int nDelay = 1000)
    {
        // Reset the environment and get the initial state.
        getData(Phase.RUN, m_nIndex, -1);
        Thread.Sleep(nDelay);

        StateBase state = getData(Phase.RUN, m_nIndex, -1, false);

        m_brain.Create();

        float[] rgfAprob;
        int a = m_brain.act(state.Data, state.Clip, out rgfAprob);

        return new Tuple<int, int>(a, state.ActionCount);
    }

    /// <summary>
    /// Run the agent for 'nIterations' steps, collecting results in the native format.
    /// </summary>
    /// <param name="nIterations">Specifies the number of steps to run (-1 = until canceled).</param>
    /// <param name="type">Returns the data type contained in the byte stream.</param>
    /// <returns>The results converted to the native format are returned.</returns>
    public byte[] Run(int nIterations, out string type)
    {
        IxTrainerCallbackRNN icallback = m_icallback as IxTrainerCallbackRNN;
        if (icallback == null)
            throw new Exception("The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");

        m_brain.Create();

        StateBase s = getData(Phase.RUN, m_nIndex, -1);
        int nIteration = 0;
        List<float> rgResults = new List<float>();
        int nLookahead = m_properties.GetPropertyAsInt("Lookahead", 0);

        while (!m_brain.Cancel.WaitOne(0) && (nIterations == -1 || nIteration < nIterations))
        {
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

            // Forward the policy network and sample an action.
            float[] rgfAprob;
            int nAction = m_brain.act(x, s.Clip, out rgfAprob);

            if (m_bShowActionProb && m_bVerbose)
            {
                string strOut = "Action Prob: " + Utility.ToString<float>(rgfAprob.ToList(), 4) + " -> " + nAction.ToString();
                m_brain.OutputLog.WriteLine(strOut);
            }

            // Record timestamp, the data value at the lookahead offset, and the action taken.
            int nSeqLen = m_brain.RecurrentSequenceLength;
            int nItemLen = s.Data.ItemCount / nSeqLen;
            int nData1Idx = s.Data.ItemCount - (nItemLen * (nLookahead + 1));

            rgResults.Add(s.Data.TimeStamp.ToFileTime());
            rgResults.Add((float)s.Data.GetDataAtF(nData1Idx));
            rgResults.Add(nAction);

            // Take the next step using the action
            s = getData(Phase.RUN, m_nIndex, nAction);
            nIteration++;

            m_brain.OutputLog.Progress = ((double)nIteration / (double)nIterations);
        }

        ConvertOutputArgs args = new ConvertOutputArgs(nIterations, rgResults.ToArray());
        icallback.OnConvertOutput(args);

        type = args.RawType;
        return args.RawOutput;
    }

    // Returns true when the requested number of iterations/episodes has been reached.
    private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
    {
        if (nN == -1)
            return false;

        if (type == ITERATOR_TYPE.EPISODE)
            return (nEpisode >= nN);

        return (nIteration >= nN);
    }

    /// <summary>
    /// The main episode loop: collect experience, and when training, learn from the
    /// completed episodes held in the episode cache.
    /// </summary>
    /// <param name="phase">Specifies the phase under which to run.</param>
    /// <param name="nN">Specifies the number of iterations (based on the iterator type) to run.</param>
    /// <param name="type">Specifies the iterator type.</param>
    /// <param name="step">Specifies the stepping mode (used when debugging).</param>
    public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
    {
        MemoryCache rgMemoryCache = new MemoryCache(m_nEpisodeBatchSize);
        Memory rgMemory = new Memory();
        double? dfRunningReward = null;
        double dfEpisodeReward = 0;
        int nEpisode = 0;
        int nIteration = 0;

        m_brain.Create();

        StateBase s = getData(phase, m_nIndex, -1);

        while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
        {
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

            // Forward the policy network and sample an action.
            float[] rgfAprob;
            int action = getAction(nIteration, x, s.Clip, s.ActionCount, step, out rgfAprob);

            if (m_bShowActionProb && m_bVerbose)
            {
                string strOut = "Action Prob: " + Utility.ToString<float>(rgfAprob.ToList(), 4) + " -> " + action.ToString();
                m_brain.OutputLog.WriteLine(strOut);
            }

            if (step == TRAIN_STEP.FORWARD)
                return;

            // Take the next step using the action
            StateBase s_ = getData(phase, m_nIndex, action);
            dfEpisodeReward += s_.Reward;

            if (phase != Phase.TRAIN)
            {
                if (s_.Done)
                {
                    nEpisode++;

                    // Update reward running
                    if (!dfRunningReward.HasValue)
                        dfRunningReward = dfEpisodeReward;
                    else
                        dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode = updateStatus(nIteration, nEpisode, dfRunningReward.Value, dfEpisodeReward, m_brain.LastLoss, m_brain.LearningRate);
                    dfEpisodeReward = 0;

                    s = getData(phase, m_nIndex, -1);
                }
                else
                {
                    s = s_;
                }

                nIteration++;
                continue;
            }

            // Build up episode memory, using reward for taking the action.
            rgMemory.Add(new MemoryItem(s, x, action, rgfAprob, (float)s_.Reward));

            if (!s_.Done)
            {
                s = s_;
                continue;
            }

            // An episode has finished.
            nEpisode++;
            nIteration++;

            if (rgMemoryCache.Add(rgMemory))
            {
                if (m_bShowActionProb)
                    m_brain.OutputLog.WriteLine("---learning---");

                rgMemoryCache.PurgeNonElite(m_dfEpisodeElitePercentile);

                for (int i = 0; i < rgMemoryCache.Count; i++)
                {
                    Memory rgMemory1 = rgMemoryCache[i];

                    m_brain.Reshape(rgMemory1);

                    // Compute the discounted reward (backwards through time)
                    float[] rgDiscountedR = rgMemory1.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                    // Rewards are normalized when set to be unit normal (helps control the gradient estimator variance)
                    m_brain.SetDiscountedR(rgDiscountedR);

                    // Sigmoid models, set the probabilities up font.
                    if (!m_brain.UsesSoftMax)
                    {
                        // The action probabilities are used to calculate the initial gradient within the loss function.
                        float[] rgfAprobSet = rgMemory1.GetActionProbabilities();
                        m_brain.SetActionProbabilities(rgfAprobSet);
                    }

                    // Get the action one-hot vectors.  When using Softmax, this contains the one-hot vector containing
                    // each action set (e.g. 3 actions with action 0 set would return a vector <1,0,0>).
                    // When using a binary probability (e.g. with Sigmoid), each action set only contains a
                    // single element which is set to the action value itself (e.g. 0 for action '0' and 1 for action '1')
                    float[] rgfAonehotSet = rgMemory1.GetActionOneHotVectors();
                    m_brain.SetActionOneHotVectors(rgfAonehotSet);

                    // Train for one iteration, which triggers the loss function.
                    List<Datum> rgData = rgMemory1.GetData();
                    List<Datum> rgClip = rgMemory1.GetClip();

                    m_brain.SetData(rgData, rgClip);

                    // Only apply the accumulated gradients on the last cached episode.
                    bool bApplyGradients = (i == rgMemoryCache.Count - 1);
                    m_brain.Train(nIteration, step, bApplyGradients);

                    // Update reward running
                    if (!dfRunningReward.HasValue)
                        dfRunningReward = dfEpisodeReward;
                    else
                        dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode = updateStatus(nIteration, nEpisode, dfRunningReward.Value, dfEpisodeReward, m_brain.LastLoss, m_brain.LearningRate);
                    dfEpisodeReward = 0;
                }

                rgMemoryCache.Clear();
            }

            s = getData(phase, m_nIndex, -1);
            rgMemory = new Memory();

            if (step != TRAIN_STEP.NONE)
                return;
        }
    }
}
892
897 class Brain<T> : IDisposable
898 {
899 MyCaffeControl<T> m_mycaffePrimary;
900 MyCaffeControl<T> m_mycaffeWorker;
901 Net<T> m_net;
902 Solver<T> m_solver;
903 MemoryDataLayer<T> m_memData;
904 MemoryLossLayer<T> m_memLoss;
905 SoftmaxLayer<T> m_softmax = null;
906 SoftmaxCrossEntropyLossLayer<T> m_softmaxCe = null;
907 bool m_bSoftmaxCeSetup = false;
908 PropertySet m_properties;
909 CryptoRandom m_random;
910 BlobCollection<T> m_colAccumulatedGradients = new BlobCollection<T>();
911 Blob<T> m_blobDiscountedR;
912 Blob<T> m_blobPolicyGradient;
913 Blob<T> m_blobActionOneHot;
914 Blob<T> m_blobDiscountedR1;
915 Blob<T> m_blobPolicyGradient1;
916 Blob<T> m_blobActionOneHot1;
917 Blob<T> m_blobLoss;
918 Blob<T> m_blobAprobLogit;
919 bool m_bSkipLoss;
920 int m_nMiniBatch = 10;
921 SimpleDatum m_sdLast = null;
922 double m_dfLastLoss = 0;
923 double m_dfLearningRate = 0;
924 Phase m_phase;
925 int m_nGpuID = 0;
926 int m_nThreadCount = 1;
927 bool m_bCreated = false;
928 bool m_bUseAcceleratedTraining = false;
929 int m_nRecurrentSequenceLength = 0;
930 List<Datum> m_rgData = null;
931 List<Datum> m_rgClip = null;
932
936 public event EventHandler<ApplyUpdateArgs<T>> OnApplyUpdate;
937
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the primary MyCaffeControl used.</param>
/// <param name="properties">Specifies the property set containing the brain settings.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
/// <param name="nGpuID">Specifies the GPU on which this brain's worker runs.</param>
/// <param name="nThreadCount">Specifies the total number of agent threads.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
{
    m_mycaffePrimary = mycaffe;
    m_properties = properties;
    m_random = random;
    m_phase = phase;
    m_nGpuID = nGpuID;
    m_nThreadCount = nThreadCount;

    // The project's batch size (when non-zero) seeds the mini-batch, which the
    // 'MiniBatch' property may then override.
    int nMiniBatch = mycaffe.CurrentProject.GetBatchSize(phase);
    if (nMiniBatch != 0)
        m_nMiniBatch = nMiniBatch;

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);

    double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfRate.HasValue)
        m_dfLearningRate = dfRate.Value;

    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);
}
968
/// <summary>
/// Create the worker instance, locate the required layers, and allocate the
/// policy-gradient working blobs.  Safe to call more than once.
/// </summary>
public void Create()
{
    if (m_bCreated)
        return;

    m_mycaffePrimary.Log.Enable = false;

    // Single-threaded training shares the primary instance; otherwise clone onto this brain's GPU.
    if (m_nThreadCount == 1)
    {
        m_mycaffeWorker = m_mycaffePrimary;
        m_mycaffePrimary.Cuda.SetDeviceID();
    }
    else
    {
        m_mycaffeWorker = m_mycaffePrimary.Clone(m_nGpuID);
    }

    m_mycaffePrimary.Log.Enable = true;

    m_mycaffeWorker.Cuda.SetDeviceID();

    m_net = m_mycaffeWorker.GetInternalNet(m_phase);
    m_solver = m_mycaffeWorker.GetInternalSolver();

    m_memData = m_net.FindLayer(LayerParameter.LayerType.MEMORYDATA, null) as MemoryDataLayer<T>;
    m_memLoss = m_net.FindLayer(LayerParameter.LayerType.MEMORY_LOSS, null) as MemoryLossLayer<T>;
    m_softmax = m_net.FindLayer(LayerParameter.LayerType.SOFTMAX, null) as SoftmaxLayer<T>;

    if (m_memData == null)
        throw new Exception("Could not find the MemoryData Layer!");

    // The loss layer is only required outside the RUN phase.
    if (m_memLoss == null && m_phase != Phase.RUN)
        throw new Exception("Could not find the MemoryLoss Layer!");

    m_memData.OnDataPack += memData_OnDataPack;

    if (m_memLoss != null)
        m_memLoss.OnGetLoss += memLoss_OnGetLoss;

    m_blobDiscountedR = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobPolicyGradient = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobActionOneHot = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobDiscountedR1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobPolicyGradient1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobActionOneHot1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobLoss = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
    m_blobAprobLogit = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);

    // Softmax models use a softmax cross-entropy loss helper layer.
    if (m_softmax != null)
    {
        LayerParameter p = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
        p.loss_weight.Add(1);
        p.loss_weight.Add(0);
        m_softmaxCe = new SoftmaxCrossEntropyLossLayer<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log, p);
    }

    m_colAccumulatedGradients = m_net.learnable_parameters.Clone();
    m_colAccumulatedGradients.SetDiff(0);

    m_bCreated = true;
}
1034
// Disposes the blob (when non-null) and clears the reference.
private void dispose(ref Blob<T> b)
{
    if (b == null)
        return;

    b.Dispose();
    b = null;
}
1043
/// <summary>
/// Release all resources used: unhook layer events, free the working blobs and
/// accumulated gradients, and dispose the worker when it was a clone.
/// </summary>
public void Dispose()
{
    if (m_memLoss != null)
        m_memLoss.OnGetLoss -= memLoss_OnGetLoss;

    if (m_memData != null)
        m_memData.OnDataPack -= memData_OnDataPack;

    dispose(ref m_blobDiscountedR);
    dispose(ref m_blobPolicyGradient);
    dispose(ref m_blobActionOneHot);
    dispose(ref m_blobDiscountedR1);
    dispose(ref m_blobPolicyGradient1);
    dispose(ref m_blobActionOneHot1);
    dispose(ref m_blobLoss);
    dispose(ref m_blobAprobLogit);

    if (m_colAccumulatedGradients != null)
    {
        m_colAccumulatedGradients.Dispose();
        m_colAccumulatedGradients = null;
    }

    // Only dispose the worker when it is a clone (the primary is owned by the caller).
    if (m_mycaffeWorker != m_mycaffePrimary && m_mycaffeWorker != null)
        m_mycaffeWorker.Dispose();

    m_mycaffeWorker = null;
}
1075
1080 {
1081 get { return m_nRecurrentSequenceLength; }
1082 }
1083
1088 {
1089 get { return m_mycaffePrimary.Log; }
1090 }
1091
/// <summary>
/// Returns <i>true</i> when the network contains a Softmax layer (found during Create),
/// and <i>false</i> otherwise (e.g. Sigmoid-based models).
/// </summary>
public bool UsesSoftMax
{
    // Idiom fix: '(x == null) ? false : true' simplified to a direct null test.
    get { return (m_softmax != null); }
}
1099
/// <summary>
/// Reshape all policy-gradient working blobs to fit the given episode memory.
/// </summary>
/// <param name="mem">Specifies the memory of completed episode items (must be non-empty).</param>
/// <returns>The number of action probabilities output by the network is returned.</returns>
public int Reshape(Memory mem)
{
    int nNum = mem.Count;
    // NOTE(review): these three locals are not used below; retained for reference.
    int nChannels = mem[0].Data.Channels;
    int nHeight = mem[0].Data.Height;
    int nWidth = mem[0].Data.Width; // BUGFIX: previously assigned mem[0].Data.Height (copy-paste error).
    int nActionProbs = 1;
    int nFound = 0;

    // The action-probability count is the channel count of the largest non-loss output.
    for (int i = 0; i < m_net.output_blobs.Count; i++)
    {
        if (m_net.output_blobs[i].type != BLOB_TYPE.LOSS)
        {
            int nCh = m_net.output_blobs[i].channels;
            nActionProbs = Math.Max(nCh, nActionProbs);
            nFound++;
        }
    }

    if (nFound == 0)
        throw new Exception("Could not find a non-loss output! Your model should output the loss and the action probabilities.");

    m_blobDiscountedR.Reshape(nNum, nActionProbs, 1, 1);
    m_blobPolicyGradient.Reshape(nNum, nActionProbs, 1, 1);
    m_blobActionOneHot.Reshape(nNum, nActionProbs, 1, 1);
    m_blobDiscountedR1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobPolicyGradient1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobActionOneHot1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobLoss.Reshape(1, 1, 1, 1);

    return nActionProbs;
}
1137
/// <summary>
/// Set the discounted rewards into the discounted-reward blob, normalizing them to be
/// unit normal (which helps control the gradient estimator variance).
/// </summary>
/// <param name="rg">Specifies one discounted reward per episode step.</param>
public void SetDiscountedR(float[] rg)
{
    // The mean/std are computed on the raw per-step values, before any channel expansion.
    double dfMean = m_blobDiscountedR.mean(rg);
    double dfStd = m_blobDiscountedR.std(dfMean, rg);
    int nC = m_blobDiscountedR.channels;

    // Fill all items in each channel with the same discount value.
    if (nC > 1)
        rg = rg.SelectMany(f => Enumerable.Repeat(f, nC)).ToArray();

    m_blobDiscountedR.SetData(Utility.ConvertVec<T>(rg));
    m_blobDiscountedR.NormalizeData(dfMean, dfStd);
}
1167
/// <summary>
/// Set the action probabilities into the policy-gradient blob.
/// </summary>
/// <param name="rg">Specifies the action probabilities to set.</param>
public void SetActionProbabilities(float[] rg)
{
    T[] rgData = Utility.ConvertVec<T>(rg);
    m_blobPolicyGradient.SetData(rgData);
}
1176
/// <summary>
/// Set the action one-hot vectors into the action one-hot blob.
/// </summary>
/// <param name="rg">Specifies the action one-hot vectors to set.</param>
public void SetActionOneHotVectors(float[] rg)
{
    T[] rgData = Utility.ConvertVec<T>(rg);
    m_blobActionOneHot.SetData(rgData);
}
1185
/// <summary>
/// Set the episode data (and clip) for training.  Recurrent multi-item episodes are held
/// back and fed one at a time by Train; otherwise the data is loaded immediately.
/// </summary>
/// <param name="rgData">Specifies the data to set.</param>
/// <param name="rgClip">Specifies the clip data to set, if any.</param>
public void SetData(List<Datum> rgData, List<Datum> rgClip)
{
    bool bDeferToTrain = (m_nRecurrentSequenceLength != 1 && rgData.Count > 1 && rgClip != null);

    if (bDeferToTrain)
    {
        m_rgData = rgData;
        m_rgClip = rgClip;
        return;
    }

    m_memData.AddDatumVector(rgData, rgClip, 1, true, true);
    m_rgData = null;
    m_rgClip = null;
}
1205
/// <summary>
/// Returns the GetDataArgs used to retrieve the next state from the environment.
/// </summary>
/// <param name="phase">Specifies the phase under which to get the data.</param>
/// <param name="nIdx">Specifies the agent index.</param>
/// <param name="nAction">Specifies the action to run, or -1 to reset the environment.</param>
/// <param name="bResetOverride">Optionally overrides the reset flag (currently unused here).</param>
public GetDataArgs getDataArgs(Phase phase, int nIdx, int nAction, bool? bResetOverride = null)
{
    bool bReset = (nAction == -1);
    return new GetDataArgs(phase, nIdx, m_mycaffePrimary, m_mycaffePrimary.Log, m_mycaffePrimary.CancelEvent, bReset, nAction, false, true);
}
1219
/// <summary>
/// Returns the last loss received during training.
/// </summary>
public double LastLoss => m_dfLastLoss;
1227
/// <summary>
/// Returns the learning rate used during the last update.
/// </summary>
public double LearningRate => m_dfLearningRate;
1235
/// <summary>
/// Returns the primary instance's output log.
/// </summary>
public Log Log => m_mycaffePrimary.Log;
1243
1248 {
1249 get { return m_mycaffePrimary.CancelEvent; }
1250 }
1251
/// <summary>
/// Preprocess the state data, either returning a copy of the raw input or the
/// difference from the previously seen state (difference framing).
/// </summary>
/// <param name="s">Specifies the state to preprocess.</param>
/// <param name="bUseRawInput">When <i>true</i>, the raw data copy is returned unchanged.</param>
/// <returns>The preprocessed SimpleDatum is returned.</returns>
public SimpleDatum Preprocess(StateBase s, bool bUseRawInput)
{
    SimpleDatum sd = new SimpleDatum(s.Data, true);

    if (bUseRawInput)
        return sd;

    // First frame has no previous state to diff against, so zero it.
    if (m_sdLast == null)
        sd.Zero();
    else
        sd.Sub(m_sdLast);

    m_sdLast = s.Data;

    return sd;
}
1274
/// <summary>
/// Run the policy network forward on the given state and sample an action from the
/// resulting probability distribution.
/// </summary>
/// <param name="sd">Specifies the preprocessed state data.</param>
/// <param name="sdClip">Optionally specifies the clip data (used by recurrent models).</param>
/// <param name="rgfAprob">Returns the action probabilities output by the network.</param>
/// <returns>The sampled action index is returned.</returns>
public int act(SimpleDatum sd, SimpleDatum sdClip, out float[] rgfAprob)
{
    List<Datum> rgData = new List<Datum>() { new Datum(sd) };
    List<Datum> rgClip = null;

    if (sdClip != null)
        rgClip = new List<Datum>() { new Datum(sdClip) };

    double dfLoss;
    float fRandom = (float)m_random.NextDouble(); // Roll the dice.

    m_memData.AddDatumVector(rgData, rgClip, 1, true, true);

    // Skip the loss computation on this inference-only forward pass.
    m_bSkipLoss = true;
    BlobCollection<T> res = m_net.Forward(out dfLoss);
    m_bSkipLoss = false;

    rgfAprob = null;

    foreach (Blob<T> blob in res)
    {
        if (blob.type == BLOB_TYPE.LOSS)
            continue;

        int nStart = 0;

        // When using recurrent learning, only act on the last outputs.
        if (m_nRecurrentSequenceLength > 1 && blob.num > 1)
        {
            int nCount = blob.count();
            int nOutput = nCount / blob.num;
            nStart = nCount - nOutput;

            if (nStart < 0)
                throw new Exception("The start must be zero or greater!");
        }

        rgfAprob = Utility.ConvertVecF<T>(blob.update_cpu_data(), nStart);
        break;
    }

    if (rgfAprob == null)
        throw new Exception("Could not find a non-loss output! Your model should output the loss and the action probabilities.");

    // Select the action from the probability distribution.
    float fSum = 0;
    for (int i = 0; i < rgfAprob.Length; i++)
    {
        fSum += rgfAprob[i];

        if (fRandom < fSum)
            return i;
    }

    // Single-output (sigmoid) models fall through to action 1; otherwise take the last action.
    if (rgfAprob.Length == 1)
        return 1;

    return rgfAprob.Length - 1;
}
1344
// Saves the full contents of 'b' into 'b1' (data and diff), then shrinks 'b' down
// to a single item so per-item slices can be copied back in during training.
private void prepareBlob(Blob<T> b1, Blob<T> b)
{
    b1.CopyFrom(b, 0, 0, b1.count(), true, true);
    b.Reshape(1, b.channels, b.height, b.width);
}
1350
// Copies the 'nIdx'-th item-sized slice of 'src' (data only) into 'dst'.
private void copyBlob(int nIdx, Blob<T> src, Blob<T> dst)
{
    int nCount = dst.count();
    dst.CopyFrom(src, nIdx * nCount, 0, nCount, true, false);
}
1356
/// <summary>
/// Train the brain on the currently loaded episode data, accumulating gradients and
/// applying them either locally (single-threaded) or via the OnApplyUpdate event.
/// </summary>
/// <param name="nIteration">Specifies the current iteration.</param>
/// <param name="step">Specifies the stepping mode (used when debugging).</param>
/// <param name="bApplyGradients">When <i>true</i>, the accumulated gradients are applied.</param>
public void Train(int nIteration, TRAIN_STEP step, bool bApplyGradients = true)
{
    // Run data/clip groups > 1 in non batch mode.
    bool bRunPerItem = (m_nRecurrentSequenceLength != 1 && m_rgData != null && m_rgData.Count > 1 && m_rgClip != null);

    if (bRunPerItem)
    {
        // Stash the full blobs, then feed one episode item at a time.
        prepareBlob(m_blobActionOneHot1, m_blobActionOneHot);
        prepareBlob(m_blobDiscountedR1, m_blobDiscountedR);
        prepareBlob(m_blobPolicyGradient1, m_blobPolicyGradient);

        for (int i = 0; i < m_rgData.Count; i++)
        {
            copyBlob(i, m_blobActionOneHot1, m_blobActionOneHot);
            copyBlob(i, m_blobDiscountedR1, m_blobDiscountedR);
            copyBlob(i, m_blobPolicyGradient1, m_blobPolicyGradient);

            List<Datum> rgData1 = new List<Datum>() { m_rgData[i] };
            List<Datum> rgClip1 = new List<Datum>() { m_rgClip[i] };

            m_memData.AddDatumVector(rgData1, rgClip1, 1, true, true);

            m_solver.Step(1, step, true, m_bUseAcceleratedTraining, true, true);
            m_colAccumulatedGradients.Accumulate(m_mycaffeWorker.Cuda, m_net.learnable_parameters, true);
        }

        // Restore the full blob shapes for the next episode.
        m_blobActionOneHot.ReshapeLike(m_blobActionOneHot1);
        m_blobDiscountedR.ReshapeLike(m_blobDiscountedR1);
        m_blobPolicyGradient.ReshapeLike(m_blobPolicyGradient1);

        m_rgData = null;
        m_rgClip = null;
    }
    else
    {
        m_solver.Step(1, step, true, m_bUseAcceleratedTraining, true, true);
        m_colAccumulatedGradients.Accumulate(m_mycaffeWorker.Cuda, m_net.learnable_parameters, true);
    }

    bool bApplyNow = (nIteration % m_nMiniBatch == 0 || bApplyGradients || step == TRAIN_STEP.BACKWARD || step == TRAIN_STEP.BOTH);

    if (bApplyNow)
    {
        // Move the accumulated gradients into the parameter diffs and reset the accumulator.
        m_net.learnable_parameters.CopyFrom(m_colAccumulatedGradients, true);
        m_colAccumulatedGradients.SetDiff(0);

        if (m_mycaffePrimary == m_mycaffeWorker)
        {
            m_dfLearningRate = m_solver.ApplyUpdate(nIteration);
        }
        else
        {
            // Multi-threaded: hand the update off to the shared optimizer.
            ApplyUpdateArgs<T> args = new ApplyUpdateArgs<T>(nIteration, m_mycaffeWorker);
            OnApplyUpdate(this, args);
            m_dfLearningRate = args.LearningRate;
        }

        m_net.ClearParamDiffs();
    }
}
1419
/// <summary>
/// Unpack the label values stored in a Datum's DataCriteria blob.
/// </summary>
/// <param name="d">Specifies the Datum whose DataCriteria holds the packed labels.</param>
/// <returns>The unpacked label values, or null when no criteria data exists or the format is not a float/double list.</returns>
private T[] unpackLabel(Datum d)
{
    // No criteria data attached - nothing to unpack.
    if (d.DataCriteria == null)
        return null;

    switch (d.DataCriteriaFormat)
    {
        case SimpleDatum.DATA_FORMAT.LIST_FLOAT:
            List<float> rgFloat = BinaryData.UnPackFloatList(d.DataCriteria, SimpleDatum.DATA_FORMAT.LIST_FLOAT);
            return Utility.ConvertVec<T>(rgFloat.ToArray());

        case SimpleDatum.DATA_FORMAT.LIST_DOUBLE:
            List<double> rgDouble = BinaryData.UnPackDoubleList(d.DataCriteria, SimpleDatum.DATA_FORMAT.LIST_DOUBLE);
            return Utility.ConvertVec<T>(rgDouble.ToArray());

        default:
            // Unsupported packing format.
            return null;
    }
}
1438
/// <summary>
/// The OnDataPack event handler packs the observation, clip and label data into
/// the ordering expected by the recurrent layer type (LSTM or LSTM_SIMPLE) and
/// writes the packed arrays into the Data, Clip and Label blobs.
/// </summary>
/// <param name="sender">Specifies the event sender (the MemoryDataLayer).</param>
/// <param name="e">Specifies the pack-data arguments holding the blobs to fill and the raw data/clip items.</param>
private void memData_OnDataPack(object sender, MemoryDataLayerPackDataArgs<T> e)
{
    List<int> rgDataShape = e.Data.shape();
    List<int> rgClipShape = e.Clip.shape();
    List<int> rgLabelShape = e.Label.shape();
    int nBatch = e.DataItems.Count;
    int nSeqLen = rgDataShape[0];

    // Fix: error message typo ("lenth" -> "length").
    e.Data.Log.CHECK_GT(nSeqLen, 0, "The sequence length must be greater than zero!");
    e.Data.Log.CHECK_EQ(nBatch, e.ClipItems.Count, "The data and clip should have the same number of items.");
    e.Data.Log.CHECK_EQ(nSeqLen, rgClipShape[0], "The data and clip should have the same sequence count.");

    rgDataShape[1] = nBatch; // LSTM uses sizing: seq, batch, data1, data2
    rgClipShape[1] = nBatch;
    rgLabelShape[1] = nBatch;

    e.Data.Reshape(rgDataShape);
    e.Clip.Reshape(rgClipShape);
    e.Label.Reshape(rgLabelShape);

    T[] rgRawData = new T[e.Data.count()];
    T[] rgRawClip = new T[e.Clip.count()];
    T[] rgRawLabel = new T[e.Label.count()];

    int nDataSize = e.Data.count(2);
    T[] rgDataItem = new T[nDataSize];
    T dfClip;
    int nIdx;

    for (int i = 0; i < nBatch; i++)
    {
        Datum data = e.DataItems[i];
        Datum clip = e.ClipItems[i];

        T[] rgLabel = unpackLabel(data);

        for (int j = 0; j < nSeqLen; j++)
        {
            dfClip = clip.GetDataAt<T>(j);

            // Gather the j'th step of this item into the scratch buffer.
            for (int k = 0; k < nDataSize; k++)
            {
                rgDataItem[k] = data.GetDataAt<T>(j * nDataSize + k);
            }

            // LSTM: Create input data, the data must be in the order
            // seq1_val1, seq2_val1, ..., seqBatch_Size_val1, seq1_val2, seq2_val2, ..., seqBatch_Size_valSequence_Length
            if (e.LstmType == LayerParameter.LayerType.LSTM)
                nIdx = nBatch * j + i;

            // LSTM_SIMPLE: Create input data, the data must be in the order
            // seq1_val1, seq1_val2, ..., seq1_valBatchSize, seq2_val1, seq2_val2, ..., seqSequenceLength_valBatchSize
            // NOTE(review): this stride uses nBatch where the ordering described above suggests
            // nSeqLen (i.e. i * nSeqLen + j); the two agree only when nBatch == nSeqLen - confirm.
            else
                nIdx = i * nBatch + j;

            Array.Copy(rgDataItem, 0, rgRawData, nIdx * nDataSize, nDataSize);
            rgRawClip[nIdx] = dfClip;

            if (rgLabel != null)
            {
                if (rgLabel.Length == nSeqLen)
                    rgRawLabel[nIdx] = rgLabel[j];
                else if (rgLabel.Length == 1)
                {
                    // SINGLE label: only the last step of the sequence carries the label.
                    // NOTE(review): always writes index 0 regardless of batch item 'i' - verify for nBatch > 1.
                    if (j == nSeqLen - 1)
                        rgRawLabel[0] = rgLabel[0];
                }
                else
                {
                    throw new Exception("The Solver SequenceLength parameter does not match the actual sequence length! The label length '" + rgLabel.Length.ToString() + "' must be either '1' for SINGLE labels, or the sequence length of '" + nSeqLen.ToString() + "' for MULTI labels. Stopping training.");
                }
            }
        }
    }

    e.Data.mutable_cpu_data = rgRawData;
    e.Clip.mutable_cpu_data = rgRawClip;
    e.Label.mutable_cpu_data = rgRawLabel;

    // Remember the detected sequence length for the recurrent training path.
    m_nRecurrentSequenceLength = nSeqLen;
}
1528
1529
/// <summary>
/// The OnGetLoss event handler for the MemoryLossLayer - computes the policy
/// gradient loss and writes the gradient directly into the bottom blob's diff.
/// </summary>
/// <param name="sender">Specifies the event sender (the MemoryLossLayer).</param>
/// <param name="e">Specifies the loss arguments holding the bottom blobs and receiving the loss.</param>
private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
{
    // Skipped entirely when loss calculation is disabled (e.g. during action runs).
    if (m_bSkipLoss)
        return;

    int nCount = m_blobActionOneHot.count();
    long hActionOneHot = m_blobActionOneHot.gpu_data;
    long hPolicyGrad = 0;
    long hDiscountedR = m_blobDiscountedR.gpu_data;
    double dfLoss;
    int nDataSize = e.Bottom[0].count(1);
    bool bUsingEndData = false;

    // When using a recurrent model and receiving data with more than one sequence,
    // copy and only use the last sequence data.
    if (m_nRecurrentSequenceLength > 1)
    {
        if (e.Bottom[0].num > 1)
        {
            // Save the full output aside: copy the data (with reshape), then the diff.
            m_blobAprobLogit.CopyFrom(e.Bottom[0], false, true);
            m_blobAprobLogit.CopyFrom(e.Bottom[0], true);

            // Shrink the bottom to a single item and fill it with the last item's data+diff.
            List<int> rgShape = e.Bottom[0].shape();
            rgShape[0] = 1;
            e.Bottom[0].Reshape(rgShape);
            e.Bottom[0].CopyFrom(m_blobAprobLogit, (m_blobAprobLogit.num - 1) * nDataSize, 0, nDataSize, true, true);
            bUsingEndData = true;
        }
    }

    long hBottomDiff = e.Bottom[0].mutable_gpu_diff;

    // Calculate the initial gradients (policy grad initially just contains the action probabilities)
    if (m_softmax != null)
    {
        // Softmax model: use the softmax cross-entropy layer to produce loss and gradient.
        BlobCollection<T> colBottom = new BlobCollection<T>();
        BlobCollection<T> colTop = new BlobCollection<T>();

        colBottom.Add(e.Bottom[0]); // aprob logit
        colBottom.Add(m_blobActionOneHot); // action one-hot vectors
        colTop.Add(m_blobLoss);
        colTop.Add(m_blobPolicyGradient);

        // Lazy one-time setup of the softmax cross-entropy layer.
        if (!m_bSoftmaxCeSetup)
        {
            m_softmaxCe.Setup(colBottom, colTop);
            m_bSoftmaxCeSetup = true;
        }

        // Only the logits (bottom[0]) receive a gradient; the one-hot targets do not.
        dfLoss = m_softmaxCe.Forward(colBottom, colTop);
        m_softmaxCe.Backward(colTop, new List<bool>() { true, false }, colBottom);
        hPolicyGrad = colBottom[0].gpu_diff;
    }
    else
    {
        // Non-softmax (e.g. sigmoid) model: compute the gradient by hand on the GPU.
        hPolicyGrad = m_blobPolicyGradient.mutable_gpu_data;

        // Calculate (a=0) ? 1-aprob : 0-aprob
        m_mycaffeWorker.Cuda.add_scalar(nCount, -1.0, hActionOneHot); // invert one hot
        m_mycaffeWorker.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
        m_mycaffeWorker.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // negate Aprob
        m_mycaffeWorker.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad); // gradient = ((a=0)?1:0) - Aprob
        dfLoss = Utility.ConvertVal<T>(m_blobPolicyGradient.sumsq_data());

        m_mycaffeWorker.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // invert for ApplyUpdate subtracts the gradients
    }

    // Modulate the gradient with the advantage (PG magic happens right here.)
    m_mycaffeWorker.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);

    e.Loss = dfLoss;
    e.EnableLossUpdate = false; // dont apply loss to loss weight.

    // Only copy when the gradient was not computed in-place in the bottom diff.
    if (hPolicyGrad != hBottomDiff)
        m_mycaffeWorker.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);

    // When using recurrent model with more than one sequence of data, only
    // copy the diff to the last in the sequence and zero out the rest in the sequence.
    if (m_nRecurrentSequenceLength > 1 && bUsingEndData)
    {
        m_blobAprobLogit.SetDiff(0);
        m_blobAprobLogit.CopyFrom(e.Bottom[0], 0, (m_blobAprobLogit.num - 1) * nDataSize, nDataSize, false, true);
        // Restore the bottom to its full shape/data and hand back the sparse diff.
        e.Bottom[0].CopyFrom(m_blobAprobLogit, false, true);
        e.Bottom[0].CopyFrom(m_blobAprobLogit, true);
    }

    m_dfLastLoss = e.Loss;
}
1633 }
1634
/// <summary>
/// Contains the best memory episodes (best by highest total rewards).
/// </summary>
class MemoryCache : IEnumerable<Memory>
{
    int m_nMax;
    List<Memory> m_rgMemory = new List<Memory>();

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="nMax">Specifies the episode count at which Add signals that the cache is full.</param>
    public MemoryCache(int nMax)
    {
        m_nMax = nMax;
    }

    /// <summary>
    /// Returns the number of items in the cache.
    /// </summary>
    public int Count
    {
        get { return m_rgMemory.Count; }
    }

    /// <summary>
    /// Get the episode at a given index.
    /// </summary>
    /// <param name="nIdx">Specifies the index.</param>
    public Memory this[int nIdx]
    {
        get { return m_rgMemory[nIdx]; }
    }

    /// <summary>
    /// Add a new episode to the memory cache.
    /// </summary>
    /// <param name="mem">Specifies the episode to add.</param>
    /// <returns>Returns true when the cache has reached (or exceeded) its maximum count, false otherwise.</returns>
    public bool Add(Memory mem)
    {
        m_rgMemory.Add(mem);

        // Fix: use >= (was ==) so that overshooting the maximum (e.g. a missed Clear)
        // cannot leave the 'full' signal permanently unreachable.
        return m_rgMemory.Count >= m_nMax;
    }

    /// <summary>
    /// Clear all items from the memory cache.
    /// </summary>
    public void Clear()
    {
        m_rgMemory.Clear();
    }

    /// <summary>
    /// Purge all non elite episodes, keeping only the episodes whose reward sum falls
    /// within the top 'dfElitePercent' portion of the observed reward range.
    /// </summary>
    /// <param name="dfElitePercent">Specifies the elite percent in (0,1); values outside this range disable purging.</param>
    public void PurgeNonElite(double dfElitePercent)
    {
        if (dfElitePercent <= 0.0 || dfElitePercent >= 1.0)
            return;

        // Fix: guard the empty cache - Min()/Max() throw InvalidOperationException on an empty sequence.
        if (m_rgMemory.Count == 0)
            return;

        double dfMin = m_rgMemory.Min(p => p.RewardSum);
        double dfMax = m_rgMemory.Max(p => p.RewardSum);
        double dfRange = dfMax - dfMin;
        double dfCutoff = dfMin + ((1.0 - dfElitePercent) * dfRange);
        List<Memory> rgMem = m_rgMemory.OrderByDescending(p => p.RewardSum).ToList();
        List<Memory> rgElite = new List<Memory>();

        // The list is sorted descending, so stop at the first below-cutoff episode.
        for (int i = 0; i < rgMem.Count; i++)
        {
            double dfSum = rgMem[i].RewardSum;

            if (dfSum >= dfCutoff)
                rgElite.Add(rgMem[i]);
            else
                break;
        }

        m_rgMemory = rgElite;
    }

    /// <summary>
    /// Returns the enumerator.
    /// </summary>
    public IEnumerator<Memory> GetEnumerator()
    {
        return m_rgMemory.GetEnumerator();
    }

    /// <summary>
    /// Returns the non-generic enumerator.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return m_rgMemory.GetEnumerator();
    }
}
1740
1745 {
1746 List<MemoryItem> m_rgItems = new List<MemoryItem>();
1747 int m_nEpisodeNumber = 0;
1748 double m_dfRewardSum = 0;
1749
/// <summary>
/// The constructor.
/// </summary>
public Memory()
{
}
1756
/// <summary>
/// Returns the number of memory items in the memory.
/// </summary>
public int Count
{
    get { return m_rgItems.Count; }
}
1764
/// <summary>
/// Add a new item to the memory, accumulating its reward into the running reward sum.
/// </summary>
/// <param name="item">Specifies the memory item (episode step) to add.</param>
public void Add(MemoryItem item)
{
    m_dfRewardSum += item.Reward;
    m_rgItems.Add(item);
}
1777
/// <summary>
/// Remove all items in the list and reset the reward sum.
/// </summary>
public void Clear()
{
    m_dfRewardSum = 0;
    m_rgItems.Clear();
}
1786
/// <summary>
/// Get/set the memory item at a given index.
/// </summary>
/// <param name="nIdx">Specifies the index.</param>
public MemoryItem this[int nIdx]
{
    get { return m_rgItems[nIdx]; }
    set { m_rgItems[nIdx] = value; }
}
1797
/// <summary>
/// Get/set the episode number of this memory.
/// </summary>
public int EpisodeNumber
{
    get { return m_nEpisodeNumber; }
    set { m_nEpisodeNumber = value; }
}
1806
/// <summary>
/// Get/set the reward sum of this memory.
/// Note: setting this value directly does not recompute the sum from the stored items.
/// </summary>
public double RewardSum
{
    get { return m_dfRewardSum; }
    set { m_dfRewardSum = value; }
}
1815
/// <summary>
/// Retrieve the discounted rewards for this episode, computed by walking the
/// episode backwards and accumulating gamma-discounted reward.
/// </summary>
/// <param name="fGamma">Specifies the discount factor applied per step.</param>
/// <param name="bAllowReset">When true, a non-zero reward resets the accumulator (game boundary) before it is added in.</param>
/// <returns>The per-step discounted rewards, one entry per episode step.</returns>
public float[] GetDiscountedRewards(float fGamma, bool bAllowReset)
{
    int nSteps = m_rgItems.Count;
    float[] rgResult = new float[nSteps];
    float fAccum = 0;

    for (int t = nSteps - 1; t >= 0; t--)
    {
        float fReward = m_rgItems[t].Reward;

        // A non-zero reward marks a boundary when resets are allowed.
        if (bAllowReset && fReward != 0)
            fAccum = 0;

        fAccum = fAccum * fGamma + fReward;
        rgResult[t] = fAccum;
    }

    return rgResult;
}
1839
/// <summary>
/// Retrieve the action probabilities of the episode, concatenated across all steps in order.
/// </summary>
/// <returns>All steps' Aprob values flattened into a single array.</returns>
public float[] GetActionProbabilities()
{
    return m_rgItems.SelectMany(p => p.Aprob).ToArray();
}
1858
/// <summary>
/// Retrieve the action one-hot vectors for the episode, one vector per step,
/// concatenated into a single array.
/// </summary>
/// <returns>The flattened one-hot vectors (or raw action values for single-output models).</returns>
public float[] GetActionOneHotVectors()
{
    List<float> rgfVectors = new List<float>();

    foreach (MemoryItem item in m_rgItems)
    {
        // Each vector is sized by the first step's action dimension.
        float[] rgfOneHot = new float[m_rgItems[0].Aprob.Length];

        if (rgfOneHot.Length == 1)
            rgfOneHot[0] = item.Action;    // single-output model: store the raw action value.
        else
            rgfOneHot[item.Action] = 1;    // multi-output model: one-hot the selected action.

        rgfVectors.AddRange(rgfOneHot);
    }

    return rgfVectors.ToArray();
}
1881
1886 public List<Datum> GetData()
1887 {
1888 List<Datum> rgData = new List<Datum>();
1889
1890 for (int i = 0; i < m_rgItems.Count; i++)
1891 {
1892 rgData.Add(new Datum(m_rgItems[i].Data));
1893 }
1894
1895 return rgData;
1896 }
1897
1902 public List<Datum> GetClip()
1903 {
1904 if (m_rgItems.Count == 0)
1905 return null;
1906
1907 if (m_rgItems[0].State.Clip == null)
1908 return null;
1909
1910 List<Datum> rgData = new List<Datum>();
1911
1912 for (int i = 0; i < m_rgItems.Count; i++)
1913 {
1914 if (m_rgItems[i].State.Clip == null)
1915 return null;
1916
1917 rgData.Add(new Datum(m_rgItems[i].State.Clip));
1918 }
1919
1920 return rgData;
1921 }
1922 }
1923
1928 {
1929 StateBase m_state;
1930 SimpleDatum m_x;
1931 int m_nAction;
1932 float[] m_rgfAprob;
1933 float m_fReward;
1934
/// <summary>
/// The constructor.
/// </summary>
/// <param name="s">Specifies the state of this episode step.</param>
/// <param name="x">Specifies the pre-processed data (run through the model) of this step.</param>
/// <param name="nAction">Specifies the action taken.</param>
/// <param name="rgfAprob">Specifies the action probabilities (used with non-Softmax models).</param>
/// <param name="fReward">Specifies the reward for taking the action.</param>
public MemoryItem(StateBase s, SimpleDatum x, int nAction, float[] rgfAprob, float fReward)
{
    m_state = s;
    m_x = x;
    m_nAction = nAction;
    m_rgfAprob = rgfAprob;
    m_fReward = fReward;
}
1951
1956 {
1957 get { return m_state; }
1958 }
1959
1964 {
1965 get { return m_x; }
1966 }
1967
/// <summary>
/// Returns the action of this episode step.
/// </summary>
public int Action
{
    get { return m_nAction; }
}
1975
/// <summary>
/// Returns the reward for taking the action in this episode step.
/// </summary>
public float Reward
{
    get { return m_fReward; }
}
1983
/// <summary>
/// Returns the action probabilities, which are only used with non-Softmax models.
/// </summary>
public float[] Aprob
{
    get { return m_rgfAprob; }
}
1991
/// <summary>
/// Returns the string representation of this episode step.
/// </summary>
/// <returns>A string listing the action, reward (2 decimals) and action probabilities.</returns>
public override string ToString()
{
    return string.Format("action = {0} reward = {1:N2} aprob = {2}", m_nAction, m_fReward, tostring(m_rgfAprob));
}
2000
/// <summary>
/// Render a float array as "{v1,v2,...}" with 5 decimal places per value.
/// </summary>
/// <param name="rg">Specifies the values to render.</param>
/// <returns>The formatted string (an empty array yields "{}").</returns>
private string tostring(float[] rg)
{
    return "{" + string.Join(",", rg.Select(f => f.ToString("N5"))) + "}";
}
2016 }
2017}
The MyCaffeControl is the main object used to manage all training, testing and running of the MyCaffe...
CancelEvent CancelEvent
Returns the CancelEvent used.
Net< T > GetInternalNet(Phase phase=Phase.RUN)
Returns the internal net based on the Phase specified: TRAIN, TEST or RUN.
void CopyWeightsFrom(MyCaffeControl< T > src)
Copy the learnable parameter data from the source MyCaffeControl into this one.
Solver< T > GetInternalSolver()
Get the internal solver.
MyCaffeControl< T > Clone(int nGpuID)
Clone the current instance of the MyCaffeControl creating a second instance.
Log Log
Returns the Log (for output) used.
CudaDnn< T > Cuda
Returns the CudaDnn connection used.
ProjectEx CurrentProject
Returns the name of the currently loaded project.
The BinaryData class is used to pack and unpack DataCriteria binary data, optionally stored within ea...
Definition: BinaryData.cs:15
static List< double > UnPackDoubleList(byte[] rg, DATA_FORMAT fmtExpected)
Unpack the byte array into a list of double values.
Definition: BinaryData.cs:75
static List< float > UnPackFloatList(byte[] rg, DATA_FORMAT fmtExpected)
Unpack the byte array into a list of float values.
Definition: BinaryData.cs:132
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
void Reset()
Resets the event clearing any signaled state.
Definition: CancelEvent.cs:279
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
Definition: CancelEvent.cs:290
CancelEvent()
The CancelEvent constructor.
Definition: CancelEvent.cs:28
void Set()
Sets the event to the signaled state.
Definition: CancelEvent.cs:270
The CryptoRandom is a random number generator that can use either the standard .Net Random objec or t...
Definition: CryptoRandom.cs:14
int Next(int nMinVal, int nMaxVal, bool bMaxInclusive=true)
Returns a random int within the range
double NextDouble()
Returns a random double within the range .
Definition: CryptoRandom.cs:83
The Datum class is a simple wrapper to the SimpleDatum class to ensure compatibility with the origina...
Definition: Datum.cs:12
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
double Progress
Get/set the progress associated with the Log.
Definition: Log.cs:147
void WriteError(Exception e)
Write an error as output.
Definition: Log.cs:130
Log(string strSrc)
The Log constructor.
Definition: Log.cs:33
double? GetSolverSettingAsNumeric(string strParam)
Get a setting from the solver descriptor as a double value.
Definition: ProjectEx.cs:470
int GetBatchSize(Phase phase)
Returns the batch size of the project used in a given Phase.
Definition: ProjectEx.cs:359
Specifies a key-value pair of properties.
Definition: PropertySet.cs:16
string GetProperty(string strName, bool bThrowExceptions=true)
Returns a property as a string value.
Definition: PropertySet.cs:146
int GetPropertyAsInt(string strName, int nDefault=0)
Returns a property as an integer value.
Definition: PropertySet.cs:287
bool GetPropertyAsBool(string strName, bool bDefault=false)
Returns a property as a boolean value.
Definition: PropertySet.cs:267
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as an double value.
Definition: PropertySet.cs:307
override string ToString()
Returns the string representation of the properties.
Definition: PropertySet.cs:325
The Result class contains a single result.
Definition: Result.cs:14
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
void Copy(SimpleDatum d, bool bCopyData, int? nHeight=null, int? nWidth=null)
Copy another SimpleDatum into this one.
float GetDataAtF(int nIdx)
Returns the item at a specified index in the float type.
bool Sub(SimpleDatum sd, bool bSetNegativeToZero=false)
Subtract the data of another SimpleDatum from this one, so this = this - sd.
void Zero()
Zero out all data in the datum but keep the size and other settings.
int ItemCount
Returns the number of data items.
DateTime TimeStamp
Get/set the Timestamp.
byte[] DataCriteria
Get/set data criteria associated with the data.
DATA_FORMAT
Defines the data format of the DebugData and DataCriteria when specified.
Definition: SimpleDatum.cs:223
DATA_FORMAT DataCriteriaFormat
Get/set the data format of the data criteria.
The Utility class provides general utility functions.
Definition: Utility.cs:35
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
Definition: Utility.cs:550
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void Accumulate(CudaDnn< T > cuda, BlobCollection< T > src, bool bAccumulateDiff)
Accumulate the diffs from one BlobCollection into another.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
int channels
DEPRECIATED; legacy shape accessor channels: use shape(1) instead.
Definition: Blob.cs:800
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Definition: Blob.cs:1922
int height
DEPRECIATED; legacy shape accessor height: use shape(2) instead.
Definition: Blob.cs:808
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECIATED; use
Definition: Blob.cs:442
double std(double? dfMean=null, float[] rgDf=null)
Calculate the standard deviation of the blob data.
Definition: Blob.cs:3007
double mean(float[] rgDf=null, bool bDiff=false)
Calculate the mean of the blob data.
Definition: Blob.cs:2965
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
int width
DEPRECIATED; legacy shape accessor width: use shape(3) instead.
Definition: Blob.cs:816
T sumsq_data()
Calculate the sum of squares (L2 norm squared) of the data.
Definition: Blob.cs:1730
void NormalizeData(double? dfMean=null, double? dfStd=null)
Normalize the blob data by subtracting the mean and dividing by the standard deviation.
Definition: Blob.cs:2942
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
Definition: Blob.cs:1981
int num
DEPRECIATED; legacy shape accessor num: use shape(0) instead.
Definition: Blob.cs:792
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter
Definition: Net.cs:23
BlobCollection< T > Forward()
Run forward with the input Blob's already fed separately.
Definition: Net.cs:1445
Layer< T > FindLayer(LayerParameter.LayerType? type, string strName)
Find the layer with the matching type, name and or both.
Definition: Net.cs:2748
BlobCollection< T > output_blobs
Returns the collection of output Blobs.
Definition: Net.cs:2209
void ClearParamDiffs()
Zero out the diffs of all netw parameters. This should be run before Backward.
Definition: Net.cs:1907
BlobCollection< T > learnable_parameters
Returns the learnable parameters.
Definition: Net.cs:2117
The ResultCollection contains the result of a given CaffeControl::Run.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
Definition: Layer.cs:815
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
Definition: Layer.cs:728
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
Definition: Layer.cs:439
The MemoryDataLayer provides data to the Net from memory. This layer is initialized with the MyCaffe....
virtual void AddDatumVector(Datum[] rgData, Datum[] rgClip=null, int nLblAxis=1, bool bReset=false, bool bResizeBatch=false)
This method is used to add a list of Datums to the memory.
EventHandler< MemoryDataLayerPackDataArgs< T > > OnDataPack
The OnDataPack event fires from within the AddDatumVector method and is used to pack the data into a ...
The MemoryDataLayerPackDataArgs is passed to the OnDataPack event which fires each time the data rece...
Blob< T > Label
Returns the label data to fill with ordered label information.
Blob< T > Clip
Returns the clip data to fill with ordered data for clipping.
List< Datum > ClipItems
Returns the raw clip items to use to fill.
LayerParameter.LayerType LstmType
Returns the LSTM type.
Blob< T > Data
Returns the blob data to fill with ordered data.
List< Datum > DataItems
Returns the raw data items to use to fill.
The MemoryLossLayerGetLossArgs class is passed to the OnGetLoss event.
bool EnableLossUpdate
Get/set enabling the loss update within the backpropagation pass.
double Loss
Get/set the externally calculated total loss.
BlobCollection< T > Bottom
Specifies the bottom passed in during the forward pass.
The MemoryLossLayer provides a method of performing a custom loss functionality. Similar to the Memor...
EventHandler< MemoryLossLayerGetLossArgs< T > > OnGetLoss
The OnGetLoss event fires during each forward pass. The value returned is saved, and applied on the b...
The SoftmaxCrossEntropyLossLayer computes the cross-entropy (logistic) loss and is often used for pr...
The SoftmaxLayer computes the softmax function. This layer is initialized with the MyCaffe....
Definition: SoftmaxLayer.cs:24
Specifies the base parameter for all layers.
List< double > loss_weight
Specifies the loss weight.
LayerType
Specifies the layer type.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
Stores the parameters used by loss layers.
NormalizationMode
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
NormalizationMode? normalization
Specifies the normalization mode (default = VALID).
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
Definition: Solver.cs:28
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
Definition: Solver.cs:818
abstract double ApplyUpdate(int nIterationOverride=-1)
Make and apply the update value for the current iteration.
The ApplyUpdateArgs is passed to the OnApplyUpdates event.
Definition: EventArgs.cs:19
int Iteration
Returns the iteration from which the gradients are to be applied.
Definition: EventArgs.cs:47
double LearningRate
Returns the learning rate at the time the gradients were applied.
Definition: EventArgs.cs:55
MyCaffeControl< T > MyCaffeWorker
Returns the MyCaffe worker instance whose gradients are to be applied.
Definition: EventArgs.cs:39
The ConvertOutputArgs is passed to the OnConvertOutput event.
Definition: EventArgs.cs:311
byte[] RawOutput
Specifies the raw output byte stream.
Definition: EventArgs.cs:356
string RawType
Specifies the type of the raw output byte stream.
Definition: EventArgs.cs:348
The GetDataArgs is passed to the OnGetData event to retrieve data.
Definition: EventArgs.cs:402
StateBase State
Specifies the state data of the observations.
Definition: EventArgs.cs:517
The InitializeArgs is passed to the OnInitialize event.
Definition: EventArgs.cs:90
The StateBase is the base class for the state of each observation - this is defined by actual trainer...
Definition: StateBase.cs:16
bool Done
Get/set whether the state is done or not.
Definition: StateBase.cs:72
double Reward
Get/set the reward of the state.
Definition: StateBase.cs:63
SimpleDatum Data
Returns other data associated with the state.
Definition: StateBase.cs:98
int ActionCount
Returns the number of actions.
Definition: StateBase.cs:90
SimpleDatum Clip
Returns the clip data associated with the state.
Definition: StateBase.cs:116
The WaitArgs is passed to the OnWait event.
Definition: EventArgs.cs:65
The Agent both builds episodes from the environment and trains on them using the Brain.
Definition: TrainerPG.cs:490
void Dispose()
Release all resources used.
Definition: TrainerPG.cs:563
byte[] Run(int nIterations, out string type)
Run the action on a set number of iterations and return the results with no training.
Definition: TrainerPG.cs:670
override void doWork(object arg)
This is the main agent thread that runs the agent.
Definition: TrainerPG.cs:576
Agent(int nIdx, IxTrainerCallback icallback, MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
The constructor.
Definition: TrainerPG.cs:524
EventHandler< ApplyUpdateArgs< T > > OnApplyUpdates
The OnApplyUpdates event fires each time the Agent needs to apply its updates to the primary instance...
Definition: TrainerPG.cs:511
void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The Run method provides the main 'actor' loop that performs the following steps: 1....
Definition: TrainerPG.cs:752
Tuple< int, int > Run(int nDelay=1000)
Run a single action on the model.
Definition: TrainerPG.cs:648
The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the netwo...
Definition: TrainerPG.cs:898
void SetDiscountedR(float[] rg)
Sets the discounted returns in the Discounted Returns Blob.
Definition: TrainerPG.cs:1142
EventHandler< ApplyUpdateArgs< T > > OnApplyUpdate
The OnApplyUpdate event fires when the Brain needs to apply its gradients to the primary instance of ...
Definition: TrainerPG.cs:936
double LearningRate
Return the learning rate used.
Definition: TrainerPG.cs:1232
double LastLoss
Return the last loss received.
Definition: TrainerPG.cs:1224
int Reshape(Memory mem)
Reshape all Blobs used based on the Memory specified.
Definition: TrainerPG.cs:1105
GetDataArgs getDataArgs(Phase phase, int nIdx, int nAction, bool? bResetOverride=null)
Returns the GetDataArgs used to retrieve new data from the environment implemented by derived parent ...
Definition: TrainerPG.cs:1214
void Train(int nIteration, TRAIN_STEP step, bool bApplyGradients=true)
Train the model at the current iteration.
Definition: TrainerPG.cs:1363
int act(SimpleDatum sd, SimpleDatum sdClip, out float[] rgfAprob)
Returns the action from running the model. The action returned is either randomly selected (when usin...
Definition: TrainerPG.cs:1283
void SetData(List< Datum > rgData, List< Datum > rgClip)
Add the data to the model by adding it to the MemoryData layer.
Definition: TrainerPG.cs:1191
CancelEvent Cancel
Returns the Cancel event used to cancel all MyCaffe tasks.
Definition: TrainerPG.cs:1248
bool? UsesSoftMax
Returns true if the current model uses a SoftMax, false otherwise.
Definition: TrainerPG.cs:1096
Brain(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
The constructor.
Definition: TrainerPG.cs:947
Log OutputLog
Returns the primary MyCaffe output log for writing output information.
Definition: TrainerPG.cs:1088
int RecurrentSequenceLength
Returns the recurrent sequence length detected when training a recurrent network, otherwise 0 is retu...
Definition: TrainerPG.cs:1080
void Dispose()
Release all resources used by the Brain.
Definition: TrainerPG.cs:1047
void SetActionProbabilities(float[] rg)
Set the action probabilities in the Policy Gradient Blob.
Definition: TrainerPG.cs:1172
void Create()
Create the Brain CUDA objects - this is called on the thread from which the Brain runs.
Definition: TrainerPG.cs:972
SimpleDatum Preprocess(StateBase s, bool bUseRawInput)
Preprocesses the data.
Definition: TrainerPG.cs:1258
void SetActionOneHotVectors(float[] rg)
Set the action one-hot vectors in the Action OneHot Vector Blob.
Definition: TrainerPG.cs:1181
Contains the best memory episodes (best by highest total rewards)
Definition: TrainerPG.cs:1639
void Clear()
Clear all items from the memory cache.
Definition: TrainerPG.cs:1688
bool Add(Memory mem)
Add a new episode to the memory cache.
Definition: TrainerPG.cs:1675
void PurgeNonElite(double dfElitePercent)
Purge all non elite episodes.
Definition: TrainerPG.cs:1697
int Count
Returns the number of items in the cache.
Definition: TrainerPG.cs:1656
MemoryCache(int nMax)
Constructor.
Definition: TrainerPG.cs:1647
IEnumerator< Memory > GetEnumerator()
Returns the enumerator.
Definition: TrainerPG.cs:1726
Specifies a single Memory (e.g. an episode).
Definition: TrainerPG.cs:1745
int EpisodeNumber
Get/set the episode number of this memory.
Definition: TrainerPG.cs:1802
void Add(MemoryItem item)
Add a new item to the memory.
Definition: TrainerPG.cs:1772
Memory()
The constructor.
Definition: TrainerPG.cs:1753
float[] GetActionProbabilities()
Retrieve the action probabilities of the episode.
Definition: TrainerPG.cs:1847
double RewardSum
Get/set the reward sum of this memory.
Definition: TrainerPG.cs:1811
float[] GetDiscountedRewards(float fGamma, bool bAllowReset)
Retrieve the discounted rewards for this episode.
Definition: TrainerPG.cs:1822
void Clear()
Remove all items in the list.
Definition: TrainerPG.cs:1781
int Count
Returns the number of memory items in the memory.
Definition: TrainerPG.cs:1761
List< Datum > GetClip()
Returns the clip data if it exists, or null.
Definition: TrainerPG.cs:1902
float[] GetActionOneHotVectors()
Retrieve the action one-hot vectors for the episode.
Definition: TrainerPG.cs:1863
List< Datum > GetData()
Retrieve the data of each step in the episode.
Definition: TrainerPG.cs:1886
The MemoryItem stores the information for one step in an episode.
Definition: TrainerPG.cs:1928
float[] Aprob
Returns the action probabilities which are only used with non-Softmax models.
Definition: TrainerPG.cs:1988
int Action
Returns the action of this episode step.
Definition: TrainerPG.cs:1972
float Reward
Returns the reward for taking the action in this episode step.
Definition: TrainerPG.cs:1980
SimpleDatum Data
Returns the pre-processed data (run through the model) of this episode step.
Definition: TrainerPG.cs:1964
MemoryItem(StateBase s, SimpleDatum x, int nAction, float[] rgfAprob, float fReward)
The constructor.
Definition: TrainerPG.cs:1943
StateBase State
Returns the state and data of this episode step.
Definition: TrainerPG.cs:1956
override string ToString()
Returns the string representation of this episode step.
Definition: TrainerPG.cs:1996
The Optimizer manages a single thread used to apply updates to the primary instance of MyCaffe....
Definition: TrainerPG.cs:392
Optimizer(MyCaffeControl< T > mycaffePrimary)
The constructor.
Definition: TrainerPG.cs:405
void Dispose()
Release all resources used.
Definition: TrainerPG.cs:414
double ApplyUpdates(MyCaffeControl< T > mycaffeWorker, int nIteration)
The ApplyUpdates function sets the parameters, signals the Apply Updates thread, blocks for the opera...
Definition: TrainerPG.cs:462
override void doWork(object arg)
This override is the thread used to apply all updates, its CUDA DeviceID is set to the same device ID...
Definition: TrainerPG.cs:423
The TrainerPG implements a simple Policy Gradient trainer inspired by Andrej Karpathy's blog post re...
Definition: TrainerPG.cs:28
bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
Train the network using a modified PG training algorithm optimized for GPU use.
Definition: TrainerPG.cs:195
void Dispose()
Releases all resources used.
Definition: TrainerPG.cs:76
TrainerPG(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
The constructor.
Definition: TrainerPG.cs:44
ResultCollection RunOne(int nDelay=1000)
Run a single cycle on the environment after the delay.
Definition: TrainerPG.cs:126
byte[] Run(int nN, PropertySet runProp, out string type)
Run a set of iterations and return the results.
Definition: TrainerPG.cs:153
bool Shutdown(int nWait)
Shutdown the trainer.
Definition: TrainerPG.cs:108
bool Initialize()
Initialize the trainer.
Definition: TrainerPG.cs:84
bool Test(int nN, ITERATOR_TYPE type)
Run the test cycle - currently this is not implemented.
Definition: TrainerPG.cs:169
The Worker class provides the base class for both the Environment and Optimizer and provides the basi...
Definition: TrainerPG.cs:329
AutoResetEvent m_evtCancel
Specifies the cancel event used to cancel this worker.
Definition: TrainerPG.cs:337
ManualResetEvent m_evtDone
Specifies the done event set when this worker completes.
Definition: TrainerPG.cs:341
int m_nIndex
Specifies the index of this worker.
Definition: TrainerPG.cs:333
virtual void doWork(object arg)
This is the actual thread function that is overridden by each derivative class.
Definition: TrainerPG.cs:360
Task m_workTask
Specifies the worker task that runs the thread function.
Definition: TrainerPG.cs:345
void Start(WorkerStartArgs args)
Start running the thread.
Definition: TrainerPG.cs:368
void Stop(int nWait)
Stop running the thread.
Definition: TrainerPG.cs:378
Worker(int nIdx)
The constructor.
Definition: TrainerPG.cs:351
The WorkerStartArgs provides the arguments used when starting the agent thread.
Definition: TrainerPG.cs:260
WorkerStartArgs(int nCycleDelay, Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The constructor.
Definition: TrainerPG.cs:275
TRAIN_STEP Step
Returns the training step to take (if any). This is used for debugging.
Definition: TrainerPG.cs:288
int CycleDelay
Returns the cycle delay which specifies the amount of time to wait for a cancel.
Definition: TrainerPG.cs:296
Phase Phase
Return the phase on which to run.
Definition: TrainerPG.cs:304
ITERATOR_TYPE IterationType
Returns the iteration type.
Definition: TrainerPG.cs:320
int N
Returns the maximum number of episodes to run.
Definition: TrainerPG.cs:312
The IxTrainerCallback provides functions used by each trainer to 'call-back' to the parent for inform...
Definition: Interfaces.cs:303
The IxTrainerCallbackRNN provides functions used by each trainer to 'call-back' to the parent for inf...
Definition: Interfaces.cs:348
void OnConvertOutput(ConvertOutputArgs e)
The OnConvertOutput callback fires from within the Run method and is used to convert the network's ou...
The IxTrainerRL interface is implemented by each RL Trainer.
Definition: Interfaces.cs:257
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
BLOB_TYPE
Defines the type of data held by a given Blob.
Definition: Interfaces.cs:62
TRAIN_STEP
Defines the training stepping method (if any).
Definition: Interfaces.cs:131
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
ITERATOR_TYPE
Specifies the iterator type to use.
Definition: Interfaces.cs:22
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12