MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
TrainerNoisyDqn.cs
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.data;
using MyCaffe.layers;
using MyCaffe.param;
using MyCaffe.solvers;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace MyCaffe.trainers.dqn.noisy.simple
{
    /// <summary>
    /// The TrainerNoisyDqn implements the Noisy-DQN algorithm as described by the Google Dopamine DQNAgent.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    public class TrainerNoisyDqn<T> : IxTrainerRL, IDisposable
    {
        IxTrainerCallback m_icallback;
        CryptoRandom m_random = new CryptoRandom();
        MyCaffeControl<T> m_mycaffe;
        PropertySet m_properties;

        /// <summary>
        /// The constructor.
        /// </summary>
        public TrainerNoisyDqn(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
        {
            m_icallback = icallback;
            m_mycaffe = mycaffe;
            m_properties = properties;
            m_random = random;
        }

        /// <summary>
        /// Release all resources used.
        /// </summary>
        public void Dispose()
        {
        }

        /// <summary>
        /// Initialize the trainer.
        /// </summary>
        public bool Initialize()
        {
            m_mycaffe.CancelEvent.Reset();
            m_icallback.OnInitialize(new InitializeArgs(m_mycaffe));
            return true;
        }

        /// <summary>
        /// Shutdown the trainer.
        /// </summary>
        public bool Shutdown(int nWait)
        {
            if (m_mycaffe != null)
            {
                m_mycaffe.CancelEvent.Set();
                wait(nWait);
            }

            m_icallback.OnShutdown();

            return true;
        }

        private void wait(int nWait)
        {
            int nWaitInc = 250;
            int nTotalWait = 0;

            while (nTotalWait < nWait)
            {
                m_icallback.OnWait(new WaitArgs(nWaitInc));
                nTotalWait += nWaitInc;
            }
        }

        /// <summary>
        /// Run a single cycle on the environment after the delay.
        /// </summary>
        public ResultCollection RunOne(int nDelay = 1000)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, 1, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);
            agent.Dispose();
            return null;
        }

        /// <summary>
        /// Run a set of iterations and return the results.
        /// </summary>
        public byte[] Run(int nN, PropertySet runProp, out string type)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.RUN);
            byte[] rgResults = agent.Run(nN, out type);
            agent.Dispose();

            return rgResults;
        }

        /// <summary>
        /// Run the test cycle.
        /// </summary>
        public bool Test(int nN, ITERATOR_TYPE type)
        {
            int nDelay = 1000;
            string strProp = m_properties.ToString();

            // Turn off the num-skip to run at normal speed.
            strProp += "EnableNumSkip=False;";
            PropertySet properties = new PropertySet(strProp);

            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);

            agent.Dispose();
            Shutdown(nDelay);

            return true;
        }

        /// <summary>
        /// Train the network.
        /// </summary>
        public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            m_mycaffe.CancelEvent.Reset();
            DqnAgent<T> agent = new DqnAgent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TRAIN, nN, type, step);
            agent.Dispose();

            return false;
        }
    }

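    // A minimal usage sketch (hypothetical host code; the MyCaffeControl,
    // the callback, and the property values shown here are assumptions made
    // for illustration, not part of this file):
    //
    //   MyCaffeControl<float> mycaffe = ...;   // an open project containing a noisy-DQN model
    //   IxTrainerCallback icallback = ...;     // host-supplied environment callbacks
    //   PropertySet props = new PropertySet("EpsStart=1.0;EpsEnd=0.01;EpsSteps=10000;");
    //
    //   IxTrainerRL trainer = new TrainerNoisyDqn<float>(mycaffe, props, new CryptoRandom(), icallback);
    //   trainer.Initialize();
    //   trainer.Train(100000, ITERATOR_TYPE.ITERATION, TRAIN_STEP.NONE);
    //   trainer.Shutdown(1000);
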
    /// <summary>
    /// The DqnAgent both builds episodes from the environment and trains on them using the Brain.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    class DqnAgent<T> : IDisposable
    {
        IxTrainerCallback m_icallback;
        Brain<T> m_brain;
        PropertySet m_properties;
        CryptoRandom m_random;
        float m_fGamma = 0.95f;
        bool m_bUseRawInput = true;
        int m_nMaxMemory = 10000;
        int m_nTrainingUpdateFreq = 1000;
        int m_nExplorationNum = 50000;
        int m_nEpsSteps = 0;
        double m_dfEpsStart = 0;
        double m_dfEpsEnd = 0;
        double m_dfEpsDelta = 0;
        double m_dfExplorationRate = 0;
        STATE m_state = STATE.EXPLORING;
        double m_dfBetaStart = 0.4;
        int m_nBetaFrames = 1000;
        int m_nMemorySize = 10000;
        float m_fPriorityAlpha = 0.6f;
        MEMTYPE m_memType = MEMTYPE.PRIORITY;

        enum STATE
        {
            EXPLORING,
            TRAINING
        }

        /// <summary>
        /// The constructor.
        /// </summary>
        public DqnAgent(IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_icallback = icallback;
            m_brain = new Brain<T>(mycaffe, properties, random, phase);
            m_properties = properties;
            m_random = random;

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);
            m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", m_bUseRawInput);
            m_nMaxMemory = properties.GetPropertyAsInt("MaxMemory", m_nMaxMemory);
            m_nTrainingUpdateFreq = properties.GetPropertyAsInt("TrainingUpdateFreq", m_nTrainingUpdateFreq);
            m_nExplorationNum = properties.GetPropertyAsInt("ExplorationNum", m_nExplorationNum);
            m_nEpsSteps = properties.GetPropertyAsInt("EpsSteps", m_nEpsSteps);
            m_dfEpsStart = properties.GetPropertyAsDouble("EpsStart", m_dfEpsStart);
            m_dfEpsEnd = properties.GetPropertyAsDouble("EpsEnd", m_dfEpsEnd);
            m_dfEpsDelta = (m_dfEpsStart - m_dfEpsEnd) / m_nEpsSteps;
            m_dfExplorationRate = m_dfEpsStart;

            if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
                throw new Exception("The 'EpsStart' is out of range - please specify a real number in the range [0,1].");

            if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
                throw new Exception("The 'EpsEnd' is out of range - please specify a real number in the range [0,1].");

            if (m_dfEpsEnd > m_dfEpsStart)
                throw new Exception("The 'EpsEnd' must be less than the 'EpsStart' value.");
        }

        /// <summary>
        /// Release all resources used.
        /// </summary>
        public void Dispose()
        {
            if (m_brain != null)
            {
                m_brain.Dispose();
                m_brain = null;
            }
        }

        private StateBase getData(Phase phase, int nAction, int nIdx)
        {
            GetDataArgs args = m_brain.getDataArgs(phase, nAction);
            m_icallback.OnGetData(args);
            args.State.Data.Index = nIdx;
            return args.State;
        }

        private int getAction(int nIteration, SimpleDatum sd, SimpleDatum sdClip, int nActionCount, TRAIN_STEP step)
        {
            if (step == TRAIN_STEP.NONE)
            {
                switch (m_state)
                {
                    case STATE.EXPLORING:
                        return m_random.Next(nActionCount);

                    case STATE.TRAINING:
                        if (m_dfExplorationRate > m_dfEpsEnd)
                            m_dfExplorationRate -= m_dfEpsDelta;

                        if (m_random.NextDouble() < m_dfExplorationRate)
                            return m_random.Next(nActionCount);
                        break;
                }
            }

            return m_brain.act(sd, sdClip, nActionCount);
        }
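
        // Exploration schedule (worked example using assumed property values):
        // with EpsStart=1.0, EpsEnd=0.01 and EpsSteps=9900, the per-step decrement is
        //     m_dfEpsDelta = (1.0 - 0.01) / 9900 = 0.0001,
        // so the exploration rate anneals linearly from 1.0 down to 0.01 over
        // 9,900 training steps and is then held at EpsEnd. Separately from this
        // epsilon-greedy schedule, the noisy layers (see resetNoise in the Brain
        // below) also provide exploration through parameter noise.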

        private void updateStatus(int nIteration, int nEpisodeCount, double dfRewardSum, double dfRunningReward, double dfLoss, double dfLearningRate, bool bModelUpdated)
        {
            GetStatusArgs args = new GetStatusArgs(0, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, m_dfExplorationRate, 0, dfLoss, dfLearningRate, bModelUpdated);
            m_icallback.OnUpdateStatus(args);
        }

        /// <summary>
        /// Run the action on a set number of iterations and return the results with no training.
        /// </summary>
        public byte[] Run(int nIterations, out string type)
        {
            IxTrainerCallbackRNN icallback = m_icallback as IxTrainerCallbackRNN;
            if (icallback == null)
                throw new Exception("The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");

            StateBase s = getData(Phase.RUN, -1, 0);
            int nIteration = 0;
            List<float> rgResults = new List<float>();
            bool bDifferent;

            while (!m_brain.Cancel.WaitOne(0) && (nIterations == -1 || nIteration < nIterations))
            {
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput, out bDifferent);

                // Forward the policy network and sample an action.
                int action = m_brain.act(x, s.Clip, s.ActionCount);

                rgResults.Add(s.Data.TimeStamp.ToFileTime());
                rgResults.Add(s.Data.GetDataAtF(0));
                rgResults.Add(action);

                nIteration++;

                // Take the next step using the action.
                s = getData(Phase.RUN, action, nIteration);
            }

            ConvertOutputArgs args = new ConvertOutputArgs(nIterations, rgResults.ToArray());
            icallback.OnConvertOutput(args);

            type = args.RawType;
            return args.RawOutput;
        }

        private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
        {
            if (nN == -1)
                return false;

            if (type == ITERATOR_TYPE.EPISODE)
                return (nEpisode >= nN);
            else
                return (nIteration >= nN);
        }

        private double beta_by_frame(int nFrameIdx)
        {
            return Math.Min(1.0, m_dfBetaStart + nFrameIdx * (1.0 - m_dfBetaStart) / m_nBetaFrames);
        }
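
        // Worked example: beta_by_frame implements the linear importance-sampling
        // annealing used with prioritized replay,
        //     beta(f) = min(1.0, m_dfBetaStart + f * (1.0 - m_dfBetaStart) / m_nBetaFrames).
        // With the defaults m_dfBetaStart = 0.4 and m_nBetaFrames = 1000:
        // beta(0) = 0.4, beta(500) = 0.7, and beta(f) = 1.0 for all f >= 1000.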

        /// <summary>
        /// The Run method provides the main loop that, on each iteration: gets the current state,
        /// selects and runs an action, stores the transition in the replay memory, and (once
        /// training begins) samples the memory and trains the model.
        /// </summary>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
            int nIteration = 1;
            double dfRunningReward = 0;
            double dfEpisodeReward = 0;
            int nEpisode = 0;
            bool bDifferent = false;

            StateBase state = getData(phase, -1, -1);
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

            // Set the initial target model to the current model.
            m_brain.UpdateTargetModel();

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                if (nIteration > m_nExplorationNum && iMemory.Count > m_brain.BatchSize)
                    m_state = STATE.TRAINING;

                // Forward the policy network and sample an action.
                int action = getAction(nIteration, x, state.Clip, state.ActionCount, step);

                // Take the next step using the action.
                StateBase state_next = getData(phase, action, nIteration);

                // Preprocess the next observation.
                SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
                if (!bDifferent)
                    m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");

                // Build up the episode memory, using the reward for taking the action.
                iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
                dfEpisodeReward += state_next.Reward;

                // Do the training.
                if (m_state == STATE.TRAINING)
                {
                    double dfBeta = beta_by_frame(nIteration + 1);
                    MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
                    m_brain.Train(nIteration, rgSamples, state.ActionCount);
                    iMemory.Update(rgSamples);

                    if (nIteration % m_nTrainingUpdateFreq == 0)
                        m_brain.UpdateTargetModel();
                }

                if (state_next.Done)
                {
                    // Update the running reward (exponential moving average).
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode++;
                    updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

                    state = getData(phase, -1, -1);
                    x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
                    dfEpisodeReward = 0;
                }
                else
                {
                    state = state_next;
                    x = x_next;
                }

                nIteration++;
            }

            iMemory.CleanUp();
        }
    }
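
    // Training cadence (a summary of the loop above, using the field defaults):
    // the agent acts randomly for the first ExplorationNum (50,000) iterations
    // to seed the prioritized replay memory; after that it trains on a sampled
    // batch every iteration, and every TrainingUpdateFreq (1,000) iterations
    // the target network is refreshed from the trained network. The running
    // reward reported to the host is an exponential moving average:
    //     running = 0.99 * running + 0.01 * episodeReward.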

    /// <summary>
    /// The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the network.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>
    class Brain<T> : IDisposable, IxTrainerGetDataCallback
    {
        MyCaffeControl<T> m_mycaffe;
        Solver<T> m_solver;
        Net<T> m_netOutput;
        Net<T> m_netTarget;
        PropertySet m_properties;
        CryptoRandom m_random;
        SimpleDatum m_sdLast = null;
        DataTransformer<T> m_transformer;
        MemoryLossLayer<T> m_memLoss;
        Blob<T> m_blobActions = null;
        Blob<T> m_blobQValue = null;
        Blob<T> m_blobNextQValue = null;
        Blob<T> m_blobExpectedQValue = null;
        Blob<T> m_blobDone = null;
        Blob<T> m_blobLoss = null;
        Blob<T> m_blobWeights = null;
        BlobCollection<T> m_colAccumulatedGradients = new BlobCollection<T>();
        bool m_bUseAcceleratedTraining = false;
        double m_dfLearningRate;
        int m_nMiniBatch = 1;
        float m_fGamma = 0.99f;
        int m_nFramesPerX = 4;
        int m_nStackPerX = 4;
        int m_nBatchSize = 32;
        MemoryCollection m_rgSamples;
        int m_nActionCount = 3;
        bool m_bModelUpdated = false;
        Font m_font = null;
        Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>> m_rgStyle = new Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>>();
        List<SimpleDatum> m_rgX = new List<SimpleDatum>();
        float[] m_rgOverlay = null;

        /// <summary>
        /// The constructor.
        /// </summary>
        public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
        {
            m_mycaffe = mycaffe;
            m_solver = mycaffe.GetInternalSolver();
            m_netOutput = mycaffe.GetInternalNet(phase);
            m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
            m_properties = properties;
            m_random = random;

            Blob<T> data = m_netOutput.blob_by_name("data");
            if (data == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");

            m_nFramesPerX = data.channels;
            m_nBatchSize = data.num;

            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");

            m_nActionCount = logits.channels;

            m_transformer = m_mycaffe.DataTransformer;
            if (m_transformer == null)
            {
                TransformationParameter trans_param = new TransformationParameter();
                int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.Channels;
                int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.Height;
                int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.Width;
                m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
            }

            for (int i = 0; i < m_nFramesPerX; i++)
            {
                m_transformer.param.mean_value.Add(255 / 2); // center each frame
            }

            m_transformer.param.scale = 1.0 / 255; // normalize
            m_transformer.Update();

            m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
            m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

            m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

            m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
            if (m_memLoss == null)
                m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");

            double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
            if (dfRate.HasValue)
                m_dfLearningRate = dfRate.Value;

            m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
            m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

            if (m_nMiniBatch > 1)
            {
                m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
                m_colAccumulatedGradients.SetDiff(0);
            }
        }
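
        // Input normalization (worked example): the transformer first subtracts
        // the per-channel mean (255 / 2 = 127, integer division) and then scales
        // by 1/255, so a raw pixel value p is mapped to (p - 127) / 255, i.e.
        // roughly the range [-0.5, 0.5]: 0 -> -0.498, 127 -> 0.0, 255 -> 0.502.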

        private void dispose(ref Blob<T> b)
        {
            if (b != null)
            {
                b.Dispose();
                b = null;
            }
        }

        /// <summary>
        /// Release all resources used by the Brain.
        /// </summary>
        public void Dispose()
        {
            dispose(ref m_blobActions);
            dispose(ref m_blobQValue);
            dispose(ref m_blobNextQValue);
            dispose(ref m_blobExpectedQValue);
            dispose(ref m_blobDone);
            dispose(ref m_blobLoss);
            dispose(ref m_blobWeights);

            if (m_colAccumulatedGradients != null)
            {
                m_colAccumulatedGradients.Dispose();
                m_colAccumulatedGradients = null;
            }

            if (m_netTarget != null)
            {
                m_netTarget.Dispose();
                m_netTarget = null;
            }

            if (m_font != null)
            {
                m_font.Dispose();
                m_font = null;
            }

            foreach (KeyValuePair<Color, Tuple<Brush, Brush, Pen, Brush>> kv in m_rgStyle)
            {
                kv.Value.Item1.Dispose();
                kv.Value.Item2.Dispose();
                kv.Value.Item3.Dispose();
                kv.Value.Item4.Dispose();
            }

            m_rgStyle.Clear();
        }

        /// <summary>
        /// Returns the GetDataArgs used to retrieve new data from the environment.
        /// </summary>
        public GetDataArgs getDataArgs(Phase phase, int nAction)
        {
            bool bReset = (nAction == -1) ? true : false;
            return new GetDataArgs(phase, 0, m_mycaffe, m_mycaffe.Log, m_mycaffe.CancelEvent, bReset, nAction, true, false, false, this);
        }

        /// <summary>
        /// Specifies the number of frames per X value.
        /// </summary>
        public int FrameStack
        {
            get { return m_nFramesPerX; }
        }

        /// <summary>
        /// Returns the batch size defined by the model.
        /// </summary>
        public int BatchSize
        {
            get { return m_nBatchSize; }
        }

        /// <summary>
        /// Returns the output log.
        /// </summary>
        public Log Log
        {
            get { return m_mycaffe.Log; }
        }

        /// <summary>
        /// Returns the Cancel event used to cancel all MyCaffe tasks.
        /// </summary>
        public CancelEvent Cancel
        {
            get { return m_mycaffe.CancelEvent; }
        }

        /// <summary>
        /// Preprocesses the data.
        /// </summary>
        public SimpleDatum Preprocess(StateBase s, bool bUseRawInput, out bool bDifferent, bool bReset = false)
        {
            bDifferent = false;

            SimpleDatum sd = new SimpleDatum(s.Data, true);

            if (!bUseRawInput)
            {
                if (bReset)
                    m_sdLast = null;

                if (m_sdLast == null)
                    sd.Zero();
                else
                    bDifferent = sd.Sub(m_sdLast);

                m_sdLast = new SimpleDatum(s.Data, true);
            }
            else
            {
                bDifferent = true;
            }

            sd.Tag = bReset;

            if (bReset)
            {
                m_rgX = new List<SimpleDatum>();

                for (int i = 0; i < m_nFramesPerX * m_nStackPerX; i++)
                {
                    m_rgX.Add(sd);
                }
            }
            else
            {
                m_rgX.Add(sd);
                m_rgX.RemoveAt(0);
            }

            SimpleDatum[] rgSd = new SimpleDatum[m_nStackPerX];

            for (int i = 0; i < m_nStackPerX; i++)
            {
                int nIdx = ((m_nStackPerX - i) * m_nFramesPerX) - 1;
                rgSd[i] = m_rgX[nIdx];
            }

            return new SimpleDatum(rgSd.ToList(), true);
        }
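
        // Frame stacking (worked example with the defaults m_nFramesPerX = 4 and
        // m_nStackPerX = 4): m_rgX holds the last 16 observations, newest at the
        // end. The index formula ((m_nStackPerX - i) * m_nFramesPerX) - 1 selects
        // i=0 -> 15, i=1 -> 11, i=2 -> 7, i=3 -> 3, so the stacked datum contains
        // every 4th observation, ordered from newest to oldest.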

        /// <summary>
        /// Returns the action from running the model (the greedy argmax over the logits).
        /// </summary>
        public int act(SimpleDatum sd, SimpleDatum sdClip, int nActionCount)
        {
            setData(m_netOutput, sd, sdClip);
            m_netOutput.ForwardFromTo(0, m_netOutput.layers.Count - 2);

            Blob<T> output = m_netOutput.blob_by_name("logits");
            if (output == null)
                throw new Exception("Missing expected 'logits' blob!");

            // Choose the greedy action.
            return argmax(Utility.ConvertVecF<T>(output.mutable_cpu_data));
        }

        /// <summary>
        /// Get whether or not the model has been updated.
        /// </summary>
        public bool GetModelUpdated()
        {
            bool bModelUpdated = m_bModelUpdated;
            m_bModelUpdated = false;
            return bModelUpdated;
        }

        /// <summary>
        /// The UpdateTargetModel transfers the trained layers from the active Net to the target Net.
        /// </summary>
        public void UpdateTargetModel()
        {
            m_mycaffe.Log.Enable = false;
            m_netOutput.CopyTrainedLayersTo(m_netTarget);
            m_netOutput.CopyInternalBlobsTo(m_netTarget);
            m_mycaffe.Log.Enable = true;
            m_bModelUpdated = true;
        }
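
        // Target-network update: this is the standard DQN 'hard' update, copying
        // the online parameters to the target parameters (theta_target <- theta)
        // every TrainingUpdateFreq iterations. The target net is only used to
        // compute the bootstrapped next-state values, which stabilizes training
        // by keeping the regression target fixed between updates.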

        /// <summary>
        /// Train the model at the current iteration.
        /// </summary>
        public void Train(int nIteration, MemoryCollection rgSamples, int nActionCount)
        {
            m_rgSamples = rgSamples;

            if (m_nActionCount != nActionCount)
                throw new Exception("The logit output of '" + m_nActionCount.ToString() + "' does not match the action count of '" + nActionCount.ToString() + "'!");

            // Get the next_q_values from the target net.
            m_mycaffe.Log.Enable = false;
            setNextStateData(m_netTarget, rgSamples);
            m_netTarget.ForwardFromTo(0, m_netTarget.layers.Count - 2);

            setCurrentStateData(m_netOutput, rgSamples);
            m_memLoss.OnGetLoss += m_memLoss_ComputeTdLoss;

            if (m_nMiniBatch == 1)
            {
                m_solver.Step(1);
            }
            else
            {
                m_solver.Step(1, TRAIN_STEP.NONE, true, m_bUseAcceleratedTraining, true, true);
                m_colAccumulatedGradients.Accumulate(m_mycaffe.Cuda, m_netOutput.learnable_parameters, true);

                if (nIteration % m_nMiniBatch == 0)
                {
                    m_netOutput.learnable_parameters.CopyFrom(m_colAccumulatedGradients, true);
                    m_colAccumulatedGradients.SetDiff(0);
                    m_dfLearningRate = m_solver.ApplyUpdate(nIteration);
                    m_netOutput.ClearParamDiffs();
                }
            }

            m_memLoss.OnGetLoss -= m_memLoss_ComputeTdLoss;
            m_mycaffe.Log.Enable = true;

            resetNoise(m_netOutput);
            resetNoise(m_netTarget);
        }
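
        // Gradient accumulation: when MiniBatch > 1 the solver steps without
        // applying updates; the parameter diffs are instead accumulated into
        // m_colAccumulatedGradients, and only on every MiniBatch-th iteration
        // are the summed diffs copied back and applied via ApplyUpdate. The
        // effect is an update computed over MiniBatch * BatchSize samples.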

        /// <summary>
        /// Compute the temporal-difference loss within the MemoryLossLayer's OnGetLoss event.
        /// </summary>
        private void m_memLoss_ComputeTdLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
        {
            MemoryCollection rgMem = m_rgSamples;

            Blob<T> q_values = m_netOutput.blob_by_name("logits");
            Blob<T> next_q_values = m_netTarget.blob_by_name("logits");

            float[] rgActions = rgMem.GetActionsAsOneHotVector(m_nActionCount);
            m_blobActions.ReshapeLike(q_values);
            m_blobActions.mutable_cpu_data = Utility.ConvertVec<T>(rgActions);
            m_blobQValue.ReshapeLike(q_values);

            // q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
            m_mycaffe.Cuda.mul(m_blobActions.count(), m_blobActions.gpu_data, q_values.gpu_data, m_blobQValue.mutable_gpu_data);
            reduce_sum_axis1(m_blobQValue);

            // next_q_value = next_q_values.max(1)[0]
            m_blobNextQValue.CopyFrom(next_q_values, false, true);
            reduce_argmax_axis1(m_blobNextQValue);

            // expected_q_values
            float[] rgRewards = rgMem.GetRewards();
            m_blobExpectedQValue.ReshapeLike(m_blobQValue);
            m_blobExpectedQValue.mutable_cpu_data = Utility.ConvertVec<T>(rgRewards);

            float[] rgDone = rgMem.GetInvertedDoneAsOneHotVector();
            m_blobDone.ReshapeLike(m_blobQValue);
            m_blobDone.mutable_cpu_data = Utility.ConvertVec<T>(rgDone);

            m_mycaffe.Cuda.mul(m_blobNextQValue.count(), m_blobNextQValue.gpu_data, m_blobDone.gpu_data, m_blobExpectedQValue.mutable_gpu_diff); // next_q_value * (1 - done)
            m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.count(), m_fGamma, m_blobExpectedQValue.mutable_gpu_diff); // gamma * ^
            m_mycaffe.Cuda.add(m_blobExpectedQValue.count(), m_blobExpectedQValue.gpu_diff, m_blobExpectedQValue.gpu_data, m_blobExpectedQValue.gpu_data); // reward + ^

            // loss = (q_value - expected_q_value.detach()).pow(2)
            m_blobLoss.ReshapeLike(m_blobQValue);
            m_mycaffe.Cuda.sub(m_blobQValue.count(), m_blobQValue.gpu_data, m_blobExpectedQValue.gpu_data, m_blobQValue.mutable_gpu_diff); // q_value - expected_q_value
            m_mycaffe.Cuda.powx(m_blobLoss.count(), m_blobQValue.gpu_diff, 2.0, m_blobLoss.mutable_gpu_data); // (q_value - expected_q_value)^2

            // loss = (q_value - expected_q_value.detach()).pow(2) * weights
            m_blobWeights.ReshapeLike(m_blobQValue);
            m_blobWeights.mutable_cpu_data = Utility.ConvertVec<T>(m_rgSamples.Priorities); // importance-sampling weights
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_data); // ^ * weights

            // prios = loss + 1e-5
            m_mycaffe.Cuda.copy(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobLoss.mutable_gpu_diff);
            m_mycaffe.Cuda.add_scalar(m_blobLoss.count(), 1e-5, m_blobLoss.mutable_gpu_diff);
            double[] rgPrios = Utility.ConvertVec<T>(m_blobLoss.mutable_cpu_diff);

            for (int i = 0; i < rgPrios.Length; i++)
            {
                m_rgSamples.Priorities[i] = rgPrios[i];
            }

            //-------------------------------------------------------
            // Calculate the gradient - unroll the operations
            // (autograd - psha! how about manualgrad :-D)
            //-------------------------------------------------------

            // initial gradient
            double dfGradient = 1.0;
            if (m_memLoss.layer_param.loss_weight.Count > 0)
                dfGradient *= m_memLoss.layer_param.loss_weight[0];

            // mean gradient - expand and divide by the batch count.
            dfGradient /= m_blobLoss.count();
            m_blobLoss.SetDiff(dfGradient);

            // multiplication gradient - multiply by the other side.
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_diff);

            // power gradient - multiply by the exponent.
            m_mycaffe.Cuda.mul_scalar(m_blobLoss.count(), 2.0, m_blobLoss.mutable_gpu_diff);

            // q_value - expected_q_value gradient
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobQValue.gpu_diff, m_blobLoss.mutable_gpu_diff);

            // squeeze/gather gradient - scatter back to the selected actions.
            mul(m_blobLoss, m_blobActions, e.Bottom[0]);

            e.Loss = reduce_mean(m_blobLoss, false);
            e.EnableLossUpdate = false;
        }
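
        // The loss computed above is the weighted squared TD error used with
        // prioritized replay:
        //     expected_q = r + gamma * max_a' Q_target(s', a') * (1 - done)
        //     loss_i     = w_i * (Q(s_i, a_i) - expected_q_i)^2
        // and the hand-unrolled backward pass produces, for the taken action,
        //     dloss_i/dQ(s_i, a_i) = (2 / N) * w_i * (Q(s_i, a_i) - expected_q_i)
        // (scaled by the layer's loss weight), with zero gradient for all
        // non-taken actions. The per-sample priorities are refreshed with
        // loss_i + 1e-5 so no transition's sampling probability collapses to zero.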

        private void resetNoise(Net<T> net)
        {
            foreach (Layer<T> layer in net.layers)
            {
                if (layer.type == LayerParameter.LayerType.INNERPRODUCT)
                {
                    if (layer.layer_param.inner_product_param.enable_noise)
                        ((InnerProductLayer<T>)layer).ResetNoise();
                }
            }
        }
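
        // Noisy networks: inner-product layers with enable_noise = true act as
        // 'noisy linear' layers in the NoisyNet sense (Fortunato et al., 2017),
        //     y = (mu_w + sigma_w * eps_w) x + (mu_b + sigma_b * eps_b),
        // where the mu and sigma parameters are learned and eps is random.
        // ResetNoise re-samples eps after each training step, so exploration
        // comes from the weights themselves rather than from epsilon-greedy alone.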

        private void mul(Blob<T> val, Blob<T> actions, Blob<T> result)
        {
            float[] rgVal = Utility.ConvertVecF<T>(val.mutable_cpu_diff);
            float[] rgActions = Utility.ConvertVecF<T>(actions.mutable_cpu_data);
            float[] rgResult = new float[rgActions.Length];

            for (int i = 0; i < actions.num; i++)
            {
                float fPred = rgVal[i];

                for (int j = 0; j < actions.channels; j++)
                {
                    int nIdx = (i * actions.channels) + j;
                    rgResult[nIdx] = rgActions[nIdx] * fPred;
                }
            }

            result.mutable_cpu_diff = Utility.ConvertVec<T>(rgResult);
        }

        private float reduce_mean(Blob<T> b, bool bDiff)
        {
            float[] rg = Utility.ConvertVecF<T>((bDiff) ? b.mutable_cpu_diff : b.mutable_cpu_data);
            float fSum = rg.Sum(p => p);
            return fSum / rg.Length;
        }

        private void reduce_sum_axis1(Blob<T> b)
        {
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgSum = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fSum = 0;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fSum += rg[nIdx + j];
                    }

                    int nIdxR = i * nInnerCount;
                    rgSum[nIdxR + j] = fSum;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgSum);
        }
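
        // Reduction example: for a (32, 3) 'logits' blob, reduce_sum_axis1 sums
        // over the 3 actions and reshapes to (32, 1, 1, 1). Because the data was
        // first multiplied by a one-hot action vector, the sum simply extracts
        // Q(s, a) for the taken action - the gather/squeeze from the PyTorch
        // comment above. reduce_argmax_axis1 (below) likewise reduces to the
        // per-sample maximum value along the action axis.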

        private void reduce_argmax_axis1(Blob<T> b)
        {
            // Note: despite the name, this reduces to the maximum *value* along
            // axis 1 (next_q_values.max(1)[0]), not the index of the maximum.
            int nNum = b.shape(0);
            int nActions = b.shape(1);
            int nInnerCount = b.count(2);
            float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_data);
            float[] rgMax = new float[nNum * nInnerCount];

            for (int i = 0; i < nNum; i++)
            {
                for (int j = 0; j < nInnerCount; j++)
                {
                    float fMax = -float.MaxValue;

                    for (int k = 0; k < nActions; k++)
                    {
                        int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
                        fMax = Math.Max(fMax, rg[nIdx + j]);
                    }

                    int nIdxR = i * nInnerCount;
                    rgMax[nIdxR + j] = fMax;
                }
            }

            b.Reshape(nNum, nInnerCount, 1, 1);
            b.mutable_cpu_data = Utility.ConvertVec<T>(rgMax);
        }

        private int argmax(float[] rgProb, int nActionCount, int nSampleIdx)
        {
            float[] rgfProb = new float[nActionCount];

            for (int j = 0; j < nActionCount; j++)
            {
                int nIdx = (nSampleIdx * nActionCount) + j;
                rgfProb[j] = rgProb[nIdx];
            }

            return argmax(rgfProb);
        }

        private int argmax(float[] rgfAprob)
        {
            float fMax = -float.MaxValue;
            int nIdx = 0;

            for (int i = 0; i < rgfAprob.Length; i++)
            {
                if (rgfAprob[i] == fMax)
                {
                    // Break ties randomly.
                    if (m_random.NextDouble() > 0.5)
                        nIdx = i;
                }
                else if (fMax < rgfAprob[i])
                {
                    fMax = rgfAprob[i];
                    nIdx = i;
                }
            }

            return nIdx;
        }

        private void setData(Net<T> net, SimpleDatum sdData, SimpleDatum sdClip)
        {
            SimpleDatum[] rgData = new SimpleDatum[] { sdData };
            SimpleDatum[] rgClip = null;

            if (sdClip != null)
                rgClip = new SimpleDatum[] { sdClip };

            setData(net, rgData, rgClip);
        }

        private void setCurrentStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData0 = rgSamples.GetCurrentStateData();
            List<SimpleDatum> rgClip0 = rgSamples.GetCurrentStateClip();

            SimpleDatum[] rgData = rgData0.ToArray();
            SimpleDatum[] rgClip = (rgClip0 != null) ? rgClip0.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setNextStateData(Net<T> net, MemoryCollection rgSamples)
        {
            List<SimpleDatum> rgData1 = rgSamples.GetNextStateData();
            List<SimpleDatum> rgClip1 = rgSamples.GetNextStateClip();

            SimpleDatum[] rgData = rgData1.ToArray();
            SimpleDatum[] rgClip = (rgClip1 != null) ? rgClip1.ToArray() : null;

            setData(net, rgData, rgClip);
        }

        private void setData(Net<T> net, SimpleDatum[] rgData, SimpleDatum[] rgClip)
        {
            Blob<T> data = net.blob_by_name("data");

            data.Reshape(rgData.Length, data.channels, data.height, data.width);
            m_transformer.Transform(rgData, data, m_mycaffe.Cuda, m_mycaffe.Log);

            if (rgClip != null)
            {
                Blob<T> clip = net.blob_by_name("clip");

                if (clip != null)
                {
                    clip.Reshape(rgClip.Length, rgClip[0].Channels, rgClip[0].Height, rgClip[0].Width);
                    m_transformer.Transform(rgClip, clip, m_mycaffe.Cuda, m_mycaffe.Log, true);
                }
            }
        }

        /// <summary>
        /// The OnOverlay callback is called just before displaying the gym image, allowing
        /// the action values to be drawn as an overlay on top of it.
        /// </summary>
        public void OnOverlay(OverlayArgs e)
        {
            Blob<T> logits = m_netOutput.blob_by_name("logits");
            if (logits == null)
                return;

            if (logits.num == 1)
                m_rgOverlay = Utility.ConvertVecF<T>(logits.mutable_cpu_data);

            if (m_rgOverlay == null)
                return;

            using (Graphics g = Graphics.FromImage(e.DisplayImage))
            {
                int nBorder = 30;
                int nWid = e.DisplayImage.Width - (nBorder * 2);
                int nWid1 = nWid / m_rgOverlay.Length;
                int nHt1 = (int)(e.DisplayImage.Height * 0.3);
                int nX = nBorder;
                int nY = e.DisplayImage.Height - nHt1;
                ColorMapper clrMap = new ColorMapper(0, m_rgOverlay.Length + 1, Color.Black, Color.Red);
                float fMax = -float.MaxValue;
                int nMaxIdx = 0;
                float fMin1 = m_rgOverlay.Min(p => p);
                float fMax1 = m_rgOverlay.Max(p => p);

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    if (fMin1 < 0 || fMax1 > 1)
                        m_rgOverlay[i] = (m_rgOverlay[i] - fMin1) / (fMax1 - fMin1);

                    if (m_rgOverlay[i] > fMax)
                    {
                        fMax = m_rgOverlay[i];
                        nMaxIdx = i;
                    }
                }

                for (int i = 0; i < m_rgOverlay.Length; i++)
                {
                    drawProbabilities(g, nX, nY, nWid1, nHt1, i, m_rgOverlay[i], fMin1, fMax1, clrMap.GetColor(i + 1), (i == nMaxIdx) ? true : false);
                    nX += nWid1;
                }
            }
        }

        private void drawProbabilities(Graphics g, int nX, int nY, int nWid, int nHt, int nAction, float fProb, float fMin, float fMax, Color clr, bool bMax)
        {
            string str = "";

            if (m_font == null)
                m_font = new Font("Century Gothic", 9.0f);

            if (!m_rgStyle.ContainsKey(clr))
            {
                Color clr1 = Color.FromArgb(128, clr);
                Brush br1 = new SolidBrush(clr1);
                Color clr2 = Color.FromArgb(64, clr);
                Pen pen = new Pen(clr2, 1.0f);
                Brush br2 = new SolidBrush(clr2);
                Brush brBright = new SolidBrush(clr);
                m_rgStyle.Add(clr, new Tuple<Brush, Brush, Pen, Brush>(br1, br2, pen, brBright));
            }

            Brush brBack = m_rgStyle[clr].Item1;
            Brush brFront = m_rgStyle[clr].Item2;
            Brush brTop = m_rgStyle[clr].Item4;
            Pen penLine = m_rgStyle[clr].Item3;

            if (fMin != 0 || fMax != 0)
            {
                str = "Action " + nAction.ToString() + " (" + fProb.ToString("N7") + ")";
            }
            else
            {
                str = "Action " + nAction.ToString() + " - No Probabilities";
            }

            SizeF sz = g.MeasureString(str, m_font);

            int nY1 = (int)(nY + (nHt - sz.Height));
            int nX1 = (int)(nX + (nWid / 2) - (sz.Width / 2));
            g.DrawString(str, m_font, (bMax) ? brTop : brFront, new Point(nX1, nY1));

            if (fMin != 0 || fMax != 0)
            {
                float fX = nX;
                float fWid = nWid;
                nHt -= (int)sz.Height;

                float fHt = nHt * fProb;
                float fHt1 = nHt - fHt;
                RectangleF rc1 = new RectangleF(fX, nY + fHt1, fWid, fHt);
                g.FillRectangle(brBack, rc1);
                g.DrawRectangle(penLine, rc1.X, rc1.Y, rc1.Width, rc1.Height);
            }
        }
    }
}