MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
TextDataLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.fillers;
using System.IO;
using MyCaffe.layers.beta.TextData;

namespace MyCaffe.layers.beta
{
    /// <summary>
    /// The TextDataLayer loads data from text data files for an encoder/decoder type model.
    /// </summary>
    public class TextDataLayer<T> : Layer<T>
    {
        DataItem m_currentData = null;
        Data m_data = null;
        Vocabulary m_vocab = null;
        ulong m_lOffset = 0;
        float[] m_rgEncInput1;
        float[] m_rgEncInput2;
        float[] m_rgEncClip;
        float[] m_rgDecInput;
        float[] m_rgDecClip;
        float[] m_rgDecTarget;

        /// <summary>
        /// The OnGetData event fires during each forward pass after the training data for the pass has been retrieved.
        /// </summary>
        public event EventHandler<OnGetDataArgs> OnGetData;

        /// <summary>
        /// The TextDataLayer constructor.
        /// </summary>
        public TextDataLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.TEXT_DATA;
        }

        /// <summary>
        /// Release all internal blobs.
        /// </summary>
        protected override void dispose()
        {
            base.dispose();
        }

        /// <summary>
        /// Returns the minimum number of required bottom (input) Blobs: 3 in the RUN phase; 0 in TRAIN and TEST, where data layers have no bottom Blobs.
        /// </summary>
        public override int MinBottomBlobs
        {
            get { return (m_phase == Phase.RUN) ? 3 : 0; }
        }

        /// <summary>
        /// Returns the maximum number of required bottom (input) Blobs: 4 in the RUN phase; 0 in TRAIN and TEST, where data layers have no bottom Blobs.
        /// </summary>
        public override int MaxBottomBlobs
        {
            get { return (m_phase == Phase.RUN) ? 4 : 0; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: dec, dclip, enc, eclip, vocabcount, dectgt.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 6; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: dec, dclip, enc, encr, eclip, vocabcount, dectgt.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 7; }
        }

        /// <summary>
        /// Returns the vocabulary of the data sources.
        /// </summary>
        public Vocabulary Vocabulary
        {
            get { return m_vocab; }
        }

        /// <summary>
        /// Returns information on the current iteration.
        /// </summary>
        public IterationInfo IterationInfo
        {
            get { return (m_currentData == null) ? new IterationInfo(true, true, 0) : m_currentData.IterationInfo; }
        }

        private static string clean(string str)
        {
            string strOut = "";

            foreach (char ch in str)
            {
                if (ch == 'á')
                    strOut += 'a';
                else if (ch == 'é')
                    strOut += 'e';
                else if (ch == 'í')
                    strOut += 'i';
                else if (ch == 'ó')
                    strOut += 'o';
                else if (ch == 'ú')
                    strOut += 'u';
                else if (ch == 'Á')
                    strOut += 'A';
                else if (ch == 'É')
                    strOut += 'E';
                else if (ch == 'Í')
                    strOut += 'I';
                else if (ch == 'Ó')
                    strOut += 'O';
                else if (ch == 'Ú')
                    strOut += 'U';
                else
                    strOut += ch;
            }

            return strOut;
        }

        /// <summary>
        /// Should return true when pre-processing methods are overridden.
        /// </summary>
        public override bool SupportsPreProcessing
        {
            get { return true; }
        }

        /// <summary>
        /// Should return true when post-processing methods are overridden.
        /// </summary>
        public override bool SupportsPostProcessing
        {
            get { return true; }
        }

        private static List<string> preprocess(string str, int nMaxLen = 0)
        {
            string strInput = clean(str);
            List<string> rgstr = strInput.ToLower().Trim().Split(' ').ToList();

            if (nMaxLen > 0)
            {
                rgstr = rgstr.Take(nMaxLen).ToList();
                if (rgstr.Count < nMaxLen)
                    return null;
            }

            return rgstr;
        }
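
        // Example: for the input "Él está aquí", clean() strips the acute accents and
        // preprocess() lower-cases and splits the line, yielding the word list
        // ["el", "esta", "aqui"]; with nMaxLen = 5 the same call returns null because
        // the line contains fewer than 5 words.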

        private string getPath(string strPath)
        {
            string strTarget = "$ProgramData$";

            if (!strPath.StartsWith(strTarget))
                return strPath;

            string strProgData = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
            strProgData = strProgData.TrimEnd('\\');

            strPath = strProgData + strPath.Substring(strTarget.Length);

            return strPath;
        }
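
        // Example: on a typical Windows install, a path such as "$ProgramData$\MyCaffe\..."
        // expands to "C:\ProgramData\MyCaffe\..." because
        // Environment.SpecialFolder.CommonApplicationData usually resolves to C:\ProgramData.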

        /// <summary>
        /// Load the encoder and decoder input files and convert each into a list of lines, each containing a list of words.
        /// </summary>
        public void PreProcessInputFiles(TextDataParameter p)
        {
            List<List<string>> rgrgstrInput = new List<List<string>>();
            List<List<string>> rgrgstrTarget = new List<List<string>>();

            string strEncoderSrc = getPath(p.encoder_source);
            string strDecoderSrc = getPath(p.decoder_source);

            string[] rgstrInput = File.ReadAllLines(strEncoderSrc);
            string[] rgstrTarget = File.ReadAllLines(strDecoderSrc);

            if (rgstrInput.Length != rgstrTarget.Length)
                throw new Exception("Both the input and target files must contain the same number of lines!");

            for (int i = 0; i < p.sample_size; i++)
            {
                List<string> rgstrInput1 = preprocess(rgstrInput[i]);
                List<string> rgstrTarget1 = preprocess(rgstrTarget[i]);

                if (rgstrInput1 != null && rgstrTarget1 != null)
                {
                    rgrgstrInput.Add(rgstrInput1);
                    rgrgstrTarget.Add(rgstrTarget1);
                }
            }

            m_vocab = new Vocabulary();
            m_vocab.Load(rgrgstrInput, rgrgstrTarget);
            m_data = new Data(rgrgstrInput, rgrgstrTarget, m_vocab);
        }

        /// <summary>
        /// The PreProcessInput allows derivative data layers to convert a property set of input data into the bottom blob collection used as input for the RUN phase.
        /// </summary>
        public override BlobCollection<T> PreProcessInput(PropertySet customInput, out int nSeqLen, BlobCollection<T> colBottom = null)
        {
            nSeqLen = -1;

            if (colBottom == null)
            {
                string strInput = m_param.PrepareRunModelInputs();
                RawProto proto = RawProto.Parse(strInput);
                Dictionary<string, BlobShape> rgInput = NetParameter.InputFromProto(proto);
                colBottom = new BlobCollection<T>();

                foreach (KeyValuePair<string, BlobShape> kv in rgInput)
                {
                    Blob<T> blob = new Blob<T>(m_cuda, m_log);
                    blob.Name = kv.Key;
                    blob.Reshape(kv.Value);
                    colBottom.Add(blob);
                }
            }

            string strEncInput = customInput.GetProperty("InputData");
            if (strEncInput == null)
                throw new Exception("Could not find the expected input property 'InputData'!");

            PreProcessInput(strEncInput, null, colBottom);

            return colBottom;
        }

        /// <summary>
        /// Preprocess the input data for the RUN phase.
        /// </summary>
        public override bool PreProcessInput(string strEncInput, int? nDecInput, BlobCollection<T> colBottom)
        {
            if (nDecInput.HasValue && nDecInput.Value == (int)SPECIAL_TOKENS.EOS)
                return false;

            List<string> rgstrInput = null;
            if (strEncInput != null)
                rgstrInput = preprocess(strEncInput);

            DataItem data = Data.GetInputData(m_vocab, rgstrInput, nDecInput);

            if (m_param.text_data_param.enable_normal_encoder_output && m_param.text_data_param.enable_reverse_encoder_output)
                m_log.CHECK_EQ(colBottom.Count, 4, "The bottom collection must have 4 items: dec_input, enc_input, enc_inputr, enc_clip");
            else
                m_log.CHECK_EQ(colBottom.Count, 3, "The bottom collection must have 3 items: dec_input, enc_input | enc_inputr, enc_clip");

            int nT = (int)m_param.text_data_param.time_steps;
            int nBtmIdx = 0;

            colBottom[nBtmIdx].Reshape(new List<int>() { 1, 1, 1 });
            nBtmIdx++;

            if (m_param.text_data_param.enable_normal_encoder_output)
            {
                colBottom[nBtmIdx].Reshape(new List<int>() { nT, 1, 1 });
                nBtmIdx++;
            }

            if (m_param.text_data_param.enable_reverse_encoder_output)
            {
                colBottom[nBtmIdx].Reshape(new List<int>() { nT, 1, 1 });
                nBtmIdx++;
            }

            colBottom[nBtmIdx].Reshape(new List<int>() { nT, 1 });

            float[] rgEncInput = null;
            float[] rgEncInputR = null;
            float[] rgEncClip = null;
            float[] rgDecInput = new float[1];

            if (data.EncoderInput != null)
            {
                rgEncInput = new float[nT];
                rgEncInputR = new float[nT];
                rgEncClip = new float[nT];

                for (int i = 0; i < nT && i < data.EncoderInput.Count; i++)
                {
                    rgEncInput[i] = data.EncoderInput[i];
                    rgEncInputR[i] = data.EncoderInputReverse[i];
                    rgEncClip[i] = (i == 0) ? 0 : 1;
                }
            }

            rgDecInput[0] = data.DecoderInput;

            nBtmIdx = 0;
            colBottom[nBtmIdx].mutable_cpu_data = convert(rgDecInput);
            nBtmIdx++;

            if (m_param.text_data_param.enable_normal_encoder_output)
            {
                if (rgEncInput != null)
                    colBottom[nBtmIdx].mutable_cpu_data = convert(rgEncInput);
                nBtmIdx++;
            }

            if (m_param.text_data_param.enable_reverse_encoder_output)
            {
                if (rgEncInputR != null)
                    colBottom[nBtmIdx].mutable_cpu_data = convert(rgEncInputR);
                nBtmIdx++;
            }

            if (rgEncClip != null)
                colBottom[nBtmIdx].mutable_cpu_data = convert(rgEncClip);

            return true;
        }
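
        // Note: in the RUN phase the bottom blobs are expected in the order
        // dec_input, enc_input (when normal encoder output is enabled),
        // enc_inputr (when reverse encoder output is enabled), enc_clip.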

        public override List<Tuple<string, int, double>> PostProcessOutput(Blob<T> blobSoftmax, int nK = 1)
        {
            m_log.CHECK_EQ(blobSoftmax.channels, 1, "Currently, only batch size = 1 supported.");

            List<Tuple<string, int, double>> rgRes = new List<Tuple<string, int, double>>();

            long lPos;
            double dfProb = blobSoftmax.GetMaxData(out lPos);

            rgRes.Add(new Tuple<string, int, double>(m_vocab.IndexToWord((int)lPos), (int)lPos, dfProb));

            if (nK > 1)
            {
                m_cuda.copy(blobSoftmax.count(), blobSoftmax.gpu_data, blobSoftmax.mutable_gpu_diff);

                for (int i = 1; i < nK; i++)
                {
                    blobSoftmax.SetData(-1000000000, (int)lPos);
                    dfProb = blobSoftmax.GetMaxData(out lPos);

                    string strWord = m_vocab.IndexToWord((int)lPos);
                    if (strWord.Length > 0)
                        rgRes.Add(new Tuple<string, int, double>(strWord, (int)lPos, dfProb));
                }

                m_cuda.copy(blobSoftmax.count(), blobSoftmax.gpu_diff, blobSoftmax.mutable_gpu_data);
                blobSoftmax.SetDiff(0);
            }

            return rgRes;
        }

        public override string PostProcessOutput(int nIdx)
        {
            return m_vocab.IndexToWord(nIdx);
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Refuse transformation parameters since TextData is totally generic.
            if (m_param.transform_param != null)
                m_log.WriteLine("WARNING: " + m_type.ToString() + " does not transform data.");

            m_log.CHECK_EQ(m_param.text_data_param.batch_size, 1, "Currently, only batch_size = 1 supported.");

            if (m_param.text_data_param.enable_normal_encoder_output && m_param.text_data_param.enable_reverse_encoder_output)
                m_log.CHECK_EQ(colTop.Count, 7, "When normal and reverse encoder output used, there must be 7 tops: dec, dclip, enc, encr, eclip, vocabcount, dectgt (only valid on TEST | TRAIN)");
            else if (m_param.text_data_param.enable_normal_encoder_output || m_param.text_data_param.enable_reverse_encoder_output)
                m_log.CHECK_EQ(colTop.Count, 6, "When normal or reverse encoder output used, there must be 6 tops: dec, dclip, enc | encr, eclip, vocabcount, dectgt (only valid on TEST | TRAIN)");
            else
                m_log.FAIL("You must specify to enable either normal, reverse or both encoder inputs.");

            // Load the encoder and decoder input files into the Data and Vocabulary.
            PreProcessInputFiles(m_param.text_data_param);

            m_rgDecInput = new float[m_param.text_data_param.batch_size];
            m_rgDecClip = new float[m_param.text_data_param.batch_size];
            m_rgEncInput1 = new float[m_param.text_data_param.batch_size * m_param.text_data_param.time_steps];
            m_rgEncInput2 = new float[m_param.text_data_param.batch_size * m_param.text_data_param.time_steps];
            m_rgEncClip = new float[m_param.text_data_param.batch_size * m_param.text_data_param.time_steps];

            if (m_phase != Phase.RUN)
                m_rgDecTarget = new float[m_param.text_data_param.batch_size];

            reshape(colTop, true);
        }

        protected bool Skip()
        {
            ulong nSize = (ulong)m_param.solver_count;
            ulong nRank = (ulong)m_param.solver_rank;
            // In test mode, only rank 0 runs, so avoid skipping.
            bool bKeep = (m_lOffset % nSize) == nRank || m_param.phase == Phase.TEST;

            return !bKeep;
        }

        protected void Next()
        {
            m_currentData = m_data.GetNextData(m_param.text_data_param.shuffle);
        }

        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            reshape(colTop, false);
        }

        private void reshape(BlobCollection<T> colTop, bool bSetup)
        {
            int nBatchSize = (int)m_param.text_data_param.batch_size;
            int nT = (int)m_param.text_data_param.time_steps;
            List<int> rgTopShape = new List<int>() { nT, nBatchSize, 1 };
            int nTopIdx = 0;

            // Reshape the decoder input.
            if (!bSetup)
                colTop[nTopIdx].Reshape(new List<int>() { 1, nBatchSize, 1 });
            nTopIdx++;

            // Reshape the decoder clip.
            if (!bSetup)
                colTop[nTopIdx].Reshape(new List<int>() { 1, nBatchSize });
            nTopIdx++;

            // Reshape the encoder data | data reverse.
            if (m_param.text_data_param.enable_normal_encoder_output)
            {
                if (!bSetup)
                    colTop[nTopIdx].Reshape(rgTopShape);
                nTopIdx++;
            }

            // Reshape the encoder data reverse.
            if (m_param.text_data_param.enable_reverse_encoder_output)
            {
                if (!bSetup)
                    colTop[nTopIdx].Reshape(rgTopShape);
                nTopIdx++;
            }

            // Reshape the encoder clip for attention.
            if (!bSetup)
                colTop[nTopIdx].Reshape(new List<int>() { nT, nBatchSize });
            nTopIdx++;

            // Reshape the vocab count.
            colTop[nTopIdx].Reshape(new List<int>() { 1 });
            if (bSetup)
                colTop[nTopIdx].SetData(m_vocab.VocabularCount + 2, 0);
            nTopIdx++;

            // Reshape the decoder target.
            if (!bSetup)
                colTop[nTopIdx].Reshape(new List<int>() { 1, nBatchSize, 1 });
        }

        /// <summary>
        /// Run the forward computation, which fills the data into the top (output) Blobs.
        /// </summary>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            int nBatch = (int)m_param.text_data_param.batch_size;
            int nT = (int)m_param.text_data_param.time_steps;

            Array.Clear(m_rgDecInput, 0, m_rgDecInput.Length);
            if (m_phase != Phase.RUN)
                Array.Clear(m_rgDecTarget, 0, m_rgDecTarget.Length);
            Array.Clear(m_rgDecClip, 0, m_rgDecClip.Length);
            Array.Clear(m_rgEncInput1, 0, m_rgEncInput1.Length);
            Array.Clear(m_rgEncInput2, 0, m_rgEncInput2.Length);
            Array.Clear(m_rgEncClip, 0, m_rgEncClip.Length);

            int nTopIdx = 0;

            if (m_phase != Phase.RUN)
            {
                for (int i = 0; i < nBatch; i++)
                {
                    while (Skip())
                        Next();

                    Next();

                    if (OnGetData != null)
                        OnGetData(this, new OnGetDataArgs(m_vocab, m_currentData.IterationInfo));

                    int nIdx = i * nT;

                    for (int j = 0; j < nT && j < m_currentData.EncoderInput.Count; j++)
                    {
                        m_rgEncInput1[nIdx + j] = m_currentData.EncoderInput[j];
                        m_rgEncInput2[nIdx + j] = m_currentData.EncoderInputReverse[j];
                        m_rgEncClip[nIdx + j] = (j == 0) ? 0 : 1;
                    }

                    m_rgDecClip[i] = m_currentData.DecoderClip;
                    m_rgDecInput[i] = m_currentData.DecoderInput;
                    m_rgDecTarget[i] = m_currentData.DecoderTarget;
                }

                colTop[nTopIdx].mutable_cpu_data = convert(m_rgDecInput);
                nTopIdx++;

                colTop[nTopIdx].mutable_cpu_data = convert(m_rgDecClip);
                nTopIdx++;

                if (m_param.text_data_param.enable_normal_encoder_output)
                {
                    colTop[nTopIdx].mutable_cpu_data = convert(m_rgEncInput1);
                    nTopIdx++;
                }

                if (m_param.text_data_param.enable_reverse_encoder_output)
                {
                    colTop[nTopIdx].mutable_cpu_data = convert(m_rgEncInput2);
                    nTopIdx++;
                }

                colTop[nTopIdx].mutable_cpu_data = convert(m_rgEncClip);
                nTopIdx++;

                nTopIdx++; // vocab count.

                colTop[nTopIdx].mutable_cpu_data = convert(m_rgDecTarget);
                nTopIdx++;
            }
            else
            {
                int nBtmIdx = 0;
                float fDecInput = convertF(colBottom[nBtmIdx].GetData(0));
                if (fDecInput < 0)
                    fDecInput = 1;

                nBtmIdx++;

                // Decoder input.
                colTop[nTopIdx].SetData(fDecInput, 0);
                nTopIdx++;

                // Decoder clip.
                colTop[nTopIdx].SetData((fDecInput == 1) ? 0 : 1, 0);
                nTopIdx++;

                // Encoder data.
                if (m_param.text_data_param.enable_normal_encoder_output)
                {
                    colTop[nTopIdx].CopyFrom(colBottom[nBtmIdx]);
                    nTopIdx++;
                    nBtmIdx++;
                }

                // Encoder data reverse.
                if (m_param.text_data_param.enable_reverse_encoder_output)
                {
                    colTop[nTopIdx].CopyFrom(colBottom[nBtmIdx]);
                    nTopIdx++;
                    nBtmIdx++;
                }

                // Encoder clip.
                colTop[nTopIdx].CopyFrom(colBottom[nBtmIdx]);
            }
        }

        /// <summary>
        /// Not implemented - data layers do not perform backward.
        /// </summary>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
        }
    }

    namespace TextData
    {
#pragma warning disable 1591

        class Data
        {
            Random m_random = new Random((int)DateTime.Now.Ticks);
            List<List<string>> m_rgInput;
            List<List<string>> m_rgOutput;
            int m_nCurrentSequence = -1;
            int m_nCurrentOutputIdx = 0;
            int m_nSequenceIdx = 0;
            int m_nIxInput = 1;
            int m_nIterations = 0;
            int m_nOutputCount = 0;
            Vocabulary m_vocab;

            public Data(List<List<string>> rgInput, List<List<string>> rgOutput, Vocabulary vocab)
            {
                m_vocab = vocab;
                m_rgInput = rgInput;
                m_rgOutput = rgOutput;
            }

            public Vocabulary Vocabulary
            {
                get { return m_vocab; }
            }

            public int VocabularyCount
            {
                get { return m_vocab.VocabularCount; }
            }

            public static DataItem GetInputData(Vocabulary vocab, List<string> rgstrInput, int? nDecInput = null)
            {
                List<int> rgInput = null;

                if (rgstrInput != null)
                {
                    rgInput = new List<int>();
                    foreach (string str in rgstrInput)
                    {
                        rgInput.Add(vocab.WordToIndex(str));
                    }
                }

                int nClip = 1;

                if (!nDecInput.HasValue)
                {
                    nClip = 0;
                    nDecInput = 1;
                }

                return new DataItem(rgInput, nDecInput.Value, -1, nClip, false, true, 0);
            }

            public DataItem GetNextData(bool bShuffle)
            {
                int nDecClip = 1;

                bool bNewSequence = false;
                bool bNewEpoch = false;

                if (m_nCurrentSequence == -1)
                {
                    m_nIterations++;
                    bNewSequence = true;

                    if (bShuffle)
                    {
                        m_nCurrentSequence = m_random.Next(m_rgInput.Count);
                    }
                    else
                    {
                        m_nCurrentSequence = m_nSequenceIdx;
                        m_nSequenceIdx++;
                        if (m_nSequenceIdx == m_rgOutput.Count)
                            m_nSequenceIdx = 0;
                    }

                    m_nOutputCount = m_rgOutput[m_nCurrentSequence].Count;
                    nDecClip = 0;

                    if (m_nIterations == m_rgOutput.Count)
                    {
                        bNewEpoch = true;
                        m_nIterations = 0;
                    }
                }

                List<string> rgstrInput = m_rgInput[m_nCurrentSequence];
                List<int> rgInput = new List<int>();
                foreach (string str in rgstrInput)
                {
                    rgInput.Add(m_vocab.WordToIndex(str));
                }

                int nIxTarget = 0;

                if (m_nCurrentOutputIdx < m_rgOutput[m_nCurrentSequence].Count)
                {
                    string strTarget = m_rgOutput[m_nCurrentSequence][m_nCurrentOutputIdx];
                    nIxTarget = m_vocab.WordToIndex(strTarget);
                }

                DataItem data = new DataItem(rgInput, m_nIxInput, nIxTarget, nDecClip, bNewEpoch, bNewSequence, m_nOutputCount);
                m_nIxInput = nIxTarget;

                m_nCurrentOutputIdx++;

                if (m_nCurrentOutputIdx == m_rgOutput[m_nCurrentSequence].Count)
                {
                    m_nCurrentSequence = -1;
                    m_nCurrentOutputIdx = 0;
                    m_nIxInput = 1;
                }

                return data;
            }
        }
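
        // Note: GetNextData unrolls the decoder one step per call - each call returns
        // the full encoder sequence plus a single decoder target word, the previous
        // target as the decoder input, and a decoder clip of 0 only on the first step
        // of a new target sequence.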

        class DataItem
        {
            IterationInfo m_iter;
            List<int> m_rgInput;
            List<int> m_rgInputReverse;
            int m_nIxInput;
            int m_nIxTarget;
            int m_nDecClip;

            public DataItem(List<int> rgInput, int nIxInput, int nIxTarget, int nDecClip, bool bNewEpoch, bool bNewSequence, int nOutputCount)
            {
                m_rgInput = rgInput;
                m_nIxInput = nIxInput;
                m_nIxTarget = nIxTarget;
                m_nDecClip = nDecClip;
                m_iter = new IterationInfo(bNewEpoch, bNewSequence, nOutputCount);
                m_rgInputReverse = new List<int>();

                if (rgInput != null)
                {
                    for (int i = rgInput.Count - 1; i >= 0; i--)
                    {
                        m_rgInputReverse.Add(rgInput[i]);
                    }
                }
                else
                {
                    m_rgInputReverse = null;
                }
            }

            public List<int> EncoderInput
            {
                get { return m_rgInput; }
            }

            public List<int> EncoderInputReverse
            {
                get { return m_rgInputReverse; }
            }

            public int DecoderInput
            {
                get { return m_nIxInput; }
            }

            public int DecoderTarget
            {
                get { return m_nIxTarget; }
            }

            public int DecoderClip
            {
                get { return m_nDecClip; }
            }

            public IterationInfo IterationInfo
            {
                get { return m_iter; }
            }
        }

#pragma warning restore 1591

        /// <summary>
        /// The IterationInfo class contains information about each iteration.
        /// </summary>
        public class IterationInfo
        {
            bool m_bNewEpoch;
            bool m_bNewSequence;
            int m_nOutputCount;

            /// <summary>
            /// The constructor.
            /// </summary>
            public IterationInfo(bool bNewEpoch, bool bNewSequence, int nOutputCount)
            {
                m_bNewEpoch = bNewEpoch;
                m_bNewSequence = bNewSequence;
                m_nOutputCount = nOutputCount;
            }

            /// <summary>
            /// Returns whether or not the current iteration is in a new epoch.
            /// </summary>
            public bool NewEpoch
            {
                get { return m_bNewEpoch; }
            }

            /// <summary>
            /// Returns whether or not the current iteration is in a new sequence.
            /// </summary>
            public bool NewSequence
            {
                get { return m_bNewSequence; }
            }

            /// <summary>
            /// Returns the output count of the current sequence.
            /// </summary>
            public int OutputCount
            {
                get { return m_nOutputCount; }
            }
        }

        /// <summary>
        /// The Vocabulary object manages the overall word dictionary and the word-to-index and index-to-word mappings.
        /// </summary>
        public class Vocabulary
        {
            Dictionary<string, int> m_rgDictionary = new Dictionary<string, int>();
            Dictionary<string, int> m_rgWordToIndex = new Dictionary<string, int>();
            Dictionary<int, string> m_rgIndexToWord = new Dictionary<int, string>();
            List<string> m_rgstrVocabulary = new List<string>();

            /// <summary>
            /// The constructor.
            /// </summary>
            public Vocabulary()
            {
            }

            /// <summary>
            /// The WordToIndex method maps a word to its corresponding index value.
            /// </summary>
            public int WordToIndex(string strWord)
            {
                if (!m_rgWordToIndex.ContainsKey(strWord))
                    throw new Exception("I do not know the word '" + strWord + "'!");

                return m_rgWordToIndex[strWord];
            }

            /// <summary>
            /// The IndexToWord method maps an index value to its corresponding word.
            /// </summary>
            public string IndexToWord(int nIdx)
            {
                if (!m_rgIndexToWord.ContainsKey(nIdx))
                    return "";

                return m_rgIndexToWord[nIdx];
            }

            /// <summary>
            /// Returns the number of words in the vocabulary.
            /// </summary>
            public int VocabularCount
            {
                get { return m_rgstrVocabulary.Count; }
            }

            /// <summary>
            /// Loads the word-to-index mappings from the input and target word lists.
            /// </summary>
            public void Load(List<List<string>> rgrgstrInput, List<List<string>> rgrgstrTarget)
            {
                m_rgDictionary = new Dictionary<string, int>();

                // Count up all words.
                for (int i = 0; i < rgrgstrInput.Count; i++)
                {
                    for (int j = 0; j < rgrgstrInput[i].Count; j++)
                    {
                        string strWord = rgrgstrInput[i][j];

                        if (!m_rgDictionary.ContainsKey(strWord))
                            m_rgDictionary.Add(strWord, 1);
                        else
                            m_rgDictionary[strWord]++;
                    }

                    for (int j = 0; j < rgrgstrTarget[i].Count; j++)
                    {
                        string strWord = rgrgstrTarget[i][j];

                        if (!m_rgDictionary.ContainsKey(strWord))
                            m_rgDictionary.Add(strWord, 1);
                        else
                            m_rgDictionary[strWord]++;
                    }
                }

                // NOTE: Start at index 2 to save room for the START and END tokens, where
                // START = 0 in the model word vectors and
                // END = 0 in the next word softmax.
                int nIdx = 2;
                foreach (KeyValuePair<string, int> kv in m_rgDictionary)
                {
                    if (kv.Value > 0)
                    {
                        // Add word to vocabulary.
                        m_rgWordToIndex[kv.Key] = nIdx;
                        m_rgIndexToWord[nIdx] = kv.Key;
                        m_rgstrVocabulary.Add(kv.Key);
                        nIdx++;
                    }
                }
            }
        }

        /// <summary>
        /// Defines the arguments passed to the OnGetData event.
        /// </summary>
        public class OnGetDataArgs : EventArgs
        {
            Vocabulary m_vocab;
            IterationInfo m_iter;

            /// <summary>
            /// The constructor.
            /// </summary>
            public OnGetDataArgs(Vocabulary vocab, IterationInfo iter)
            {
                m_vocab = vocab;
                m_iter = iter;
            }

            /// <summary>
            /// Returns the vocabulary.
            /// </summary>
            public Vocabulary Vocabulary
            {
                get { return m_vocab; }
            }

            /// <summary>
            /// Returns the current iteration information.
            /// </summary>
            public IterationInfo IterationInfo
            {
                get { return m_iter; }
            }
        }
    }
}
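
A brief illustrative sketch (not part of the MyCaffe sources) showing how the public Vocabulary class defined above can be exercised on its own; the word lists below are hypothetical, and in normal use the TextDataLayer builds the vocabulary itself from the encoder_source and decoder_source files via PreProcessInputFiles.

using System;
using System.Collections.Generic;
using MyCaffe.layers.beta.TextData;

class VocabularyExample
{
    static void Main()
    {
        // Hypothetical parallel encoder/decoder word lists (one line each).
        List<List<string>> rgrgstrInput = new List<List<string>>()
        {
            new List<string>() { "como", "estas", "hoy" }
        };
        List<List<string>> rgrgstrTarget = new List<List<string>>()
        {
            new List<string>() { "how", "are", "you", "today" }
        };

        // Build the word <-> index mappings; indices start at 2 because the
        // first two slots are reserved for the special start/end tokens.
        Vocabulary vocab = new Vocabulary();
        vocab.Load(rgrgstrInput, rgrgstrTarget);

        Console.WriteLine("Vocabulary size: " + vocab.VocabularCount);

        foreach (string strWord in new string[] { "como", "how", "today" })
        {
            int nIdx = vocab.WordToIndex(strWord);
            Console.WriteLine("'{0}' -> {1} -> '{2}'", strWord, nIdx, vocab.IndexToWord(nIdx));
        }
    }
}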