MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
Interfaces.cs
1using MyCaffe.basecode;
2using System;
3using System.Collections.Generic;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
8namespace MyCaffe.layers.gpt
9{
/// <summary>
/// The IVocabulary interface specifies the interface that all Vocabularies implement.
/// </summary>
public interface IVocabulary
{
    /// <summary>
    /// Returns the size of the vocabulary.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Returns the special BOS character.
    /// </summary>
    char BOS { get; }

    /// <summary>
    /// Returns the special EOS character.
    /// </summary>
    char EOS { get; }

    /// <summary>
    /// Add a new string to the vocabulary.
    /// </summary>
    /// <param name="str">The string to add.</param>
    void Add(string str);

    /// <summary>
    /// Build the vocabulary.
    /// </summary>
    /// <returns>An integer result; presumably the vocabulary size (confirm against implementations).</returns>
    int Build();

    /// <summary>
    /// Build the vocabulary from a string.
    /// </summary>
    /// <param name="strData">The string data to build the vocabulary from.</param>
    /// <returns>An integer result; presumably the vocabulary size (confirm against implementations).</returns>
    int BuildFromString(string strData);

    /// <summary>
    /// Create a target that is offset from the source by one and ends with an EOS.
    /// </summary>
    /// <param name="rgSrc">The source tokens.</param>
    /// <returns>The target tokens.</returns>
    int[] CreateTarget(int[] rgSrc);

    /// <summary>
    /// Tokenize a string of data.
    /// </summary>
    /// <param name="str">The string to tokenize.</param>
    /// <param name="bAddBos">Presumably requests that a BOS token be added (confirm against implementations).</param>
    /// <param name="bAddEos">Presumably requests that an EOS token be added (confirm against implementations).</param>
    /// <returns>The array of tokens.</returns>
    int[] Tokenize(string str, bool bAddBos, bool bAddEos);

    /// <summary>
    /// Tokenize a character into its corresponding index token.
    /// </summary>
    /// <param name="str1">The text to tokenize.</param>
    /// <param name="bMustExist">Defaults to true; presumably requires the item to already exist in the vocabulary (confirm against implementations).</param>
    /// <returns>The list of tokens.</returns>
    List<int> Tokenize(string str1, bool bMustExist = true);

    /// <summary>
    /// Detokenize an array into a string.
    /// </summary>
    /// <param name="rgf">The array to detokenize.</param>
    /// <param name="bIgnoreBos">Presumably requests that the BOS token be ignored (confirm against implementations).</param>
    /// <param name="bIgnoreEos">Presumably requests that the EOS token be ignored (confirm against implementations).</param>
    /// <returns>The detokenized string.</returns>
    string Detokenize(float[] rgf, bool bIgnoreBos, bool bIgnoreEos);

    /// <summary>
    /// Detokenize an index token into its corresponding character.
    /// </summary>
    /// <param name="nIdxToken">The index token to detokenize.</param>
    /// <param name="bIgnoreBos">Presumably requests that the BOS token be ignored (confirm against implementations).</param>
    /// <param name="bIgnoreEos">Presumably requests that the EOS token be ignored (confirm against implementations).</param>
    /// <returns>The detokenized text.</returns>
    string Detokenize(int nIdxToken, bool bIgnoreBos, bool bIgnoreEos);
}
83
/// <summary>
/// The ICustomTokenInput interface specifies the interface that all custom token inputs implement.
/// </summary>
public interface ICustomTokenInput
{
    /// <summary>
    /// Load all encoder tokens and their associated date/time.
    /// </summary>
    /// <param name="evtCancel">Specifies the cancel event.</param>
    /// <param name="log">The Log used for general text output.</param>
    /// <param name="phase">The Phase value passed by the caller.</param>
    /// <param name="nVocabSize">Output value; presumably the vocabulary size (confirm against implementations).</param>
    /// <returns>A list of tuples, each holding a date/time and two integer arrays.</returns>
    List<Tuple<DateTime, int[], int[]>> LoadAllEncoderTokens(CancelEvent evtCancel, Log log, Phase phase, out int nVocabSize);

    /// <summary>
    /// Load all decoder tokens and their associated date/time.
    /// </summary>
    /// <param name="evtCancel">Specifies the cancel event.</param>
    /// <param name="log">The Log used for general text output.</param>
    /// <param name="phase">The Phase value passed by the caller.</param>
    /// <param name="nVocabSize">Output value; presumably the vocabulary size (confirm against implementations).</param>
    /// <returns>A list of tuples, each holding a date/time and two integer arrays.</returns>
    List<Tuple<DateTime, int[], int[]>> LoadAllDecoderTokens(CancelEvent evtCancel, Log log, Phase phase, out int nVocabSize);
}
108
/// <summary>
/// The InputData is an abstract class used to get training data and tokenize input data.
/// </summary>
public abstract class InputData
{
    /// <summary>
    /// Specifies the random object made available to the derived classes.
    /// </summary>
    protected Random m_random;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="nRandomSeed">Optional seed for the random object; when null, the random object is created with its default seed.</param>
    public InputData(int? nRandomSeed = null)
    {
        // A supplied seed gives a reproducible sequence; otherwise use the default seeding.
        m_random = nRandomSeed.HasValue ? new Random(nRandomSeed.Value) : new Random();
    }

    /// <summary>
    /// Returns the raw data.
    /// </summary>
    public abstract List<string> RawData { get; }

    /// <summary>
    /// Returns the size of a single token (e.g. 1 for character data).
    /// </summary>
    public abstract uint TokenSize { get; }

    /// <summary>
    /// Returns the size of the vocabulary.
    /// </summary>
    public abstract uint VocabularySize { get; }

    /// <summary>
    /// Returns true if data is available at the given index.
    /// </summary>
    /// <param name="nIdx">The index to check.</param>
    /// <param name="bIncludeSrc">Presumably requests that source data be considered (confirm against implementations).</param>
    /// <param name="bIncludeTrg">Presumably requests that target data be considered (confirm against implementations).</param>
    /// <returns>True if data is available at the given index.</returns>
    public abstract bool GetDataAvailabilityAt(int nIdx, bool bIncludeSrc, bool bIncludeTrg);

    /// <summary>
    /// Gets a set of randomly selected source/target data, where the target may be null.
    /// </summary>
    /// <param name="nBatchSize">The batch size.</param>
    /// <param name="nBlockSize">The block size.</param>
    /// <param name="trgData">The InputData supplying target data.</param>
    /// <param name="rgnIdx">Output array of indexes; presumably the indexes that were selected (confirm against implementations).</param>
    /// <returns>A tuple of source/target data.</returns>
    public abstract Tuple<float[], float[]> GetData(int nBatchSize, int nBlockSize, InputData trgData, out int[] rgnIdx);

    /// <summary>
    /// Gets a set of source/target data from a specific index.
    /// </summary>
    /// <param name="nBatchSize">The batch size.</param>
    /// <param name="nBlockSize">The block size.</param>
    /// <param name="rgnIdx">The indexes to read from.</param>
    /// <returns>A tuple of source/target data.</returns>
    public abstract Tuple<float[], float[]> GetDataAt(int nBatchSize, int nBlockSize, int[] rgnIdx);

    /// <summary>
    /// Tokenize an input string using the internal vocabulary.
    /// </summary>
    /// <param name="str">The string to tokenize.</param>
    /// <param name="bAddBos">Presumably requests that a BOS token be added (confirm against implementations).</param>
    /// <param name="bAddEos">Presumably requests that an EOS token be added (confirm against implementations).</param>
    /// <returns>The list of tokens.</returns>
    public abstract List<int> Tokenize(string str, bool bAddBos, bool bAddEos);

    /// <summary>
    /// Detokenize a single token.
    /// </summary>
    /// <param name="nTokIdx">The token index to detokenize.</param>
    /// <param name="bIgnoreBos">Presumably requests that the BOS token be ignored (confirm against implementations).</param>
    /// <param name="bIgnoreEos">Presumably requests that the EOS token be ignored (confirm against implementations).</param>
    /// <returns>The detokenized text.</returns>
    public abstract string Detokenize(int nTokIdx, bool bIgnoreBos, bool bIgnoreEos);

    /// <summary>
    /// Detokenize an array into a string.
    /// </summary>
    /// <param name="rgf">The array to detokenize.</param>
    /// <param name="nStartIdx">The start index within the array.</param>
    /// <param name="nCount">The number of items to detokenize.</param>
    /// <param name="bIgnoreBos">Presumably requests that the BOS token be ignored (confirm against implementations).</param>
    /// <param name="bIgnoreEos">Presumably requests that the EOS token be ignored (confirm against implementations).</param>
    /// <returns>The detokenized string.</returns>
    public abstract string Detokenize(float[] rgf, int nStartIdx, int nCount, bool bIgnoreBos, bool bIgnoreEos);

    /// <summary>
    /// Return the special begin of sequence character.
    /// </summary>
    public abstract char BOS { get; }

    /// <summary>
    /// Return the special end of sequence character.
    /// </summary>
    public abstract char EOS { get; }
}
203}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
The Log class provides general output in text form.
Definition: Log.cs:13
The InputData is an abstract class used to get training data and tokenize input data.
Definition: Interfaces.cs:113
abstract uint TokenSize
Returns the size of a single token (e.g. 1 for character data)
Definition: Interfaces.cs:138
abstract string Detokenize(float[] rgf, int nStartIdx, int nCount, bool bIgnoreBos, bool bIgnoreEos)
Detokenize an array into a string.
abstract uint VocabularySize
Returns the size of the vocabulary.
Definition: Interfaces.cs:142
abstract List< string > RawData
Returns the raw data.
Definition: Interfaces.cs:134
abstract Tuple< float[], float[]> GetData(int nBatchSize, int nBlockSize, InputData trgData, out int[] rgnIdx)
Gets a set of randomly selected source/target data, where the target may be null.
abstract char EOS
Return the special end of sequence character.
Definition: Interfaces.cs:201
abstract char BOS
Return the special begin of sequence character.
Definition: Interfaces.cs:197
abstract Tuple< float[], float[]> GetDataAt(int nBatchSize, int nBlockSize, int[] rgnIdx)
Gets a set of source/target data from a specific index.
abstract List< int > Tokenize(string str, bool bAddBos, bool bAddEos)
Tokenize an input string using the internal vocabulary.
abstract bool GetDataAvailabilityAt(int nIdx, bool bIncludeSrc, bool bIncludeTrg)
Returns true if data is available at the given index.
Random m_random
Specifies the random object made available to the derived classes.
Definition: Interfaces.cs:117
abstract string Detokenize(int nTokIdx, bool bIgnoreBos, bool bIgnoreEos)
Detokenize a single token.
InputData(int? nRandomSeed=null)
The constructor.
Definition: Interfaces.cs:123
The ICustomTokenInput interface specifies the interface that all custom token inputs implement.
Definition: Interfaces.cs:88
List< Tuple< DateTime, int[], int[]> > LoadAllEncoderTokens(CancelEvent evtCancel, Log log, Phase phase, out int nVocabSize)
Load all encoder tokens and their associated date/time. evtCancel Specifies the cancel event....
List< Tuple< DateTime, int[], int[]> > LoadAllDecoderTokens(CancelEvent evtCancel, Log log, Phase phase, out int nVocabSize)
Load all decoder tokens and their associated date/time. evtCancel Specifies the cancel event....
The IVocabulary interface specifies the interface that all Vocabularies implement.
Definition: Interfaces.cs:14
char EOS
Returns the special EOS character.
Definition: Interfaces.cs:26
int Build()
Build the vocabulary.
int[] CreateTarget(int[] rgSrc)
Create a target that is offset from the source by one and ends with an EOS.
int[] Tokenize(string str, bool bAddBos, bool bAddEos)
Tokenize a string of data.
int BuildFromString(string strData)
Build the vocabulary from a string.
char BOS
Returns the special BOS character.
Definition: Interfaces.cs:22
int Count
Returns the size of the vocabulary.
Definition: Interfaces.cs:18
List< int > Tokenize(string str1, bool bMustExist=true)
Tokenize a character into its corresponding index token.
string Detokenize(int nIdxToken, bool bIgnoreBos, bool bIgnoreEos)
Detokenize an index token into its corresponding character.
string Detokenize(float[] rgf, bool bIgnoreBos, bool bIgnoreEos)
Detokenize an array into a string.
void Add(string str)
Add a new string to the vocabulary.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61
The MyCaffe.layers.gpt namespace contains all GPT related layers.
Definition: LayerFactory.cs:15
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12