MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
PositionalEncodingLayer.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers.gpt
{
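    /// <summary>
    /// The PositionalEncodingLayer is a neuron layer that adds a fixed sinusoidal
    /// positional encoding to the input embeddings.
    /// </summary>
    /// <typeparam name="T">Specifies the base type (float or double).</typeparam>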
    public class PositionalEncodingLayer<T> : Layer<T>
    {
        Blob<T> m_blobPosEnc;
        List<int> m_rgShape = new List<int>() { 1, 1, 1 };
        double m_dfScale;
        int m_nBlockSize;
        int m_nEmbed;

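        /// <summary>
        /// The PositionalEncodingLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter containing the positional_encoder_param settings.</param>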
        public PositionalEncodingLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.POSITIONAL_ENCODER;
            m_nBlockSize = (int)p.positional_encoder_param.block_size;
            m_nEmbed = (int)p.positional_encoder_param.embed;
            m_dfScale = Math.Sqrt(m_nEmbed);

            m_blobPosEnc = new Blob<T>(m_cuda, m_log, false);
            m_blobPosEnc.Name = p.name + " posenc";

            setup_internal_blobs(m_colInternalBlobs);
        }

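        /// <summary>
        /// Release any resources used.
        /// </summary>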
        protected override void dispose()
        {
            dispose(ref m_blobPosEnc);
            base.dispose();
        }

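        /// <summary>
        /// Add all internal blobs used by the layer to the 'col' provided.
        /// </summary>
        /// <param name="col">Specifies the collection that receives the internal blobs.</param>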
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            col.Add(m_blobPosEnc);
        }

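        /// <summary>
        /// Returns the exact number of required bottom (input) Blobs: embed
        /// </summary>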
        public override int ExactNumBottomBlobs
        {
            get { return 1; }
        }

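        /// <summary>
        /// Returns the exact number of required top (output) Blobs: embed
        /// </summary>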
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

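        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>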
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
        }

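        /// <summary>
        /// Reshape the data as needed by the layer and (re)build the positional encoding table.
        /// </summary>
        /// <remarks>
        /// Following the loops below, for position 'pos' and channel 'i' the table holds
        /// PE(pos, i) = sin(pos / 10000^(2i/embed)) when 'i' is even, and
        /// PE(pos, i) = cos(pos / 10000^(2i/embed)) when 'i' is odd.
        /// The single (block_size x embed) table is then copied across the batch dimension.
        /// </remarks>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>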
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            colTop[0].ReshapeLike(colBottom[0]);

            int nBatch = colBottom[0].num;
            m_rgShape[0] = nBatch;
            m_rgShape[1] = m_nBlockSize;
            m_rgShape[2] = m_nEmbed;

            shareLayerBlob(m_blobPosEnc, m_rgShape);
            if (!m_blobPosEnc.CompareShape(m_rgShape, true))
            {
                m_blobPosEnc.Reshape(m_rgShape);
                m_blobPosEnc.Reshape(1, m_rgShape[1], m_rgShape[2], 1);
                int nDim = m_nBlockSize * m_nEmbed;

                if (typeof(T) == typeof(float))
                {
                    float[] rgPosEnc1 = new float[nDim];
                    for (int pos = 0; pos < m_nBlockSize; pos++)
                    {
                        for (int i = 0; i < m_nEmbed; i++)
                        {
                            int nIdx = pos * m_nEmbed + i;
                            double df1 = 2 * i / (double)m_nEmbed;
                            double dfPow = Math.Pow(10000, df1);
                            double dfPos = pos / dfPow;

                            if (i % 2 == 0)
                            {
                                double dfSin = Math.Sin(dfPos);
                                rgPosEnc1[nIdx] = (float)dfSin;
                            }
                            else if (i % 2 == 1)
                            {
                                double dfCos = Math.Cos(dfPos);
                                rgPosEnc1[nIdx] = (float)dfCos;
                            }
                        }
                    }

                    m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
                }
                else
                {
                    double[] rgPosEnc1 = new double[nDim];
                    for (int pos = 0; pos < m_nBlockSize; pos++)
                    {
                        for (int i = 0; i < m_nEmbed; i++)
                        {
                            int nIdx = pos * m_nEmbed + i;
                            double df1 = 2 * i / (double)m_nEmbed;
                            double dfPow = Math.Pow(10000, df1);
                            double dfPos = pos / dfPow;

                            if (i % 2 == 0)
                            {
                                double dfSin = Math.Sin(dfPos);
                                rgPosEnc1[nIdx] = dfSin;
                            }
                            else if (i % 2 == 1)
                            {
                                double dfCos = Math.Cos(dfPos);
                                rgPosEnc1[nIdx] = dfCos;
                            }
                        }
                    }

                    m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
                }

                if (nBatch > 1)
                {
                    m_blobPosEnc.Reshape(m_rgShape);

                    for (int i = 1; i < nBatch; i++)
                    {
                        m_cuda.copy(nDim, m_blobPosEnc.gpu_data, m_blobPosEnc.mutable_gpu_data, 0, i * nDim);
                    }
                }
            }
        }

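        /// <summary>
        /// Forward computation: adds the positional encoding to the input, with the input
        /// scaled by m_dfScale = sqrt(embed) (matching the scaling applied in the backward pass).
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>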
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;
            int nCount = colBottom[0].count();

            m_cuda.add(nCount, m_blobPosEnc.gpu_data, hBottomData, hTopData, m_dfScale);
        }

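        /// <summary>
        /// Computes the error gradient w.r.t. the PositionalEncoder inputs: the top diff
        /// is scaled by m_dfScale = sqrt(embed) and written to the bottom diff.
        /// </summary>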
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopDiff = colTop[0].gpu_diff;
            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            int nCount = colBottom[0].count();

            m_cuda.scale(nCount, m_dfScale, hTopDiff, hBottomDiff);
        }
    }
}
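A minimal usage sketch follows, assuming an existing CudaDnn&lt;float&gt; connection 'cuda' and Log 'log'; the blob shapes and parameter values are illustrative only.

// Build a LayerParameter for the positional encoder and run the layer directly.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.POSITIONAL_ENCODER);
p.name = "posenc1";
p.positional_encoder_param.block_size = 128;  // maximum sequence length
p.positional_encoder_param.embed = 192;       // embedding dimension

PositionalEncodingLayer<float> layer = new PositionalEncodingLayer<float>(cuda, log, p);

// Bottom holds token embeddings shaped (batch, block_size, embed).
Blob<float> blobBottom = new Blob<float>(cuda, log);
blobBottom.Reshape(new List<int>() { 20, 128, 192 });
Blob<float> blobTop = new Blob<float>(cuda, log);

BlobCollection<float> colBottom = new BlobCollection<float>();
colBottom.Add(blobBottom);
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(blobTop);

layer.Setup(colBottom, colTop);    // runs LayerSetUp and Reshape
layer.Forward(colBottom, colTop);  // top = posenc + sqrt(embed) * bottom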