MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
GateAddNormLayer.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers.tft
{
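    // Overview (accurate to the forward pass below): the layer computes
    //   top = LayerNorm(GLU(Dropout(bottom[0])) + residual)
    // where the residual is taken from bottom[1] when a second bottom blob is supplied,
    // and Dropout is only used when a dropout ratio is configured.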
    public class GateAddNormLayer<T> : Layer<T>
    {
        int m_nBlocks;
        Layer<T> m_dropout = null;
        Layer<T> m_gate = null;
        Layer<T> m_layerNorm = null;
        Blob<T> m_blobResidual = null;
        Blob<T> m_blobDrop = null;
        Blob<T> m_blobGate = null;
        Blob<T> m_blobGateAddResidual = null;
        BlobCollection<T> m_colTop = new BlobCollection<T>();
        BlobCollection<T> m_colBtm = new BlobCollection<T>();
        List<int> m_rgShape = new List<int>(4);

        public GateAddNormLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.GATEADDNORM;

            if (p.gateaddnorm_param.dropout_ratio > 0) // the dropout blob is only needed when a dropout ratio is specified
            {
                m_blobDrop = new Blob<T>(cuda, log);
                m_blobDrop.Name = p.name + ".drop";
            }

            m_blobResidual = new Blob<T>(cuda, log);
            m_blobResidual.Name = p.name + ".residual";
            m_blobGate = new Blob<T>(cuda, log);
            m_blobGate.Name = p.name + ".gate";
            m_blobGateAddResidual = new Blob<T>(cuda, log);
            m_blobGateAddResidual.Name = p.name + ".gateres";
        }

        protected override void dispose()
        {
            dispose(ref m_blobResidual);
            dispose(ref m_blobGate);
            dispose(ref m_blobGateAddResidual);
            dispose(ref m_blobDrop);

            dispose(ref m_dropout);
            dispose(ref m_gate);
            dispose(ref m_layerNorm);
        }

        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            if (m_blobDrop != null)
                col.Add(m_blobDrop);
            col.Add(m_blobGate);
            col.Add(m_blobResidual);
        }

        public override int MinBottomBlobs
        {
            get { return 1; }
        }

        public override int MaxBottomBlobs
        {
            get { return 2; }
        }

        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        private void addBtmTop(Blob<T> btm, Blob<T> top)
        {
            m_colBtm.Clear();
            m_colBtm.Add(btm);
            m_colTop.Clear();
            m_colTop.Add(top);
        }

        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            LayerParameter p; // local parameter used below to build the internal sub-layers
            Blob<T> blobBtm = colBottom[0];

            if (colBottom.Count > 1)
            {
                if (m_param.gateaddnorm_param.residual_channel_offset > 0)
                {
                    int nDiff = colBottom[1].channels - m_param.gateaddnorm_param.residual_channel_offset;
                    if (colBottom[1].channels % nDiff != 0)
                        m_log.FAIL("The bottom(1).channels must be divisible by (bottom(1).channels - residual_channel_offset). For example, if bottom(1).channels = 120 and residual_channel_offset = 90, the difference = 30, which is a factor of both 120 and 90.");
                }
            }

            if (m_param.gateaddnorm_param.dropout_ratio > 0) // dropout is only applied when a dropout ratio is configured
            {
                if (m_dropout == null)
                {
                    p = new LayerParameter(LayerParameter.LayerType.DROPOUT, m_param.name + ".drop");
                    m_dropout = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);

                    addBtmTop(colBottom[0], m_blobDrop);
                    m_dropout.Setup(m_colBtm, m_colTop);
                }
                blobBtm = m_blobDrop;
            }

            if (m_gate == null)
            {
                p = new LayerParameter(LayerParameter.LayerType.GLU, m_param.name + ".glu");
                m_gate = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);

                addBtmTop(blobBtm, m_blobGate);
                m_gate.Setup(m_colBtm, m_colTop);
                blobs.Add(m_gate.blobs);
            }
            m_blobGateAddResidual.ReshapeLike(m_blobGate);

            if (m_layerNorm == null)
            {
                p = new LayerParameter(LayerParameter.LayerType.LAYERNORM, m_param.name + ".layernorm");
                m_layerNorm = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);
                addBtmTop(m_blobGate, colTop[0]);
                m_layerNorm.Setup(m_colBtm, m_colTop);
            }

            setup_internal_blobs(m_colInternalBlobs);
        }

        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            Blob<T> blobBtm = colBottom[0];

            if (colBottom.Count > 1)
            {
                if (m_param.gateaddnorm_param.residual_channel_offset > 0)
                {
                    int nDiff = colBottom[1].channels - m_param.gateaddnorm_param.residual_channel_offset;
                    m_log.CHECK_EQ(colBottom[1].channels % nDiff, 0, "The bottom(1).channels must be divisible by (bottom(1).channels - residual_channel_offset)!");
                    m_nBlocks = colBottom[1].channels / nDiff;

                    int nQTimeSteps = nDiff;
                    m_rgShape.Clear();
                    m_rgShape.Add(colBottom[0].num);
                    m_rgShape.Add(nQTimeSteps);
                    m_rgShape.Add(colBottom[0].count(2));
                    m_blobResidual.Reshape(m_rgShape);
                }
                else
                {
                    m_blobResidual.ReshapeLike(colBottom[1]);
                }
            }

            if (m_dropout != null)
            {
                addBtmTop(colBottom[0], m_blobDrop);
                m_dropout.Reshape(m_colBtm, m_colTop);
                blobBtm = m_blobDrop;
            }

            addBtmTop(blobBtm, m_blobGate);
            m_gate.Reshape(m_colBtm, m_colTop);
            m_blobGateAddResidual.ReshapeLike(m_blobGate);

            addBtmTop(m_blobGate, colTop[0]);
            m_layerNorm.Reshape(m_colBtm, m_colTop);
        }

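        // The three helpers below handle the optional residual input.  When
        // residual_channel_offset > 0, only the last of the m_nBlocks channel blocks
        // (the "future" time steps) takes part in the residual connection; for example,
        // with bottom(1) of shape (256,120,64) and residual_channel_offset = 90, only the
        // last (256,30,64) slice is copied or accumulated.  Otherwise the whole blob is used.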
        private void copy_to_fwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Copy just the future items to the top; for example, if future = 30 and
                // the input shape is btm(256,120,64), only the last (256,30,64) slice is copied to the top.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_copy(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.gpu_data, bTop.mutable_gpu_data, DIR.FWD);
            }
            else
            {
                bTop.CopyFrom(bBtm);
            }
        }

        private void copy_to_bwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Copy the top gradient back into just the future items of the bottom; for example,
                // if future = 30 and the bottom shape is btm(256,120,64), the gradient fills only the last (256,30,64) slice.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_copy(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.mutable_gpu_diff, bTop.gpu_diff, DIR.BWD);
            }
            else
            {
                bBtm.CopyFrom(bTop, true);
            }
        }

        private void add_to_bwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Add the top gradient into just the future items of the bottom; for example,
                // if future = 30 and the bottom shape is btm(256,120,64), the gradient is accumulated into only the last (256,30,64) slice.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_add(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.mutable_gpu_diff, bTop.gpu_diff, DIR.BWD);
            }
            else
            {
                m_cuda.add(bTop.count(), bTop.gpu_diff, bBtm.gpu_diff, bBtm.mutable_gpu_diff);
            }
        }

        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            Blob<T> blobBtm = colBottom[0];
            copy_to_fwd(colBottom, 1, m_blobResidual);

            if (m_dropout != null)
            {
                addBtmTop(colBottom[0], m_blobDrop);
                m_dropout.Forward(m_colBtm, m_colTop);
                blobBtm = m_blobDrop;
            }

            addBtmTop(blobBtm, m_blobGate);
            m_gate.Forward(m_colBtm, m_colTop);

            if (colBottom.Count > 1)
                m_cuda.add(m_blobGateAddResidual.count(), m_blobGate.gpu_data, m_blobResidual.gpu_data, m_blobGateAddResidual.mutable_gpu_data);
            else
                m_blobGateAddResidual.CopyFrom(m_blobGate);

            addBtmTop(m_blobGateAddResidual, colTop[0]);
            m_layerNorm.Forward(m_colBtm, m_colTop);

            colTop[0].ReshapeLike(m_blobGate);
        }

        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            addBtmTop(m_blobGateAddResidual, colTop[0]);
            m_layerNorm.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            // Copy grad to the residual if it exists.
            copy_to_bwd(colBottom, 1, m_blobGateAddResidual);
            m_blobGate.CopyFrom(m_blobGateAddResidual, true);
            if (colBottom.Count > 1)
                m_blobResidual.CopyFrom(m_blobGateAddResidual, true);

            addBtmTop(colBottom[0], m_blobGate);
            m_gate.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            if (m_dropout != null)
            {
                addBtmTop(m_blobDrop, colBottom[0]);
                m_dropout.Backward(m_colTop, rgbPropagateDown, m_colBtm);
                colBottom[0].CopyFrom(m_blobDrop, true);
            }
        }
    }
}
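Example usage (not part of GateAddNormLayer.cs): the sketch below shows one way to create and run the layer programmatically. RunGateAddNorm is a hypothetical helper; it assumes an already initialized CudaDnn<float> connection 'cuda' and Log 'log', and the blob shapes and parameter values are illustrative only. Depending on the GLU and LayerNorm defaults, additional sub-layer settings may be required.

// Minimal usage sketch (illustrative only).
static void RunGateAddNorm(CudaDnn<float> cuda, Log log)
{
    LayerParameter p = new LayerParameter(LayerParameter.LayerType.GATEADDNORM, "gan1");
    p.gateaddnorm_param.dropout_ratio = 0.1;          // enable the internal Dropout sub-layer (assumed setting).
    p.gateaddnorm_param.residual_channel_offset = 0;  // use the full residual blob (assumed setting).
    Layer<float> layer = Layer<float>.Create(cuda, log, p, null);

    Blob<float> blobX = new Blob<float>(cuda, log);
    blobX.Reshape(256, 30, 64, 1);                    // (num, channels, height, width) - illustrative shape.
    Blob<float> blobResidual = new Blob<float>(cuda, log);
    blobResidual.Reshape(256, 30, 64, 1);
    Blob<float> blobY = new Blob<float>(cuda, log);

    BlobCollection<float> colBottom = new BlobCollection<float>();
    colBottom.Add(blobX);
    colBottom.Add(blobResidual);
    BlobCollection<float> colTop = new BlobCollection<float>();
    colTop.Add(blobY);

    layer.Setup(colBottom, colTop);    // creates the internal Dropout/GLU/LayerNorm sub-layers.
    layer.Forward(colBottom, colTop);  // colTop[0] = LayerNorm(GLU(Dropout(x)) + residual).
}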
The Log class provides general output in text form.
Definition: Log.cs:13
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
Definition: Blob.cs:800
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
Definition: Blob.cs:792
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
Definition: Layer.cs:815
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
Definition: Layer.cs:728
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
Definition: Layer.cs:59
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
Definition: Layer.cs:439
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
Definition: Layer.cs:1468
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
Definition: Layer.cs:875
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
Definition: Layer.cs:1134
The GateAddNormLayer implements the Dropout, Gated Linear Unit (GLU), and LayerNorm layers while adding in the r...
override int MaxBottomBlobs
Returns the max number of required bottom (input) Blobs: x, residual
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
GateAddNormLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The constructor.
override int MinBottomBlobs
Returns the min number of required bottom (input) Blobs: x
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: y
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
double dropout_ratio
Specifies the dropout ratio. (e.g. the probability that values will be dropped out and set to zero....
override void Copy(LayerParameterBase src)
Copy one parameter to another.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
LayerNormParameter layer_norm_param
Returns the parameter set when initialized with LayerType.LAYERNORM
GluParameter glu_param
Returns the parameter set when initialized with LayerType.GLU
GateAddNormParameter gateaddnorm_param
Returns the parameter set when initialized with LayerType.GATEADDNORM
LayerType
Specifies the layer type.
DropoutParameter dropout_param
Returns the parameter set when initialized with LayerType.DROPOUT
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
DIR
Defines the direction of data flow.
Definition: CudaDnn.cs:22
The MyCaffe.layers.tft namespace contains all TFT related layers.
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.
Definition: Annotation.cs:12