MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
GluLayer.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers.tft
{
    /// <summary>
    /// The GluLayer implements the Gated Linear Unit layer, which gates its input
    /// through a learned sigmoid: y = fc2(x) * sigmoid(fc1(x)).
    /// </summary>
    /// <typeparam name="T">Specifies the base type.</typeparam>
    public class GluLayer<T> : Layer<T>
    {
        Layer<T> m_ip1Layer;
        Layer<T> m_ip2Layer;
        Layer<T> m_modLayer;
        Blob<T> m_blobIp1;
        Blob<T> m_blobIp2;
        Blob<T> m_blobMod;
        Blob<T> m_blobBtm;
        BlobCollection<T> m_colTop = new BlobCollection<T>();
        BlobCollection<T> m_colBtm = new BlobCollection<T>();

        /// <summary>
        /// The constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter.</param>
        public GluLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.GLU;

            m_blobIp1 = new Blob<T>(cuda, log);
            m_blobIp1.Name = p.name + ".ip1";
            m_blobIp2 = new Blob<T>(cuda, log);
            m_blobIp2.Name = p.name + ".ip2";
            m_blobMod = new Blob<T>(cuda, log);
            m_blobMod.Name = p.name + ".mod";
            m_blobBtm = new Blob<T>(cuda, log);
            m_blobBtm.Name = p.name + ".btm";
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            dispose(ref m_blobIp1);
            dispose(ref m_blobIp2);
            dispose(ref m_blobMod);
            dispose(ref m_blobBtm);

            dispose(ref m_ip1Layer);
            dispose(ref m_ip2Layer);
            dispose(ref m_modLayer);
        }

        /// <summary>
        /// Derivative layers should add all internal blobs to the 'col' provided.
        /// </summary>
        /// <param name="col">Specifies the collection of internal blobs.</param>
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            col.Add(m_blobIp1);
            col.Add(m_blobIp2);
            col.Add(m_blobMod);
            col.Add(m_blobBtm);
        }

        /// <summary>
        /// Returns the exact number of required bottom (input) Blobs: x
        /// </summary>
        public override int ExactNumBottomBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs: y
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        private void addBtmTop(Blob<T> btm, Blob<T> top)
        {
            m_colBtm.Clear();
            m_colBtm.Add(btm);
            m_colTop.Clear();
            m_colTop.Add(top);
        }

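        // Wiring overview (summary comment derived from the setup and forward code
        // below): the single bottom blob x feeds two inner-product layers, fc1 and
        // fc2.  The fc1 output passes through the sigmoid modulation layer, and the
        // top blob is the element-wise product of the fc2 output and that gate.
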
        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: x</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs: y</param>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_blobBtm.ReshapeLike(colBottom[0]);

            if (m_ip1Layer == null)
            {
                LayerParameter ip1 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".fc1");
                ip1.inner_product_param.num_output = (uint)m_param.glu_param.input_dim;

                m_ip1Layer = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip1, m_param), null);

                addBtmTop(colBottom[0], m_blobIp1);
                m_ip1Layer.Setup(m_colBtm, m_colTop);
                blobs.Add(m_ip1Layer.blobs);
            }

            if (m_modLayer == null)
            {
                if (m_param.glu_param.modulation == param.tft.GluParameter.MODULATION.SIGMOID)
                {
                    LayerParameter mod = new LayerParameter(LayerParameter.LayerType.SIGMOID, m_param.name + ".mod");

                    m_modLayer = Layer<T>.Create(m_cuda, m_log, convertLayerParam(mod, m_param), null);

                    addBtmTop(m_blobIp1, m_blobMod);
                    m_modLayer.Setup(m_colBtm, m_colTop);
                }
                else
                {
                    m_log.FAIL("Unknown modulation type '" + m_param.glu_param.modulation.ToString() + "'");
                }
            }

            if (m_ip2Layer == null)
            {
                LayerParameter ip2 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT, m_param.name + ".fc2");
                ip2.inner_product_param.num_output = (uint)m_param.glu_param.input_dim;

                m_ip2Layer = Layer<T>.Create(m_cuda, m_log, convertLayerParam(ip2, m_param), null);

                addBtmTop(colBottom[0], m_blobIp2);
                m_ip2Layer.Setup(m_colBtm, m_colTop);
                blobs.Add(m_ip2Layer.blobs);

                colTop[0].ReshapeLike(m_blobIp2);
            }
        }

        /// <summary>
        /// Reshape the top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: x</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs: y</param>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_blobBtm.ReshapeLike(colBottom[0]);

            addBtmTop(colBottom[0], m_blobIp1);
            m_ip1Layer.Reshape(m_colBtm, m_colTop);

            addBtmTop(m_blobIp1, m_blobMod);
            m_modLayer.Reshape(m_colBtm, m_colTop);

            addBtmTop(colBottom[0], m_blobIp2);
            m_ip2Layer.Reshape(m_colBtm, m_colTop);

            colTop[0].ReshapeLike(m_blobIp2);
        }

        /// <summary>
        /// Forward computation: y = fc2(x) * sigmoid(fc1(x))
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: x</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs: y</param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_blobBtm.CopyFrom(colBottom[0]);

            addBtmTop(colBottom[0], m_blobIp1);
            m_ip1Layer.Forward(m_colBtm, m_colTop);     // x1 = fc1(x)

            addBtmTop(m_blobIp1, m_blobMod);
            m_modLayer.Forward(m_colBtm, m_colTop);     // sig = sigmoid(x1)

            addBtmTop(colBottom[0], m_blobIp2);
            m_ip2Layer.Forward(m_colBtm, m_colTop);     // x2 = fc2(x)

            m_cuda.mul(colTop[0].count(), m_blobIp2.gpu_data, m_blobMod.gpu_data, colTop[0].mutable_gpu_data); // y = x2 * sig
        }

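        // Gradient sketch (added for clarity; with y = x2 * sig and sig = sigmoid(x1)):
        //   dL/dx2  = dL/dy * sig
        //   dL/dsig = dL/dy * x2
        // The internal sigmoid layer then maps dL/dsig to dL/dx1 through
        // sigmoid'(x1) = sig * (1 - sig), and each inner-product layer maps its
        // output gradient back to its input.  Because x feeds both fc1 and fc2,
        // the two resulting bottom gradients are summed at the end of backward().
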
        /// <summary>
        /// Computes the error gradient w.r.t. the GLU inputs.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs, whose diff contains the error gradient.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate the gradient down to each bottom.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs: x</param>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            // sig grad = y grad * x2
            m_cuda.mul(colTop[0].count(), colTop[0].gpu_diff, m_blobIp2.gpu_data, m_blobMod.mutable_gpu_diff);

            // x2 grad = y grad * sig
            m_cuda.mul(colTop[0].count(), colTop[0].gpu_diff, m_blobMod.gpu_data, m_blobIp2.mutable_gpu_diff);

            addBtmTop(m_blobBtm, m_blobIp2);
            m_ip2Layer.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            addBtmTop(m_blobIp1, m_blobMod);
            m_modLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            addBtmTop(colBottom[0], m_blobIp1);
            m_ip1Layer.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            // Add the gradients flowing back to x through fc1 and fc2.
            m_cuda.add(colBottom[0].count(), m_blobBtm.gpu_diff, colBottom[0].gpu_diff, colBottom[0].mutable_gpu_diff);
        }
    }
}
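
Example usage (a minimal sketch, not part of the original file): the fragment below
assumes an existing CudaDnn<float> connection 'cuda' and Log 'log', an illustrative
input_dim and input shape, and that GluParameter defaults to SIGMOID modulation.

// Build a GLU layer from a LayerParameter and run one forward/backward pass.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.GLU, "glu1");
p.glu_param.input_dim = 64;                                 // illustrative width for fc1/fc2

Layer<float> glu = Layer<float>.Create(cuda, log, p, null);

Blob<float> x = new Blob<float>(cuda, log);
x.Reshape(new List<int>() { 32, 30, 64 });                  // e.g. (batch, steps, input_dim)
// ... fill x.mutable_cpu_data with input values ...

Blob<float> y = new Blob<float>(cuda, log);
BlobCollection<float> colBtm = new BlobCollection<float>();
colBtm.Add(x);
BlobCollection<float> colTop = new BlobCollection<float>();
colTop.Add(y);

glu.Setup(colBtm, colTop);
glu.Forward(colBtm, colTop);                                // y = fc2(x) * sigmoid(fc1(x))
glu.Backward(colTop, new List<bool>() { true }, colBtm);    // fills x.diff from y.diff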