MyCaffe 1.12.2.41
Deep learning software for Windows C# programmers.
LSTMLayer.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using MyCaffe.basecode;
7using MyCaffe.db.image;
8using MyCaffe.common;
9using MyCaffe.param;
10
11namespace MyCaffe.layers
12{
58 public class LSTMLayer<T> : RecurrentLayer<T>
59 {
80 public LSTMLayer(CudaDnn<T> cuda, Log log, LayerParameter p, CancelEvent evtCancel)
81 : base(cuda, log, p, evtCancel)
82 {
83 m_type = LayerParameter.LayerType.LSTM;
84 }
85
90 protected override void RecurrentInputBlobNames(List<string> rgNames)
91 {
92 rgNames.Clear();
93 rgNames.Add("h_0");
94 rgNames.Add("c_0");
95 }
96
101 protected override void RecurrentOutputBlobNames(List<string> rgNames)
102 {
103 rgNames.Clear();
104 rgNames.Add("h_" + m_nT.ToString());
105 rgNames.Add("c_T");
106 }
107
112 protected override void RecurrentInputShapes(List<BlobShape> rgShapes)
113 {
114 int nNumBlobs = 2;
115
116 rgShapes.Clear();
117
118 for (int i = 0; i < nNumBlobs; i++)
119 {
120 BlobShape s = new param.BlobShape();
121 s.dim.Add(1); // a single timestep
122 s.dim.Add(m_nN);
123 s.dim.Add((int)m_param.recurrent_param.num_output);
124 rgShapes.Add(s);
125 }
126 }
127
133 protected override void OutputBlobNames(List<string> rgNames)
134 {
135 rgNames.Clear();
136 rgNames.Add("h");
137 }
138
143 protected override void FillUnrolledNet(NetParameter net_param)
144 {
145 uint nNumOutput = m_param.recurrent_param.num_output;
146 m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
147 FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
148 FillerParameter bias_filler = m_param.recurrent_param.bias_filler;
149
150 // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
151 // use to save redundant code.
152 LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
153 hidden_param.inner_product_param.num_output = nNumOutput * 4; // 4 chunks: input, forget, output and cell-update (g) gates.
154 hidden_param.inner_product_param.bias_term = false;
155 hidden_param.inner_product_param.axis = 2;
156 hidden_param.inner_product_param.weight_filler = weight_filler.Clone();
157
158 LayerParameter biased_hidden_param = hidden_param.Clone(false);
159 biased_hidden_param.inner_product_param.bias_term = true;
160 biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();
161
162 LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
163 sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;
164
165 LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
166 scale_param.scale_param.axis = 0;
167
168 LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
169 slice_param.slice_param.axis = 0;
170
171 LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);
172
173 List<BlobShape> rgInputShapes = new List<BlobShape>();
174 RecurrentInputShapes(rgInputShapes);
175 m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");
176
177
178 //--- Add the layers ---
179
180 LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
181 input_layer_param.top.Add("c_0");
182 input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
183 input_layer_param.top.Add("h_0");
184 input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
185 net_param.layer.Add(input_layer_param);
186
187 LayerParameter cont_slice_param = slice_param.Clone(false);
188 cont_slice_param.name = "cont_slice";
189 cont_slice_param.bottom.Add("cont");
190 cont_slice_param.slice_param.axis = 0;
191 net_param.layer.Add(cont_slice_param);
192
193 // Add layer to transform all timesteps of x to the hidden state dimension.
194 // W_xc_x = W_xc * x + b_c
195 {
196 LayerParameter x_transform_param = biased_hidden_param.Clone(false);
197 x_transform_param.name = "x_transform";
198 x_transform_param.parameters.Add(new ParamSpec("W_xc"));
199 x_transform_param.parameters.Add(new ParamSpec("b_c"));
200 x_transform_param.bottom.Add("x");
201 x_transform_param.top.Add("W_xc_x");
202 x_transform_param.propagate_down.Add(true);
203 net_param.layer.Add(x_transform_param);
204 }
205
206 if (m_bStaticInput)
207 {
208 // Add layer to transform x_static to the hidden state dimension.
209 // W_xc_x_static = W_xc_static * x_static
210 LayerParameter x_static_transform_param = hidden_param.Clone(false);
211 x_static_transform_param.inner_product_param.axis = 1;
212 x_static_transform_param.name = "W_xc_x_static";
213 x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
214 x_static_transform_param.bottom.Add("x_static");
215 x_static_transform_param.top.Add("W_xc_x_static_preshape");
216 x_static_transform_param.propagate_down.Add(true);
217 net_param.layer.Add(x_static_transform_param);
218
219 LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
220 BlobShape new_shape = reshape_param.reshape_param.shape;
221 new_shape.dim.Add(1); // One timestep.
222 new_shape.dim.Add(-1); // Infer this dimension (should be m_nN) so the reshape adapts to the batch size.
223 new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
224 reshape_param.name = "W_xc_x_static_reshape";
225 reshape_param.bottom.Add("W_xc_x_static_preshape");
226 reshape_param.top.Add("W_xc_x_static");
227 net_param.layer.Add(reshape_param);
228 }
229
230 LayerParameter x_slice_param = slice_param.Clone(false);
231 x_slice_param.name = "W_xc_x_slice";
232 x_slice_param.bottom.Add("W_xc_x");
233 net_param.layer.Add(x_slice_param);
234
235 LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
236 output_concat_layer.name = "h_concat";
237 output_concat_layer.top.Add("h");
238 output_concat_layer.concat_param.axis = 0;
239
240 for (int t = 1; t <= m_nT; t++)
241 {
242 string tm1s = (t - 1).ToString();
243 string ts = t.ToString();
244
245 cont_slice_param.top.Add("cont_" + ts);
246 x_slice_param.top.Add("W_xc_x_" + ts);
247
248
249 // Add layer to flush the hidden state when beginning a new sequence,
250 // as indicated by cont_t.
251 // h_conted_{t-1} := cont_t * h_{t-1}
252 //
253 // Normally, cont_t is binary (i.e., 0 or 1), so:
254 // h_conted_{t-1} := h_{t-1} if cont_t == 1
255 // 0 otherwise.
256 {
257 LayerParameter cont_h_param = scale_param.Clone(false);
258 cont_h_param.group_start = true;
259 cont_h_param.name = "h_conted_" + tm1s;
260 cont_h_param.bottom.Add("h_" + tm1s);
261 cont_h_param.bottom.Add("cont_" + ts);
262 cont_h_param.top.Add("h_conted_" + tm1s);
263 net_param.layer.Add(cont_h_param);
264 }
265
266 // Add layer to compute
267 // W_hc_h_{t-1} := W_hc * h_conted_{t-1}
268 {
269 LayerParameter w_param = hidden_param.Clone(false);
270 w_param.name = "transform_" + ts;
271 w_param.parameters.Add(new ParamSpec("W_hc"));
272 w_param.bottom.Add("h_conted_" + tm1s);
273 w_param.top.Add("W_hc_h_" + tm1s);
274 w_param.inner_product_param.axis = 2;
275 net_param.layer.Add(w_param);
276 }
277
278 // Add the outputs of the linear transformations to compute the gate input.
279 // gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
280 // = W_hc_h_{t-1} + W_xc_x_t + b_c
281 {
282 LayerParameter input_sum_layer = sum_param.Clone(false);
283 input_sum_layer.name = "gate_input_" + ts;
284 input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
285 input_sum_layer.bottom.Add("W_xc_x_" + ts);
286 if (m_bStaticInput)
287 input_sum_layer.bottom.Add("W_xc_x_static");
288 input_sum_layer.top.Add("gate_input_" + ts);
289 net_param.layer.Add(input_sum_layer);
290 }
291
292 // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
293 // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
294 // Outputs: c_t, h_t
295 // [ i_t' ]
296 // [ f_t' ] := gate_input_t
297 // [ o_t' ]
298 // [ g_t' ]
299 // i_t := \sigmoid[i_t']
300 // f_t := \sigmoid[f_t']
301 // o_t := \sigmoid[o_t']
302 // g_t := \tanh[g_t']
303 // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
304 // h_t := o_t .* \tanh[c_t]
305 {
306 LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
307 lstm_unit_param.bottom.Add("c_" + tm1s);
308 lstm_unit_param.bottom.Add("gate_input_" + ts);
309 lstm_unit_param.bottom.Add("cont_" + ts);
310 lstm_unit_param.top.Add("c_" + ts);
311 lstm_unit_param.top.Add("h_" + ts);
312 lstm_unit_param.name = "unit_" + ts;
313 net_param.layer.Add(lstm_unit_param);
314 }
315
316 output_concat_layer.bottom.Add("h_" + ts);
317 }
318
319 {
320 LayerParameter c_T_copy_param = split_param.Clone(false);
321 c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
322 c_T_copy_param.top.Add("c_T");
323 net_param.layer.Add(c_T_copy_param);
324 }
325
326 net_param.layer.Add(output_concat_layer.Clone(false));
327 }
328 }
329}
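A minimal usage sketch (not part of the listing above), assuming an existing CudaDnn<float> cuda, Log log, and CancelEvent evtCancel, and assuming LayerParameter.LayerType.LSTM is the type value that selects this layer:

// Hedged sketch, not from LSTMLayer.cs: configure and construct the layer.
// 'cuda', 'log' and 'evtCancel' are assumed to already exist; 32 is an arbitrary hidden size.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.LSTM);
p.recurrent_param.num_output = 32; // hidden/output dimension per timestep (must be > 0)
LSTMLayer<float> lstm = new LSTMLayer<float>(cuda, log, p, evtCancel);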
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
Definition: CancelEvent.cs:17
The Log class provides general output in text form.
Definition: Log.cs:13
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
Definition: Log.cs:299
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
The LSTMLayer processes sequential inputs using a 'Long Short-Term Memory' (LSTM) [1] style recurrent...
Definition: LSTMLayer.cs:59
LSTMLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel)
The LSTMLayer constructor.
Definition: LSTMLayer.cs:80
override void FillUnrolledNet(NetParameter net_param)
Fills the NetParameter with the LSTM network architecture.
Definition: LSTMLayer.cs:143
override void RecurrentOutputBlobNames(List< string > rgNames)
Fills the rgNames array with names of the Tth timestep recurrent output Blobs.
Definition: LSTMLayer.cs:101
override void RecurrentInputBlobNames(List< string > rgNames)
Fills the rgNames array with the names of the 0th timestep recurrent input Blobs.
Definition: LSTMLayer.cs:90
override void OutputBlobNames(List< string > rgNames)
Fills the rgNames array with the names of the output Blobs, concatenated across all timesteps.
Definition: LSTMLayer.cs:133
override void RecurrentInputShapes(List< BlobShape > rgShapes)
Fill the rgShapes array with the shapes of the recurrent input Blobs.
Definition: LSTMLayer.cs:112
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
The RecurrentLayer is an abstract class for implementing recurrent behavior inside of an unrolled net...
int m_nN
The number of independent streams to process simultaneously.
int m_nT
The number of timesteps in the layer's input, and the number of timesteps over which to backpropagate...
bool m_bStaticInput
Whether the layer has a 'static' input that is copied across all timesteps.
int axis
The first axis of bottom[0] (the first input Blob) along which to apply bottom[1] (the second input B...
Specifies the shape of a Blob.
Definition: BlobShape.cs:15
BlobShape()
The BlobShape constructor.
Definition: BlobShape.cs:21
List< int > dim
The blob shape dimensions.
Definition: BlobShape.cs:93
int axis
The axis along which to concatenate – may be negative to index from the end (e.g.,...
Specifies the parameters for the EltwiseLayer.
EltwiseOp
Defines the operation to perform.
EltwiseOp operation
Specifies the element-wise operation.
Specifies the filler parameters used to create each Filler.
FillerParameter Clone()
Creates a new copy of this instance of the parameter.
FillerParameter weight_filler
The filler for the weights.
int axis
Specifies the first axis to be lumped into a single inner product computation; all preceding axes are...
FillerParameter bias_filler
The filler for the bias.
uint num_output
The number of outputs for the layer.
bool bias_term
Whether to have bias terms or not.
List< BlobShape > shape
Define N shapes to set a shape for each top. Define 1 shape to set the same shape for every top....
Specifies the base parameter for all layers.
LayerParameter()
Constructor for the parameter.
List< ParamSpec > parameters
Specifies the ParamSpec parameters of the LayerParameter.
SliceParameter slice_param
Returns the parameter set when initialized with LayerType.SLICE
string name
Specifies the name of this LayerParameter.
List< bool > propagate_down
Specifies whether or not the LayerParameter (or portions of it) should be backpropagated.
ScaleParameter scale_param
Returns the parameter set when initialized with LayerType.SCALE
bool group_start
Specifies whether or not this node is the start of a new group - this is only used when rendering mod...
EltwiseParameter eltwise_param
Returns the parameter set when initialized with LayerType.ELTWISE
InputParameter input_param
Returns the parameter set when initialized with LayerType.INPUT
List< string > top
Specifies the active top connections (in the bottom, out the top)
ReshapeParameter reshape_param
Returns the parameter set when initialized with LayerType.RESHAPE
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
ConcatParameter concat_param
Returns the parameter set when initialized with LayerType.CONCAT
RecurrentParameter recurrent_param
Returns the parameter set when initialized with LayerType.RECURRENT
List< string > bottom
Specifies the active bottom connections (in the bottom, out the top).
LayerType
Specifies the layer type.
virtual LayerParameter Clone(bool bCloneBlobs)
Creates a new copy of this instance of the parameter.
Specifies the parameters used to create a Net
Definition: NetParameter.cs:18
List< LayerParameter > layer
The layers that make up the net. Each of their configurations, including connectivity and behavior,...
Specifies training parameters (multipliers on global learning constants, and the name of other settin...
Definition: ParamSpec.cs:19
uint num_output
The dimension of the output (and usually hidden state) representation – must be explicitly set to non...
FillerParameter weight_filler
The filler for the weights.
FillerParameter bias_filler
The filler for the bias.
BlobShape shape
Specifies the output dimensions.
int axis
Specifies the axis along which to slice – may be negative to index from the end (e....
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
The MyCaffe.db.image namespace contains all image database related classes.
Definition: Database.cs:18
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12