MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
CausalSelfAttentionParameter.cs
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
6using MyCaffe.basecode;
7
9{
13 [Serializable]
14 [TypeConverter(typeof(ExpandableObjectConverter))]
16 {
// Backing field for the 'heads' property (number of heads used); defaults to 6.
17 uint m_nHeads = 6;
// Backing field for the 'embed' property; defaults to 192.
18 uint m_nEmbed = 192;
// Backing field for the 'attn_dropout' property (dropout probability used on the attention weights); no initializer is given here.
19 double m_dfAttnDropout;
// Backing field for the 'resid_dropout' property (dropout probability used on the residual weights); no initializer is given here.
20 double m_dfResidDropout;
// Backing field for the 'block_size' property; defaults to 128.
21 uint m_nBlockSize = 128;
// Backing field for the 'layers' property (number of layers, i.e. transformer blocks, used); defaults to 6.
22 uint m_nLayers = 6;
23
26 {
27 }
28
/// <summary>
/// Get or set the number of layers (transformer blocks) used.
/// </summary>
[Description("Specifies number of layers (transformer blocks) used.")]
public uint layers
{
    get
    {
        return m_nLayers;
    }
    set
    {
        m_nLayers = value;
    }
}
38
/// <summary>
/// Get or set the number of heads used.
/// </summary>
[Description("Specifies number of heads used.")]
public uint heads
{
    get
    {
        return m_nHeads;
    }
    set
    {
        m_nHeads = value;
    }
}
48
/// <summary>
/// Get or set the embed value stored in m_nEmbed.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the embedding size used by the layer - confirm against the layer implementation.
/// </remarks>
public uint embed
{
    get
    {
        return m_nEmbed;
    }
    set
    {
        m_nEmbed = value;
    }
}
57
/// <summary>
/// Get or set the block size value stored in m_nBlockSize.
/// </summary>
/// <remarks>
/// NOTE(review): the exact meaning of the block size is not visible here - confirm against the layer implementation.
/// </remarks>
public uint block_size
{
    get
    {
        return m_nBlockSize;
    }
    set
    {
        m_nBlockSize = value;
    }
}
66
/// <summary>
/// Specifies dropout probability used on the attention weights.
/// </summary>
/// <remarks>
/// The Description attribute matches the 'layers' and 'heads' properties so that
/// this setting also shows help text in property-grid style editors.
/// </remarks>
[Description("Specifies dropout probability used on the attention weights.")]
public double attn_dropout
{
    get { return m_dfAttnDropout; }
    set { m_dfAttnDropout = value; }
}
75
/// <summary>
/// Specifies dropout probability used on the residual weights.
/// </summary>
/// <remarks>
/// The Description attribute matches the 'layers' and 'heads' properties so that
/// this setting also shows help text in property-grid style editors.
/// </remarks>
[Description("Specifies dropout probability used on the residual weights.")]
public double resid_dropout
{
    get { return m_dfResidDropout; }
    set { m_dfResidDropout = value; }
}
84
/// <summary>
/// Load the parameter from a binary reader.
/// </summary>
/// <param name="br">Reader from which a single string is read; that string is parsed with RawProto.Parse.</param>
/// <param name="bNewInstance">When <i>false</i>, the loaded values in 'p' are also copied into this instance via Copy.</param>
/// <returns>The loaded parameter 'p' is returned.</returns>
86 public override object Load(System.IO.BinaryReader br, bool bNewInstance = true)
87 {
88 RawProto proto = RawProto.Parse(br.ReadString());
// NOTE(review): the statement declaring 'p' (presumably created from 'proto') is not visible in this listing - confirm against the original file.
90
// Only when the caller does not want a new instance are the loaded values copied into this object.
91 if (!bNewInstance)
92 Copy(p);
93
94 return p;
95 }
96
/// <summary>
/// Copy one parameter to another: the six settings are read from 'p' and written to this instance's fields.
/// </summary>
/// <param name="src">Specifies the parameter to copy from.</param>
98 public override void Copy(LayerParameterBase src)
99 {
// NOTE(review): the statement declaring 'p' (presumably 'src' cast to this parameter type) is not visible in this listing - confirm against the original file.
101
102 m_nLayers = p.layers;
103 m_nHeads = p.heads;
104 m_nEmbed = p.embed;
105 m_nBlockSize = p.block_size;
106 m_dfAttnDropout = p.attn_dropout;
107 m_dfResidDropout = p.resid_dropout;
108 }
109
/// <summary>
/// Creates a new copy of this instance of the parameter.
/// </summary>
/// <returns>The parameter 'p', after this instance's values have been copied into it, is returned.</returns>
111 public override LayerParameterBase Clone()
112 {
// NOTE(review): the statement declaring 'p' (presumably a newly constructed parameter) is not visible in this listing - confirm against the original file.
114 p.Copy(this);
115 return p;
116 }
117
/// <summary>
/// Convert the parameter into a RawProto.
/// </summary>
/// <param name="strName">Specifies the name passed as the first argument when the returned RawProto is constructed.</param>
/// <returns>A new RawProto built from the name, an empty value string and one child entry per setting.</returns>
public override RawProto ToProto(string strName)
{
    RawProtoCollection rgSettings = new RawProtoCollection();

    // Structural settings first.
    rgSettings.Add("layers", layers.ToString());
    rgSettings.Add("heads", heads.ToString());
    rgSettings.Add("embed", embed.ToString());
    rgSettings.Add("block_size", block_size.ToString());

    // Dropout settings last.
    rgSettings.Add("attn_dropout", attn_dropout.ToString());
    rgSettings.Add("resid_dropout", resid_dropout.ToString());

    RawProto proto = new RawProto(strName, "", rgSettings);
    return proto;
}
136
// Body of FromProto: parses the parameter from the RawProto 'rp'.
// NOTE(review): the method header and the statement declaring 'p' are not visible in this listing - confirm against the original file.
// Each setting is optional: when FindValue returns null the corresponding value already held by 'p' is left unchanged.
143 {
144 string strVal;
146
// Unsigned integer settings.
147 if ((strVal = rp.FindValue("layers")) != null)
148 p.layers = uint.Parse(strVal);
149
150 if ((strVal = rp.FindValue("heads")) != null)
151 p.heads = uint.Parse(strVal);
152
153 if ((strVal = rp.FindValue("embed")) != null)
154 p.embed = uint.Parse(strVal);
155
156 if ((strVal = rp.FindValue("block_size")) != null)
157 p.block_size = uint.Parse(strVal);
158
// Floating point settings.
// NOTE(review): double.Parse without an IFormatProvider uses the current culture, as does the matching ToString() call in ToProto.
159 if ((strVal = rp.FindValue("attn_dropout")) != null)
160 p.attn_dropout = double.Parse(strVal);
161
162 if ((strVal = rp.FindValue("resid_dropout")) != null)
163 p.resid_dropout = double.Parse(strVal);
164
165 return p;
166 }
167 }
168}
The RawProtoCollection class is a list of RawProto objects.
void Add(RawProto p)
Adds a RawProto to the collection.
The RawProto class is used to parse and output Google prototxt file data.
Definition: RawProto.cs:17
static RawProto Parse(string str)
Parses a prototxt and places it in a new RawProto.
Definition: RawProto.cs:306
string FindValue(string strName)
Searches for a value of a node within this node's children.
Definition: RawProto.cs:105
The LayerParameterBase is the base class for all other layer specific parameters.
Specifies the parameters for the CausalSelfAttentionLayer.
override RawProto ToProto(string strName)
Convert the parameter into a RawProto.
static CausalSelfAttentionParameter FromProto(RawProto rp)
Parses the parameter from a RawProto.
override void Copy(LayerParameterBase src)
Copy one parameter to another.
override object Load(System.IO.BinaryReader br, bool bNewInstance=true)
Load the parameter from a binary reader.
double resid_dropout
Specifies dropout probability used on the residual weights.
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
uint layers
The number of layers (transformer blocks) used.
double attn_dropout
Specifies dropout probability used on the attention weights.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12