MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
GateAddNormLayer.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers.tft
{
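    // Overview (accurate to the forward pass below): the layer computes
    //   top = LayerNorm(GLU(Dropout(bottom[0])) + residual)
    // where the residual is taken from bottom[1] when a second bottom blob is supplied,
    // and Dropout is only used when a dropout ratio is configured.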
    public class GateAddNormLayer<T> : Layer<T>
    {
        int m_nBlocks;
        Layer<T> m_dropout = null;
        Layer<T> m_gate = null;
        Layer<T> m_layerNorm = null;
        Blob<T> m_blobResidual = null;
        Blob<T> m_blobDrop = null;
        Blob<T> m_blobGate = null;
        Blob<T> m_blobGateAddResidual = null;
        BlobCollection<T> m_colTop = new BlobCollection<T>();
        BlobCollection<T> m_colBtm = new BlobCollection<T>();
        List<int> m_rgShape = new List<int>(4);

        public GateAddNormLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.GATEADDNORM;

            if (p.gateaddnorm_param.dropout_ratio > 0) // the dropout blob is only needed when a dropout ratio is specified
            {
                m_blobDrop = new Blob<T>(cuda, log);
                m_blobDrop.Name = p.name + ".drop";
            }

            m_blobResidual = new Blob<T>(cuda, log);
            m_blobResidual.Name = p.name + ".residual";
            m_blobGate = new Blob<T>(cuda, log);
            m_blobGate.Name = p.name + ".gate";
            m_blobGateAddResidual = new Blob<T>(cuda, log);
            m_blobGateAddResidual.Name = p.name + ".gateres";
        }

        protected override void dispose()
        {
            dispose(ref m_blobResidual);
            dispose(ref m_blobGate);
            dispose(ref m_blobGateAddResidual);
            dispose(ref m_blobDrop);

            dispose(ref m_dropout);
            dispose(ref m_gate);
            dispose(ref m_layerNorm);
        }

        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            if (m_blobDrop != null)
                col.Add(m_blobDrop);
            col.Add(m_blobGate);
            col.Add(m_blobResidual);
        }

        public override int MinBottomBlobs
        {
            get { return 1; }
        }

        public override int MaxBottomBlobs
        {
            get { return 2; }
        }

        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        private void addBtmTop(Blob<T> btm, Blob<T> top)
        {
            m_colBtm.Clear();
            m_colBtm.Add(btm);
            m_colTop.Clear();
            m_colTop.Add(top);
        }

        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            LayerParameter p; // local parameter used below to build the internal sub-layers
            Blob<T> blobBtm = colBottom[0];

            if (colBottom.Count > 1)
            {
                if (m_param.gateaddnorm_param.residual_channel_offset > 0)
                {
                    int nDiff = colBottom[1].channels - m_param.gateaddnorm_param.residual_channel_offset;
                    if (colBottom[1].channels % nDiff != 0)
                        m_log.FAIL("The bottom(1).channels must be divisible by (bottom(1).channels - residual_channel_offset). For example, if bottom(1).channels = 120 and residual_channel_offset = 90, the difference = 30, which is a factor of both 120 and 90.");
                }
            }

            if (m_param.gateaddnorm_param.dropout_ratio > 0) // dropout is only applied when a dropout ratio is configured
            {
                if (m_dropout == null)
                {
                    p = new LayerParameter(LayerParameter.LayerType.DROPOUT, m_param.name + ".drop");
                    m_dropout = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);

                    addBtmTop(colBottom[0], m_blobDrop);
                    m_dropout.Setup(m_colBtm, m_colTop);
                }
                blobBtm = m_blobDrop;
            }

            if (m_gate == null)
            {
                p = new LayerParameter(LayerParameter.LayerType.GLU, m_param.name + ".glu");
                m_gate = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);

                addBtmTop(blobBtm, m_blobGate);
                m_gate.Setup(m_colBtm, m_colTop);
                blobs.Add(m_gate.blobs);
            }
            m_blobGateAddResidual.ReshapeLike(m_blobGate);

            if (m_layerNorm == null)
            {
                p = new LayerParameter(LayerParameter.LayerType.LAYERNORM, m_param.name + ".layernorm");
                m_layerNorm = Layer<T>.Create(m_cuda, m_log, convertLayerParam(p, m_param), null);
                addBtmTop(m_blobGate, colTop[0]);
                m_layerNorm.Setup(m_colBtm, m_colTop);
            }

            setup_internal_blobs(m_colInternalBlobs);
        }

        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            Blob<T> blobBtm = colBottom[0];

            if (colBottom.Count > 1)
            {
                if (m_param.gateaddnorm_param.residual_channel_offset > 0)
                {
                    int nDiff = colBottom[1].channels - m_param.gateaddnorm_param.residual_channel_offset;
                    m_log.CHECK_EQ(colBottom[1].channels % nDiff, 0, "The bottom(1).channels must be divisible by (bottom(1).channels - residual_channel_offset)!");
                    m_nBlocks = colBottom[1].channels / nDiff;

                    int nQTimeSteps = nDiff;
                    m_rgShape.Clear();
                    m_rgShape.Add(colBottom[0].num);
                    m_rgShape.Add(nQTimeSteps);
                    m_rgShape.Add(colBottom[0].count(2));
                    m_blobResidual.Reshape(m_rgShape);
                }
                else
                {
                    m_blobResidual.ReshapeLike(colBottom[1]);
                }
            }

            if (m_dropout != null)
            {
                addBtmTop(colBottom[0], m_blobDrop);
                m_dropout.Reshape(m_colBtm, m_colTop);
                blobBtm = m_blobDrop;
            }

            addBtmTop(blobBtm, m_blobGate);
            m_gate.Reshape(m_colBtm, m_colTop);
            m_blobGateAddResidual.ReshapeLike(m_blobGate);

            addBtmTop(m_blobGate, colTop[0]);
            m_layerNorm.Reshape(m_colBtm, m_colTop);
        }

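        // The three helpers below handle the optional residual input.  When
        // residual_channel_offset > 0, only the last of the m_nBlocks channel blocks
        // (the "future" time steps) takes part in the residual connection; for example,
        // with bottom(1) of shape (256,120,64) and residual_channel_offset = 90, only the
        // last (256,30,64) slice is copied or accumulated.  Otherwise the whole blob is used.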
        private void copy_to_fwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Copy just the future items to the top; for example, if future = 30 and
                // the input shape is btm(256,120,64), only the last (256,30,64) slice is copied to the top.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_copy(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.gpu_data, bTop.mutable_gpu_data, DIR.FWD);
            }
            else
            {
                bTop.CopyFrom(bBtm);
            }
        }

        private void copy_to_bwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Copy the top gradient back into just the future items of the bottom; for example,
                // if future = 30 and the bottom shape is btm(256,120,64), the gradient fills only the last (256,30,64) slice.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_copy(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.mutable_gpu_diff, bTop.gpu_diff, DIR.BWD);
            }
            else
            {
                bBtm.CopyFrom(bTop, true);
            }
        }

        private void add_to_bwd(BlobCollection<T> colBtm, int nIdx, Blob<T> bTop)
        {
            if (nIdx >= colBtm.Count)
                return;

            Blob<T> bBtm = colBtm[nIdx];

            if (m_param.gateaddnorm_param.residual_channel_offset > 0)
            {
                // Add the top gradient into just the future items of the bottom; for example,
                // if future = 30 and the bottom shape is btm(256,120,64), the gradient is accumulated into only the last (256,30,64) slice.
                int nOuterNum = bBtm.num;
                int nChannels = m_nBlocks;
                int nInnerNum = (bBtm.channels / m_nBlocks) * bBtm.count(2);
                m_cuda.channel_add(bTop.count(), nOuterNum, nChannels, m_nBlocks, nInnerNum, m_nBlocks - 1, bBtm.mutable_gpu_diff, bTop.gpu_diff, DIR.BWD);
            }
            else
            {
                m_cuda.add(bTop.count(), bTop.gpu_diff, bBtm.gpu_diff, bBtm.mutable_gpu_diff);
            }
        }

        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            Blob<T> blobBtm = colBottom[0];
            copy_to_fwd(colBottom, 1, m_blobResidual);

            if (m_dropout != null)
            {
                addBtmTop(colBottom[0], m_blobDrop);
                m_dropout.Forward(m_colBtm, m_colTop);
                blobBtm = m_blobDrop;
            }

            addBtmTop(blobBtm, m_blobGate);
            m_gate.Forward(m_colBtm, m_colTop);

            if (colBottom.Count > 1)
                m_cuda.add(m_blobGateAddResidual.count(), m_blobGate.gpu_data, m_blobResidual.gpu_data, m_blobGateAddResidual.mutable_gpu_data);
            else
                m_blobGateAddResidual.CopyFrom(m_blobGate);

            addBtmTop(m_blobGateAddResidual, colTop[0]);
            m_layerNorm.Forward(m_colBtm, m_colTop);

            colTop[0].ReshapeLike(m_blobGate);
        }

        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            addBtmTop(m_blobGateAddResidual, colTop[0]);
            m_layerNorm.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            // Copy grad to the residual if it exists.
            copy_to_bwd(colBottom, 1, m_blobGateAddResidual);
            m_blobGate.CopyFrom(m_blobGateAddResidual, true);
            if (colBottom.Count > 1)
                m_blobResidual.CopyFrom(m_blobGateAddResidual, true);

            addBtmTop(colBottom[0], m_blobGate);
            m_gate.Backward(m_colTop, rgbPropagateDown, m_colBtm);

            if (m_dropout != null)
            {
                addBtmTop(m_blobDrop, colBottom[0]);
                m_dropout.Backward(m_colTop, rgbPropagateDown, m_colBtm);
                colBottom[0].CopyFrom(m_blobDrop, true);
            }
        }
    }
}
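Example usage (not part of GateAddNormLayer.cs): the sketch below shows one way to create and run the layer programmatically. RunGateAddNorm is a hypothetical helper; it assumes an already initialized CudaDnn<float> connection 'cuda' and Log 'log', and the blob shapes and parameter values are illustrative only. Depending on the GLU and LayerNorm defaults, additional sub-layer settings may be required.

// Minimal usage sketch (illustrative only).
static void RunGateAddNorm(CudaDnn<float> cuda, Log log)
{
    LayerParameter p = new LayerParameter(LayerParameter.LayerType.GATEADDNORM, "gan1");
    p.gateaddnorm_param.dropout_ratio = 0.1;          // enable the internal Dropout sub-layer (assumed setting).
    p.gateaddnorm_param.residual_channel_offset = 0;  // use the full residual blob (assumed setting).
    Layer<float> layer = Layer<float>.Create(cuda, log, p, null);

    Blob<float> blobX = new Blob<float>(cuda, log);
    blobX.Reshape(256, 30, 64, 1);                    // (num, channels, height, width) - illustrative shape.
    Blob<float> blobResidual = new Blob<float>(cuda, log);
    blobResidual.Reshape(256, 30, 64, 1);
    Blob<float> blobY = new Blob<float>(cuda, log);

    BlobCollection<float> colBottom = new BlobCollection<float>();
    colBottom.Add(blobX);
    colBottom.Add(blobResidual);
    BlobCollection<float> colTop = new BlobCollection<float>();
    colTop.Add(blobY);

    layer.Setup(colBottom, colTop);    // creates the internal Dropout/GLU/LayerNorm sub-layers.
    layer.Forward(colBottom, colTop);  // colTop[0] = LayerNorm(GLU(Dropout(x)) + residual).
}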
The Log class provides general output in text form.
Definition: Log.cs:13
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
Definition: Log.cs:394
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
Definition: Log.cs:239
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
Definition: Blob.cs:800
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
Definition: Blob.cs:442
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
Definition: Blob.cs:903
int count()
Returns the total number of items in the Blob.
Definition: Blob.cs:739
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
Definition: Blob.cs:792
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
Definition: Layer.cs:815
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
Definition: Layer.cs:728
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
Definition: Layer.cs:59
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
Definition: Layer.cs:439
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
Definition: Layer.cs:1468
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
Definition: Layer.cs:875
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
Definition: Layer.cs:1134
The GateAddNormLayer implements the Dropout, Gated Linear Unit (GLU), and LayerNorm layers while adding in the r...
override int MaxBottomBlobs
Returns the max number of required bottom (input) Blobs: x, residual
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
GateAddNormLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The constructor.
override int MinBottomBlobs
Returns the min number of required bottom (input) Blobs: x
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: y
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
double dropout_ratio
Specifies the dropout ratio. (e.g. the probability that values will be dropped out and set to zero....
override void Copy(LayerParameterBase src)
Copy one parameter to another.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
LayerNormParameter layer_norm_param
Returns the parameter set when initialized with LayerType.LAYERNORM
GluParameter glu_param
Returns the parameter set when initialized with LayerType.GLU
GateAddNormParameter gateaddnorm_param
Returns the parameter set when initialized with LayerType.GATEADDNORM
LayerType
Specifies the layer type.
DropoutParameter dropout_param
Returns the parameter set when initialized with LayerType.DROPOUT
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8
DIR
Defines the direction of data flow.
Definition: CudaDnn.cs:22
The MyCaffe.layers.tft namespace contains all TFT related layers.
Definition: LayerFactory.cs:15
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.
Definition: Annotation.cs:12