MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
SoftmaxCrossEntropyLossLayer.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;

namespace MyCaffe.layers
{
    /// <summary>
    /// The SoftmaxCrossEntropyLossLayer computes the cross-entropy (logistic) loss and is
    /// often used for predicting targets interpreted as probabilities.
    /// </summary>
    /// <typeparam name="T">Specifies the base type: <i>float</i> or <i>double</i>.</typeparam>
    public class SoftmaxCrossEntropyLossLayer<T> : LossLayer<T>
    {
        SoftmaxLayer<T> m_softmaxLayer;
        Blob<T> m_blobSoftmaxOutput;
        Blob<T> m_blobTarget = null;
        Blob<T> m_blobLoss;
        BlobCollection<T> m_colSoftmaxBottomVec = new BlobCollection<T>();
        BlobCollection<T> m_colSoftmaxTopVec = new BlobCollection<T>();

        // How to normalize the loss.
        double m_dfNormalizer = 0;

        /// <summary>
        /// The SoftmaxCrossEntropyLossLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer.</param>
        public SoftmaxCrossEntropyLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS;
            m_blobSoftmaxOutput = new Blob<T>(cuda, log);
            m_blobSoftmaxOutput.Name = m_param.name + " softmax out";
            m_blobLoss = new Blob<T>(cuda, log);
            m_blobLoss.Name = m_param.name + " loss";

            LayerParameter param_softmax = p.Clone(false);
            param_softmax.loss_weight.Clear();
            m_softmaxLayer = new SoftmaxLayer<T>(cuda, log, param_softmax);
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            m_blobSoftmaxOutput.Dispose();
            m_softmaxLayer.Dispose();
            m_blobLoss.Dispose();

            if (m_blobTarget != null)
                m_blobTarget.Dispose();

            base.dispose();
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs as variable.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return -1; }
        }

        /// <summary>
        /// Returns the minimum number of required top (output) Blobs: loss.
        /// </summary>
        public override int MinTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of required top (output) Blobs: loss, loss values.
        /// </summary>
        public override int MaxTopBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.LayerSetUp(colBottom, colTop);

            m_colSoftmaxBottomVec = new BlobCollection<T>();
            m_colSoftmaxBottomVec.Add(colBottom[0]);
            m_colSoftmaxTopVec = new BlobCollection<T>();
            m_colSoftmaxTopVec.Add(m_blobSoftmaxOutput);
            m_softmaxLayer.Setup(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            base.Reshape(colBottom, colTop);
            int nAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
            m_nOuterNum = colBottom[0].count(0, nAxis); // batch size.
            m_nInnerNum = colBottom[0].count(nAxis);    // instance size: |output| == |target|.

            if (colBottom[0].count() != colBottom[1].count())
            {
                m_log.CHECK_EQ(colBottom[0].count(0, nAxis), colBottom[1].count(0, nAxis), "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");

                // Expand the sparse labels into a dense target where the
                // value at each label's target index is set to 1.0.
                if (m_blobTarget == null)
                {
                    m_blobTarget = new Blob<T>(m_cuda, m_log);
                    m_blobTarget.Name = "full_label";
                }

                m_blobTarget.ReshapeLike(colBottom[0]);
            }

            m_softmaxLayer.Reshape(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);
            m_blobLoss.ReshapeLike(colBottom[0]);
        }

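        // For example (illustrative shapes): with predictions of shape (32, 10), bottom[1]
        // may either be a dense target of the same shape (32, 10), or hold one class index
        // per batch item (e.g. shape (32, 1)); in the sparse case m_blobTarget is reshaped
        // here and filled with the one-hot expansion of the indexes during the forward pass.
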
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Set the target data.
            if (m_blobTarget != null)
            {
                int nAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);
                m_log.CHECK_EQ(colBottom[0].count(0, nAxis), colBottom[1].count(0, nAxis), "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");
                m_blobTarget.SetData(0);

                float[] rgfTarget = convertF(colBottom[1].mutable_cpu_data);
                for (int i = 0; i < colBottom[1].num; i++)
                {
                    int nTargetIdx = (int)rgfTarget[i];
                    m_blobTarget.SetData(1.0, m_nInnerNum * i + nTargetIdx);
                }
            }

            // The forward pass computes the softmax outputs.
            m_colSoftmaxBottomVec[0] = colBottom[0];
            m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

            // Compute the loss (negative log likelihood).
            int nCount = colBottom[0].count();

            // Stable version of the loss computation on the input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;

            // Since this memory is not used for anything else, we use it here to avoid having
            // to allocate new GPU memory to accumulate the intermediate results.
            long hLossData = colBottom[0].mutable_gpu_diff;
            long hCountData = (m_blobTarget != null) ? m_blobTarget.mutable_gpu_diff : colBottom[1].mutable_gpu_diff;

            m_cuda.sigmoid_cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, false, -1, hCountData);

            double dfValidCount = nCount;
            double dfLoss = m_cuda.asum_double(nCount, hLossData);
            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear the scratch memory to prevent interfering with the backward pass (see #6202).
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);

            if (m_blobTarget != null)
                m_blobTarget.SetDiff(0);
        }

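        // Note: the per-element losses above are accumulated directly from the logits by
        // the stable cross-entropy kernel, whereas the gradient computed in backward() is
        // the standard softmax cross-entropy derivative, softmax(x) - target, scaled by
        // the loss weight and normalizer.
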
        /// <summary>
        /// Computes the softmax cross-entropy loss error gradient w.r.t. the predictions.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        /// <param name="rgbPropagateDown">Specifies whether or not to propagate the error back down to each bottom Blob.</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (rgbPropagateDown[1])
                m_log.FAIL(m_type.ToString() + " Layer cannot backpropagate to label inputs.");

            if (rgbPropagateDown[0])
            {
                // First, compute the diff: bottom_diff = softmax(input) - target.
                int nCount = colBottom[0].count();
                long hSoftmaxOutputData = m_blobSoftmaxOutput.gpu_data;
                long hTarget = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;
                long hBottomDiff = colBottom[0].mutable_gpu_diff;

                m_cuda.copy(nCount, hSoftmaxOutputData, hBottomDiff);
                m_cuda.axpy(nCount, convert(-1.0), hTarget, hBottomDiff);

                // Scale down the gradient by the loss weight and normalizer.
                double dfLossWeight = convertD(colTop[0].GetDiff(0)) / m_dfNormalizer;
                m_cuda.scal(nCount, dfLossWeight, hBottomDiff);
            }
        }
    }
}
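
A minimal usage sketch of the layer follows. It is not part of the source file above; the CudaDnn, Log, and Blob constructor arguments shown are illustrative assumptions about typical MyCaffe overloads, and the shapes are arbitrary. The sketch assumes the same using directives as the file above.

    // Illustrative sketch only -- constructor overloads shown here are assumptions.
    CudaDnn<float> cuda = new CudaDnn<float>(0);   // connect to GPU device 0 (assumed overload).
    Log log = new Log("softmax_ce_example");       // assumed Log constructor.

    // Configure the loss layer; softmax_param.axis selects the class axis.
    LayerParameter p = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
    SoftmaxCrossEntropyLossLayer<float> layer = new SoftmaxCrossEntropyLossLayer<float>(cuda, log, p);

    // bottom[0]: predictions of shape (batch, classes); bottom[1]: dense targets of the
    // same shape (one label index per batch item also works, as handled in Reshape/forward).
    Blob<float> blobPred = new Blob<float>(cuda, log, new List<int>() { 32, 10 });   // assumed shape overload.
    Blob<float> blobTarget = new Blob<float>(cuda, log, new List<int>() { 32, 10 });
    Blob<float> blobLoss = new Blob<float>(cuda, log);

    BlobCollection<float> colBottom = new BlobCollection<float>();
    colBottom.Add(blobPred);
    colBottom.Add(blobTarget);
    BlobCollection<float> colTop = new BlobCollection<float>();
    colTop.Add(blobLoss);

    layer.Setup(colBottom, colTop);     // LayerSetUp + Reshape.
    layer.Forward(colBottom, colTop);   // colTop[0] now holds the normalized loss.
    layer.Dispose();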