center_loss公式定義
center_loss_layer.cpp源碼解讀
- center loss,softmax loss在mnist數據集上的對比實驗
定義
“Center Loss: simultaneously learning a center for deep features of each class and penalizing the distances between the deep features and their corresponding class centers.” 參考論文:A Discriminative Feature Learning Approach for Deep Face Recognition。關於對 center loss 的理解,可參考知乎回答鏈接。
公式
(1) Forward Computation
(2) Backward Computation
(3) Update Equation
代碼
(1) LayerSetUp
namespace caffe{
template<typename Dtype>
void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top){
  // One-time setup: read hyper-parameters and allocate the center buffers.
  // bottom[0] carries the features, bottom[1] the class labels;
  // top[0] will receive the scalar loss in Forward_cpu.
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // Features and labels must agree in batch size.
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  const int featDim = bottom[0]->channels();
  const int batchSize = bottom[0]->num();
  // Hyper-parameters from the prototxt:
  //  alpha      - learning rate of the center update,
  //  lossWeight - lambda, the weight of the center loss term,
  //  clusterNum - number of classes (one center per class).
  alpha = this->layer_param_.center_loss_param().alpha();
  lossWeight = this->layer_param_.center_loss_param().loss_weight();
  clusterNum = this->layer_param_.center_loss_param().cluster_num();
  // center_info_: one row per class center; center_loss_: per-sample residuals.
  center_info_.Reshape(clusterNum, featDim, 1, 1);
  center_loss_.Reshape(batchSize, featDim, 1, 1);
  center_update_count_.resize(clusterNum);
  // Start with all class centers at the origin.
  caffe_set(clusterNum * featDim, Dtype(0.0), center_info_.mutable_cpu_data());
}
(2) Forward前向傳播
template<typename Dtype>
void CenterLossLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*> &bottom,
const vector<Blob<Dtype>*> &top){
// Computes the center loss for one mini-batch and, as a side effect,
// updates the class centers themselves: the center blob is not a learnable
// parameter seen by the solver, so its update is folded into Forward via
// Blob::Update() at the end of this method.
// bottom[0]: features (num x channels); bottom[1]: class labels.
const Dtype *feature = bottom[0]->cpu_data();
const Dtype *label = bottom[1]->cpu_data();
int num = bottom[0]->num();
int channels = bottom[0]->channels();
// Scalar loss accumulated over the whole batch.
Dtype loss = 0;
// center_info_'s diff buffer accumulates, per class, the sum of (x_i - c_yi).
caffe_set(clusterNum * channels, Dtype(0.0), center_info_.mutable_cpu_diff());
for(int i = 0; i < clusterNum; ++i){
// Counts start at 1, not 0, giving the (1 + n_j) denominator of the
// center-update equation (3) in the paper.
center_update_count_[i] = 1;
}
for(int i = 0; i < num; ++i){
int targetLabel = label[i];
// center_loss_[i] = x_i - c_{y_i}, the residual of formula (1);
// this buffer is also reused by Backward_cpu as the gradient source.
caffe_sub(channels, feature + i * channels,
center_info_.cpu_data() + targetLabel * channels,
center_loss_.mutable_cpu_data() + i * channels);
// Accumulate the residual into this class's center diff and bump its count.
caffe_add(channels, center_loss_.cpu_data() + i * channels,
center_info_.cpu_diff() + targetLabel * channels,
center_info_.mutable_cpu_diff() + targetLabel * channels);
center_update_count_[targetLabel]++;
// loss += lambda/2 * ||x_i - c_{y_i}||^2, averaged over the batch (formula (1)).
loss += caffe_cpu_dot(channels, center_loss_.cpu_data() + i * channels,
center_loss_.cpu_data() + i * channels) * lossWeight / Dtype(2.0) / static_cast<Dtype>(num);
}
top[0]->mutable_cpu_data()[0] = loss;
// Scale each class's accumulated diff so that Blob::Update() (data -= diff)
// moves the center toward its features:
//   c_j <- c_j + alpha * lossWeight / count_j * sum_i (x_i - c_j)  (formula (3)).
// NOTE(review): lossWeight is also folded into the center update here, not
// only into the loss/gradient — confirm this matches the intended recipe.
for(int i = 0; i < clusterNum; ++i){
Dtype scale = -alpha * lossWeight / Dtype(center_update_count_[i]);
caffe_scal(channels, scale, center_info_.mutable_cpu_diff() + i * channels);
}
center_info_.Update();
}
(3) Backward反向傳播
template<typename Dtype>
void CenterLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*> &top,
    const vector<bool> &propagate_down,
    const vector<Blob<Dtype>*> &bottom){
  // Propagates the center-loss gradient to the feature bottom only.
  // Per the Caffe layer contract, skip the work entirely when the solver
  // does not request a gradient for bottom[0].
  if(!propagate_down[0]){
    return;
  }
  int num = bottom[0]->num();
  int channels = bottom[0]->channels();
  // center_loss_ still holds (x_i - c_{y_i}) computed in Forward_cpu.
  // Since Forward reported loss = lossWeight/2/num * sum_i ||x_i - c_{y_i}||^2,
  // the matching gradient is d(loss)/dx_i = lossWeight/num * (x_i - c_{y_i}).
  // (The original code omitted the 1/num factor, making the gradient
  // inconsistent with the batch-averaged loss.)
  Dtype *out = bottom[0]->mutable_cpu_diff();
  // Copy the residuals into bottom diff and scale there, instead of scaling
  // center_loss_'s data in place — the in-place scal made Backward
  // non-idempotent (a repeated call would square the scale factor).
  caffe_copy(num * channels, center_loss_.cpu_data(), out);
  caffe_scal(num * channels, lossWeight / Dtype(num), out);
}
實驗
Github 上有開源的整個項目的代碼[鏈接](https://github.com/wangwen39/center-loss),新手可以用來練手。特徵可視化可直接參考caffe主頁:http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/siamese/mnist_siamese.ipynb
mnist數據集共有10個類別的手寫體數字0-9,通過對比實驗可以看出,center loss能夠很好地使類間距離增大,同時使類內更加聚攏,從而達到更好的分類準確度。
① softmax loss
② center loss + softmax loss