Adding an MMDLoss Layer to Caffe (Defining a Custom Caffe Layer)

This post explains how to add a custom network layer to caffe, using the MMDLoss layer from the paper "Learning Transferable Features with Deep Adaptation Networks" as the running example.
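For orientation (a brief summary of the paper, not part of the original walkthrough): MMD measures the distance between the source and target feature distributions in a reproducing kernel Hilbert space, using a weighted sum of Gaussian kernels

$$ k(x, y) = \sum_{u=1}^{m} \beta_u \exp\!\left(-\gamma_u \lVert x - y \rVert^2\right), \qquad \beta_u \ge 0. $$

The layer below follows the paper's linear-time scheme: each step draws a random pair of source samples and a random pair of target samples and accumulates

$$ k(x^s_1, x^s_2) + k(x^t_1, x^t_2) - k(x^s_1, x^t_2) - k(x^t_1, x^s_2), $$

backpropagating the derivative of each Gaussian term into the feature blob.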

1. Add the header declaration

   Declare the MMDLoss layer in /caffe-master/include/caffe/layers/neuron_layer.hpp:

#ifndef CAFFE_NEURON_LAYER_HPP_
#define CAFFE_NEURON_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief An interface for layers that take one blob as input (@f$ x @f$)
 *        and produce one equally-sized blob as output (@f$ y @f$), where
 *        each element of the output depends only on the corresponding input
 *        element.
 */
template <typename Dtype>
class NeuronLayer : public Layer<Dtype> {
 public:
  explicit NeuronLayer(const LayerParameter& param)
     : Layer<Dtype>(param) {}
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }
};

template <typename Dtype>
class MMDLossLayer : public NeuronLayer<Dtype> {
 public:
  explicit MMDLossLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MMDLoss"; }
  // Return -1 so caffe does not enforce an exact bottom count: the layer
  // takes the feature blob plus the label blob.
  virtual inline int ExactNumBottomBlobs() const { return -1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  Dtype* beta_;             // per-kernel weights (the beta in multi-kernel MMD)
  Blob<Dtype> mmd_data_;    // scratch blob for pairwise feature differences
  Dtype mmd_lambda_;        // loss weight; 0 disables the layer
  int input_num_;           // batch size
  int data_dim_;            // feature dimension per sample
  int size_of_source_;      // number of source samples in the batch
  int size_of_target_;      // number of target samples in the batch
  Dtype gamma_;             // base Gaussian bandwidth parameter
  int num_of_kernel_;       // number of Gaussian kernels
  int* source_index_;       // batch indices of source samples
  int* target_index_;       // batch indices of target samples
  int iter_of_epoch_;       // iterations per epoch (kernel-weight update period)
  int now_iter_;            // iteration counter within the current epoch
  bool fix_gamma_;          // re-estimate gamma only once per epoch
  Dtype** Q_;               // kernel-by-kernel matrix for the selection QP
  Dtype* sum_of_epoch_;     // per-kernel gradient-factor accumulator
  Dtype* variance_;         // per-kernel variance estimate over the epoch
  Dtype I_lambda_;          // regularizer added to the diagonal of Q
  int all_sample_num_;      // samples accumulated in the current epoch
  int top_k_;               // number of kernels kept by the selection methods
  Dtype* sum_of_pure_mmd_;  // per-kernel unweighted MMD accumulator
  int method_number_;       // numeric code of the kernel-selection method
  Dtype kernel_mul_;        // multiplicative step between kernel bandwidths
};

}  // namespace caffe

#endif  // CAFFE_NEURON_LAYER_HPP_
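Two classes are involved here. NeuronLayer is caffe's existing base class for element-wise layers and stays untouched; MMDLossLayer is the new declaration appended to the file. It overrides ExactNumBottomBlobs() to return -1 so that caffe does not enforce NeuronLayer's single-bottom rule: the layer needs a second bottom blob, the label blob that marks each sample as source or target.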

2. Implement MMDLoss

   Add a mmd_layer.cpp file under /caffe-master/src/caffe/layers:

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

template <typename Dtype>
void MMDLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::LayerSetUp(bottom, top);
  input_num_ = bottom[0]->count(0, 1);
  data_dim_ = bottom[0]->count(1);
  num_of_kernel_ = this->layer_param_.mmd_param().num_of_kernel(); 
  mmd_lambda_ = this->layer_param_.mmd_param().mmd_lambda();
  iter_of_epoch_ = this->layer_param_.mmd_param().iter_of_epoch();
  fix_gamma_ = this->layer_param_.mmd_param().fix_gamma();
  beta_ = new Dtype[num_of_kernel_];
  caffe_set(num_of_kernel_, Dtype(1.0) / num_of_kernel_, beta_);
  now_iter_ = 0;
  sum_of_epoch_ = new Dtype[num_of_kernel_];
  caffe_set(num_of_kernel_, Dtype(0), sum_of_epoch_);
  gamma_ = Dtype(-1);
  // Q_ is addressed both as a row-pointer array (by the CGAL solver) and as
  // one contiguous num_of_kernel_ x num_of_kernel_ buffer through Q_[0]
  // (by caffe_scal/caffe_cpu_gemm below), so it must be allocated as a single
  // block with row pointers into it.
  Q_ = new Dtype* [num_of_kernel_];
  Q_[0] = new Dtype[num_of_kernel_ * num_of_kernel_];
  caffe_set(num_of_kernel_ * num_of_kernel_, Dtype(0), Q_[0]);
  for(int i = 1; i < num_of_kernel_; i++){
      Q_[i] = Q_[0] + i * num_of_kernel_;
  }
  variance_ = new Dtype[num_of_kernel_];
  caffe_set(num_of_kernel_, Dtype(0), variance_);
  sum_of_pure_mmd_ = new Dtype[num_of_kernel_];
  caffe_set(num_of_kernel_, Dtype(0), sum_of_pure_mmd_);
  all_sample_num_ = 0;
  kernel_mul_ = this->layer_param_.mmd_param().kernel_mul();
  // Defaults in case the method string matches none of the branches below;
  // I_lambda_ is also read by the Q regularization and the "max_ratio" path.
  method_number_ = 0;
  I_lambda_ = Dtype(0);
  if(this->layer_param_.mmd_param().method() == "max"){
        method_number_ = 1;
        top_k_ = this->layer_param_.mmd_param().method_param().top_num();
  }
  else if(this->layer_param_.mmd_param().method() == "none"){
        method_number_ = 0;
  }
  else if(this->layer_param_.mmd_param().method() == "L2"){
        method_number_ = 4;
        top_k_ = this->layer_param_.mmd_param().method_param().top_num();
        I_lambda_ = this->layer_param_.mmd_param().method_param().i_lambda();
  }
  else if(this->layer_param_.mmd_param().method() == "max_ratio"){
        top_k_ = this->layer_param_.mmd_param().method_param().top_num();
        method_number_ = 3;
  }
  LOG(INFO) << this->layer_param_.mmd_param().method() << " num: " << method_number_;
  source_index_ = new int[input_num_];
  target_index_ = new int[input_num_];
  mmd_data_.Reshape(1, 1, 1, data_dim_);
}

template <typename Dtype>
void MMDLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::Reshape(bottom, top);
}

template <typename Dtype>
void MMDLossLayer<Dtype>::Forward_cpu(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Intentionally empty: the layer is only implemented on the GPU
  // (see mmd_layer.cu below).
}

template <typename Dtype>
void MMDLossLayer<Dtype>::Backward_cpu(
  const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
  const vector<Blob<Dtype>*>& bottom) {
  // Intentionally empty: the MMD gradient is accumulated in Backward_gpu.
}

#ifdef CPU_ONLY
STUB_GPU(MMDLossLayer);
#endif

INSTANTIATE_CLASS(MMDLossLayer);
REGISTER_LAYER_CLASS(MMDLoss);

}  // namespace caffe
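INSTANTIATE_CLASS and REGISTER_LAYER_CLASS are what make the layer constructible from a prototxt by the type name "MMDLoss". Note that the CPU functions are deliberately empty, and under CPU_ONLY the STUB_GPU macro turns the GPU entry points into fatal errors, so the layer is effectively GPU-only. As a minimal sanity check that registration took effect (my own sketch, not part of the original code; link it against the rebuilt libcaffe):

#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::LayerParameter param;
  param.set_name("mmd_check");
  param.set_type("MMDLoss");
  // CreateLayer looks the type name up in the registry populated by
  // REGISTER_LAYER_CLASS and aborts with "Unknown layer type" otherwise.
  boost::shared_ptr<caffe::Layer<float> > layer =
      caffe::LayerRegistry<float>::CreateLayer(param);
  return layer ? 0 : 1;
}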

   Then add a mmd_layer.cu file under /caffe-master/src/caffe/layers. Besides the CUDA implementation of the backward pass, this file pulls in CGAL's quadratic-programming solver, which the "L2" kernel-selection method uses to recompute the kernel weights at the end of each epoch:

#include <algorithm>
#include <cfloat>
#include <vector>
#include <CGAL/basic.h>
#include <CGAL/QP_models.h>
#include <CGAL/QP_functions.h>

#include <CGAL/MP_Float.h>
typedef CGAL::MP_Float ET;

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/util/output_matrix.hpp"

// A CGAL quadratic program over float data with a single equality constraint,
// solved exactly using MP_Float arithmetic.
typedef CGAL::Quadratic_program_from_iterators
<float **,float*,CGAL::Const_oneset_iterator<CGAL::Comparison_result>,
    bool*, float*,bool*,float*,float**,float*> Program;
typedef CGAL::Quadratic_program_solution<ET> Solution;

namespace caffe {

template <typename Dtype>
void MMDLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Intentionally empty: with an in-place top blob the features pass through
  // unchanged; all of the work happens in Backward_gpu. (The original listing
  // had the bottom/top argument names swapped relative to the declaration.)
}

// Split the batch into source and target samples using the label blob.
// By this code's convention, label[2 * i] < 0 marks sample i as source data.
template <typename Dtype>
void perm_source_and_target(int num, int* source_index, int* target_index,
        int& size_of_source, int& size_of_target, const Dtype* label){
    int source_pos = 0;
    int target_pos = 0;
    for(int i = 0;i < num;++i){
        if(label[i * 2] < 0){
            //source data
            source_index[source_pos++] = i;
        }
        else{
            //target data
            target_index[target_pos++] = i;
        }
    }
    size_of_source = source_pos;
    size_of_target = target_pos;
}

// Return (value, index) pairs for mmd with the num_of_max largest entries
// sorted in descending order at the front of the vector.
template <typename Dtype>
std::vector<std::pair<Dtype, int> > maxn(int num_of_max, Dtype* mmd, int num_of_kernel){
    std::vector<std::pair<Dtype, int> > temp;
    for(int i = 0; i < num_of_kernel; i++){
        temp.push_back(std::make_pair(mmd[i], i));
    }
    std::partial_sort(
            temp.begin(), temp.begin() + num_of_max, temp.end(), std::greater<std::pair<Dtype, int> >());
    return temp;
}

template <typename Dtype>
void MMDLossLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
    if(mmd_lambda_ == 0){
        return;
    }
    now_iter_++;
    Dtype sum;
    caffe_gpu_asum(input_num_ * data_dim_, bottom[0]->gpu_diff(), &sum);
    LOG(INFO) << "before mmd diff " << sum;
    perm_source_and_target<Dtype>(input_num_, source_index_, target_index_, 
            size_of_source_, size_of_target_, bottom[1]->cpu_data());
    int sample_num;
    if (size_of_source_ <= 1 || size_of_target_ <= 1){
        return;
    }
    if(size_of_source_ > size_of_target_){
        sample_num = size_of_source_;
    }
    else{
        sample_num = size_of_target_;
    }
    int s1,s2,t1,t2;
    srand((unsigned int)time(0));
    Dtype* bottom_data = bottom[0]->mutable_gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    // Reuse mmd_data_'s data and diff buffers as two scratch vectors.
    Dtype* tempX1 = mmd_data_.mutable_gpu_data();
    Dtype* tempX2 = mmd_data_.mutable_gpu_diff();
    
    Dtype square_distance;
    Dtype bandwidth = 0;
    // Bandwidth heuristic: sum the squared distances between random pairs of
    // samples in the batch, then set gamma = batch_size / sum.
    for(int i = 0; i < input_num_; i++){
        s1 = rand() % input_num_;
        s2 = rand() % input_num_;
        s2 = (s1 != s2) ? s2 : (s2 + 1) % input_num_;
        caffe_gpu_memcpy(sizeof(Dtype) * data_dim_, bottom_data + s1 * data_dim_, tempX1);
        caffe_gpu_memcpy(sizeof(Dtype) * data_dim_, bottom_data + s2 * data_dim_, tempX2);
        caffe_gpu_sub<Dtype>(data_dim_, tempX1, tempX2, tempX2);
        caffe_gpu_dot<Dtype>(data_dim_, tempX2, tempX2, &square_distance);
        bandwidth += square_distance;
    }
    if(fix_gamma_){
        // gamma_ is reset to -1 at the end of each epoch, so this estimates
        // it once per epoch and then keeps it fixed.
        gamma_ = gamma_ < 0 ? (Dtype)input_num_ / bandwidth : gamma_;
    }
    else{
        gamma_ = (Dtype)input_num_ / bandwidth;
    }
    LOG(INFO) << "gamma " << gamma_;
    Dtype loss = 0;

    Dtype* temp_loss1 = new Dtype[num_of_kernel_];
    Dtype* temp_loss2 = new Dtype[num_of_kernel_];
    Dtype* temp_loss3 = new Dtype[num_of_kernel_];
    Dtype* temp_loss4 = new Dtype[num_of_kernel_];

    all_sample_num_ += sample_num;
    for(int i = 0; i < sample_num; i++){
        // Randomly draw two distinct source samples and two distinct target samples.
        s1 = rand() % size_of_source_;
        s2 = rand() % size_of_source_;
        s2 = (s1 != s2) ? s2 : (s2 + 1) % size_of_source_;

        t1 = rand() % size_of_target_;
        t2 = rand() % size_of_target_;
        t2 = (t1 != t2) ? t2 : (t2 + 1) % size_of_target_;
        
        s1 = source_index_[s1];
        s2 = source_index_[s2];
        t1 = target_index_[t1];
        t2 = target_index_[t2];
        //////////////
        Dtype square_sum = 0;
        Dtype factor_for_diff = 0;
        const Dtype* x_s1 = bottom_data + s1 * data_dim_;
        const Dtype* x_s2 = bottom_data + s2 * data_dim_;
        const Dtype* x_t1 = bottom_data + t1 * data_dim_;
        const Dtype* x_t2 = bottom_data + t2 * data_dim_;

        // Source-source term +k(x_s1, x_s2).
        caffe_gpu_sub<Dtype>(data_dim_, x_s1, x_s2, tempX1);
        caffe_gpu_sub<Dtype>(data_dim_, x_s2, x_s1, tempX2);
        caffe_gpu_dot<Dtype>(data_dim_, tempX1, tempX1, &square_sum);
        // The per-kernel bandwidths form a ladder centered on gamma_:
        // gamma_j = gamma_ * kernel_mul_^(j - num_of_kernel_/2).
        Dtype times = pow(kernel_mul_, (Dtype)(num_of_kernel_ / 2));
        Dtype temp_gamma = gamma_ / times;
        for(int j = 0; j < num_of_kernel_; j++){
            Dtype temp_n = (0.0 - temp_gamma) * square_sum;
            temp_n = exp(temp_n);

            sum_of_pure_mmd_[j] += temp_n;
            temp_n = temp_n * beta_[j];
            if(i % 2 == 0){
                temp_loss1[j] = temp_n;
            }
            else{
                temp_loss2[j] = temp_n;
            }
            if(i % 2 == 0){
                temp_loss3[j] = temp_n;
            }
            else{
                temp_loss4[j] = temp_n;
            }

            loss += temp_n;
            temp_n = (-2) * temp_gamma * temp_n;
            sum_of_epoch_[j] += temp_n;
            factor_for_diff += temp_n;
            temp_gamma = temp_gamma * kernel_mul_;
        }
        // Scale the difference vectors by the accumulated kernel-derivative
        // factor and add them to the diffs of both samples (Dtype(32) is a
        // fixed scaling constant in this implementation).
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
        caffe_gpu_add(data_dim_, tempX1, bottom_diff + s1 * data_dim_, bottom_diff + s1 * data_dim_);
        caffe_gpu_add(data_dim_, tempX2, bottom_diff + s2 * data_dim_, bottom_diff + s2 * data_dim_);
         
        factor_for_diff = 0;
        // Cross term -k(x_s1, x_t2).
        caffe_gpu_sub<Dtype>(data_dim_, x_s1, x_t2, tempX1);
        caffe_gpu_sub<Dtype>(data_dim_, x_t2, x_s1, tempX2);
        caffe_gpu_dot<Dtype>(data_dim_, tempX1, tempX1, &square_sum);
        temp_gamma = gamma_ / times;
        for(int j = 0; j < num_of_kernel_; j++){
            Dtype temp_n = (0.0 - temp_gamma) * square_sum;
            temp_n = exp(temp_n) * Dtype(-1);

            sum_of_pure_mmd_[j] += temp_n;
            if(i % 2 == 0){
                temp_loss1[j] += temp_n;
            }
            else{
                temp_loss2[j] += temp_n;
            }
            temp_n = temp_n * beta_[j];
            if(i % 2 == 0){
                temp_loss3[j] = temp_n;
            }
            else{
                temp_loss4[j] = temp_n;
            }

            loss += temp_n;
            temp_n = (-2) * temp_gamma * temp_n;
            sum_of_epoch_[j] += temp_n;
            factor_for_diff += temp_n;
            temp_gamma = temp_gamma * kernel_mul_;
        }
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
        caffe_gpu_add(data_dim_, tempX1, bottom_diff + s1 * data_dim_, bottom_diff + s1 * data_dim_);
        caffe_gpu_add(data_dim_, tempX2, bottom_diff + t2 * data_dim_, bottom_diff + t2 * data_dim_);
         
        factor_for_diff = 0;
        // Cross term -k(x_t1, x_s2).
        caffe_gpu_sub<Dtype>(data_dim_, x_t1, x_s2, tempX1);
        caffe_gpu_sub<Dtype>(data_dim_, x_s2, x_t1, tempX2);
        caffe_gpu_dot<Dtype>(data_dim_, tempX1, tempX1, &square_sum);
        temp_gamma = gamma_ / times;
        for(int j = 0; j < num_of_kernel_; j++){
            Dtype temp_n = (0.0 - temp_gamma) * square_sum;
            temp_n = exp(temp_n) * Dtype(-1);
            
            sum_of_pure_mmd_[j] += temp_n;
            if(i % 2 == 0){
                temp_loss1[j] += temp_n;
            }
            else{
                temp_loss2[j] += temp_n;
            }
            temp_n = temp_n * beta_[j];
            if(i % 2 == 0){
                temp_loss3[j] = temp_n;
            }
            else{
                temp_loss4[j] = temp_n;
            }
            
            loss += temp_n;
            temp_n = (-2) * temp_gamma * temp_n;
            sum_of_epoch_[j] += temp_n;
            factor_for_diff += temp_n;
            temp_gamma = temp_gamma * kernel_mul_;
        }
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
        caffe_gpu_add(data_dim_, tempX1, bottom_diff + t1 * data_dim_, bottom_diff + t1 * data_dim_);
        caffe_gpu_add(data_dim_, tempX2, bottom_diff + s2 * data_dim_, bottom_diff + s2 * data_dim_);
        
        factor_for_diff = 0;
        // Target-target term +k(x_t1, x_t2).
        caffe_gpu_sub<Dtype>(data_dim_, x_t1, x_t2, tempX1);
        caffe_gpu_sub<Dtype>(data_dim_, x_t2, x_t1, tempX2);
        caffe_gpu_dot<Dtype>(data_dim_, tempX1, tempX1, &square_sum);
        temp_gamma = gamma_ / times;
        for(int j = 0; j < num_of_kernel_; j++){
            Dtype temp_n = (0.0 - temp_gamma) * square_sum;
            temp_n = exp(temp_n);

            sum_of_pure_mmd_[j] += temp_n;
            if(i % 2 == 0){
                temp_loss1[j] += temp_n;
            }
            else{
                temp_loss2[j] += temp_n;
            }
            temp_n = temp_n * beta_[j];
            if(i % 2 == 0){
                temp_loss3[j] = temp_n;
            }
            else{
                temp_loss4[j] = temp_n;
            }

            loss += temp_n;
            temp_n = (-2) * temp_gamma * temp_n;
            sum_of_epoch_[j] += temp_n;
            factor_for_diff += temp_n;
            temp_gamma = temp_gamma * kernel_mul_;
        }
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
        caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
        caffe_gpu_add(data_dim_, tempX1, bottom_diff + t1 * data_dim_, bottom_diff + t1 * data_dim_);
        caffe_gpu_add(data_dim_, tempX2, bottom_diff + t2 * data_dim_, bottom_diff + t2 * data_dim_);

        if(i % 2 == 1){
            // Every second quad-tuple: accumulate the per-kernel variance
            // estimate from differences of consecutive losses (temp_loss1/2)
            // and Q += d * d^T from the weighted differences (temp_loss3/4).
            caffe_sub(num_of_kernel_, temp_loss1, temp_loss2, temp_loss1);
            caffe_mul(num_of_kernel_, temp_loss1, temp_loss1, temp_loss1);
            caffe_add(num_of_kernel_, temp_loss1, variance_, variance_);
            caffe_sub(num_of_kernel_, temp_loss3, temp_loss4, temp_loss3);
            caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_of_kernel_, num_of_kernel_, 1, Dtype(1),temp_loss3, temp_loss3, Dtype(1), Q_[0]);
        }
    }
    delete [] temp_loss1;
    delete [] temp_loss2;
    delete [] temp_loss3;
    delete [] temp_loss4;
    if(now_iter_ >= iter_of_epoch_){
        gamma_ = Dtype(-1);
        now_iter_ = 0;
        //update beta
        //normalize Q and sum_of_epoch_
        caffe_scal(num_of_kernel_ * num_of_kernel_, Dtype(2) / all_sample_num_, Q_[0]);
        caffe_scal(num_of_kernel_, Dtype(1) / all_sample_num_, sum_of_epoch_);
        
        // Keep only the diagonal of Q and regularize it: Q <- diag(Q) + lambda*I.
        for(int i = 0; i < num_of_kernel_; i++){
            for(int j = 0; j < num_of_kernel_; j++){
                if(i != j){
                    Q_[0][i * num_of_kernel_ + j] = Dtype(0);
                }
                else{
                    Q_[0][i * num_of_kernel_ + j] += I_lambda_;
                }
            }
        }
        // For the "L2" method, Q is replaced by the identity matrix.
        if(method_number_ == 4){
            caffe_set(num_of_kernel_ * num_of_kernel_, Dtype(0), Q_[0]);
            for(int i = 0;i < num_of_kernel_;++i){
                Q_[0][(num_of_kernel_ + 1) * i] += Dtype(1);
            }
        }

        //print_gpu_matrix(Q_[0], num_of_kernel_, num_of_kernel_, num_of_kernel_, num_of_kernel_);
        //for(int i = 0; i < num_of_kernel_; i++){
          //  LOG(INFO) << sum_of_pure_mmd_[i];
        //}
        // temp_bool: no kernel has a positive accumulated MMD (the objective's
        // sign is flipped in that case); has_negative: at least one kernel has
        // a negative accumulated MMD.
        bool temp_bool = true;
        for(int i = 0; i < num_of_kernel_; i++){
            if(sum_of_pure_mmd_[i] > 0){
                temp_bool = false;
            }
        }
        bool has_negative = false;
        for(int i = 0; i < num_of_kernel_; i++){
            if(sum_of_pure_mmd_[i] < 0){
                has_negative = true;
            }
        }
        if(temp_bool){
            caffe_scal(num_of_kernel_ * num_of_kernel_, Dtype(-1), Q_[0]);
        }
        switch(method_number_){
            case 1: 
            { 
                if(has_negative){
                    break;
                }
                //sort by total kernel value
                std::vector<std::pair<Dtype, int> > sorted_kernels = maxn(top_k_, sum_of_pure_mmd_, num_of_kernel_);
                caffe_set(num_of_kernel_, Dtype(0), beta_);
                Dtype top_sum = 0;
                for(int i = 0;i < top_k_;++i){
                    if(sorted_kernels[i].first > 0){
                        top_sum += sorted_kernels[i].first;
                    }
                }    
                LOG(INFO) << "top_sum " << top_sum;
                for(int i = 0;i < top_k_;++i){
                    LOG(INFO) << "mmd " << sorted_kernels[i].first;
                    LOG(INFO) << "id " << sorted_kernels[i].second;
                    if(sorted_kernels[i].first > 0){
                        beta_[sorted_kernels[i].second] = sorted_kernels[i].first / top_sum;
                        LOG(INFO) << "beta " << beta_[sorted_kernels[i].second];
                    }
                } 
                break;
            }
            case 4:
            {
                // Note: variable-length arrays are a GCC/Clang extension;
                // strictly portable C++ would use std::vector here.
                float *equal_cons[num_of_kernel_];
                bool lw_cons[num_of_kernel_];
                bool up_cons[num_of_kernel_];
                float lw_mul[num_of_kernel_];
                float up_mul[num_of_kernel_];
                float obj_first[num_of_kernel_];
                for(int i = 0; i < num_of_kernel_; i++){
                    equal_cons[i] = new float[1];
                    equal_cons[i][0] = sum_of_pure_mmd_[i];
                    lw_cons[i] = true;
                    up_cons[i] = false;
                    lw_mul[i] = 0.0;
                    up_mul[i] = 0.0;
                    obj_first[i] = Dtype(0);
                }
                float b[1];
                if(temp_bool){
                    b[0] = Dtype(-1);
                }
                else{
                    b[0] = Dtype(1);
                }
                CGAL::Const_oneset_iterator<CGAL::Comparison_result> r(CGAL::EQUAL);
                // NOTE: the (float**) cast assumes Dtype == float; it is not
                // valid for the double instantiation of this layer.
                Program qp(num_of_kernel_, 1, equal_cons, b, r, lw_cons, lw_mul, up_cons, up_mul, (float**)Q_, obj_first, 0);
                Solution s = CGAL::solve_quadratic_program(qp, ET());
                LOG(INFO) << "before s";
                LOG(INFO) << s;
                int j = 0;
                if(!has_negative){
                    for(CGAL::Quadratic_program_solution<ET>::Variable_value_iterator
                            it = s.variable_values_begin();
                            it < s.variable_values_end();++it, ++j){
                        beta_[j] = (Dtype)to_double(*it);
                    }
                    Dtype beta_sum = caffe_cpu_asum(num_of_kernel_, beta_);
                    caffe_scal(num_of_kernel_, 1 / beta_sum, beta_);
                    std::vector<std::pair<Dtype, int> > sorted_betas = maxn(top_k_, beta_, num_of_kernel_);
                    caffe_set(num_of_kernel_, Dtype(0), beta_);
                    Dtype top_sum = 0;
                    for(int i = 0;i < top_k_;++i){
                        if(sorted_betas[i].first > 0){
                            top_sum += sorted_betas[i].first;
                        }
                    }
                    for(int i = 0;i < top_k_;++i){
                        LOG(INFO) << "mmd " << sorted_betas[i].first;
                        LOG(INFO) << "id " << sorted_betas[i].second;
                        if(sorted_betas[i].first > 0){
                            beta_[sorted_betas[i].second] = sorted_betas[i].first / top_sum;
                            LOG(INFO) << "beta " << beta_[sorted_betas[i].second];
                        }
                    } 
                }
                else{
                    LOG(INFO) << "has negative value, do not change beta";
                }
                for(int i = 0; i < num_of_kernel_; i++){
                    delete [] equal_cons[i];  // free the per-row constraint buffers
                }
                break;
            }
            case 3:
            {
                for(int i = 0; i < num_of_kernel_; i++){
                    if(sum_of_pure_mmd_[i] != 0){
                        sum_of_pure_mmd_[i] = sum_of_pure_mmd_[i] / (sqrt(variance_[i] + I_lambda_));
                    }
                }
                std::vector<std::pair<Dtype, int> > sorted_kernels = maxn(top_k_, sum_of_pure_mmd_, num_of_kernel_);
                caffe_set(num_of_kernel_, Dtype(0), beta_);
                Dtype top_sum = 0;
                for(int i = 0;i < top_k_;++i){
                    if(sorted_kernels[i].first > 0){
                        top_sum += sorted_kernels[i].first;
                    }
                }    
                LOG(INFO) << "top_sum " << top_sum;
                for(int i = 0;i < top_k_;++i){
                    LOG(INFO) << "mmd " << sorted_kernels[i].first;
                    LOG(INFO) << "id " << sorted_kernels[i].second;
                    if(sorted_kernels[i].first > 0){
                        beta_[sorted_kernels[i].second] = sorted_kernels[i].first / top_sum;
                        LOG(INFO) << "beta " << beta_[sorted_kernels[i].second];
                    }
                } 
                break;
            }
            default:
                break;
        }
        // Reset the per-epoch accumulators for the next epoch.
        caffe_set(num_of_kernel_ * num_of_kernel_, Dtype(0), Q_[0]);
        caffe_set(num_of_kernel_, Dtype(0), variance_);
        all_sample_num_ = 0;
        caffe_set(num_of_kernel_, Dtype(0), sum_of_pure_mmd_);
    }

    /*LOG(INFO) << num_of_kernel_;*/
    /*for(int i = 0; i < num_of_kernel_; i++){*/
        /*LOG(INFO) << "kernel" << i << ": " << sum_of_epoch_[i];*/
    /*}*/
    caffe_set(num_of_kernel_, Dtype(0), sum_of_epoch_);

    caffe_gpu_asum(input_num_ * data_dim_, bottom[0]->gpu_diff(), &sum);
    LOG(INFO) << "after mmd diff sum " << sum;
    LOG(INFO) << "------";
}

INSTANTIATE_LAYER_GPU_FUNCS(MMDLossLayer);

}  // namespace caffe
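For orientation (my summary, not from the original post): at the end of each epoch the "L2" method recomputes the kernel weights by solving, via CGAL, the small quadratic program

$$ \min_{\beta}\ \beta^{\top} Q\, \beta \quad \text{s.t.} \quad d^{\top}\beta = \pm 1,\ \ \beta \ge 0, $$

where d is the vector of per-kernel accumulated MMD values (sum_of_pure_mmd_), the sign of the right-hand side follows the sign of those values, and Q has been replaced by the identity for this method. The solution is normalized to sum to one and then truncated to the top_k_ largest weights.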


3. Declare the parameters in /caffe-master/src/caffe/proto/caffe.proto

   Only two edits are needed:

 1) Add an optional field to message LayerParameter:

 optional MMDParameter mmd_param = 147;

    The ID to use is documented in the comment right above message LayerParameter; after taking 147 for mmd_param, update that comment so the next available ID reads 148.

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)

2) Add the layer's parameter message, also in /caffe-master/src/caffe/proto/caffe.proto:

// Message that stores parameters used by MMD layer
message MMDParameter {
    optional uint32 num_of_kernel = 1 [default = 5];
    optional float mmd_lambda = 2 [default = 1.0];
    optional uint32 iter_of_epoch = 3 [default = 0];
    optional float kernel_mul = 6 [default = 2];
    optional MMDMethodParameter method_param = 4;
    optional string method = 5 [default = "none"];
    optional bool fix_gamma = 7 [default = false];
}

message MMDMethodParameter{
    optional uint32 top_num = 1 [default = 1];
    optional float i_lambda = 2 [default = 0.0001];
}
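Field numbers only have to be unique within each message, so the numbering inside MMDParameter need not be consecutive or in declaration order (kernel_mul takes 6 although it is declared fourth); only the mmd_param field inside LayerParameter competes with the other layer parameters for IDs.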

4. Because the .cu file uses the CGAL library, add CGAL to the LIBRARIES line in the Makefile (in this setup, /home/zhuangni/code/TransferMMD/caffe-mmd/Makefile):

LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5 CGAL

5. Rebuild caffe (e.g. make clean, then make all -j8).

6. Using the layer

layer {
  name: "mmd3/4d"
  type: "MMDLoss"
  bottom: "fc7"
  bottom: "label"
  top: "fc7"
  mmd_param {
    num_of_kernel: 5
    mmd_lambda: 0.3   # usually the only parameter that needs tuning
    iter_of_epoch: 22 # iter_of_epoch = (source_num + target_num) / batch_size
    method_param {
      top_num: 5
      i_lambda: 0.0
    }
    method: "none"
    kernel_mul: 2.0
    fix_gamma: false
  }
}
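A few notes on this definition: bottom[0] ("fc7") carries the features and bottom[1] ("label") the domain information; as perm_source_and_target above shows, a sample is treated as source data when label[2 * i] < 0. The top blob is the same as the bottom, so the layer runs in place: the forward pass leaves the features unchanged, and the backward pass adds the MMD gradient to fc7's diff on top of the gradient from the layers above. Setting mmd_lambda to 0 disables the layer entirely.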

The code for "Learning Transferable Features with Deep Adaptation Networks" is available on GitHub: https://github.com/littletotoro/mmd-caffe