caffe源碼解讀（６）－數據讀取層DataLayer

數據讀取層除了可以從LMDB、LevelDB數據庫中讀取數據之外，也可以直接從原始圖像讀取（ImageDataLayer）。

一.數據結構描述

// Options for the database-backed data layer. The C++ code reads this
// message through LayerParameter::data_param().
message DataParameter {
  // Database backends that can be selected via `backend`.
  enum DB {
    LEVELDB = 0;// use LevelDB
    LMDB = 1;// use LMDB
  }
  // NOTE(review): presumably the path of the database to read -- confirm.
  optional string source = 1;
  // NOTE(review): presumably the number of samples per batch -- confirm.
  optional uint32 batch_size = 4;
  // Defaults to 0. NOTE(review): presumably an upper bound on the number of
  // records randomly skipped at start-up -- confirm against the reader code.
  optional uint32 rand_skip = 7 [default = 0];
  // Which backend to use; LEVELDB unless set otherwise.
  optional DB backend = 8 [default = LEVELDB];
  // The four fields below (scale, mean_file, crop_size, mirror) look like
  // per-sample preprocessing options; their exact semantics are not visible
  // here. Defaults: scale 1, crop_size 0, mirror false, mean_file unset.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  optional uint32 crop_size = 5 [default = 0];
  optional bool mirror = 6 [default = false];
  // Force encoded images to be treated as 3-channel color images.
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue: number of batches loaded into host memory ahead of
  // time (4 by default).
  optional uint32 prefetch = 10 [default = 4];
}

二.數據讀取層實現

數據讀取層實現位於src/caffe/layers/base_data_layer.cpp中，內容如下：
(1)BaseDataLayer

// Constructor: passes the layer parameter on to the Layer base class and
// keeps a copy of its transform_param for building the data transformer
// later in LayerSetUp.
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param), transform_param_(param.transform_param()) {}
// Common set-up for data layers: decides whether labels are emitted,
// builds the data transformer, then lets the subclass finish the job.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Exactly one top blob means data only; any other count means the layer
  // also outputs labels.
  output_labels_ = (top.size() != 1);

  // Create the transformer from the stored transform parameters and the
  // layer's phase, then initialise its random number generator.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();

  // Subclasses are responsible for setting up the sizes of bottom and top.
  DataLayerSetUp(bottom, top);
}

(2)BasePrefetchingDataLayer

// Constructor: sizes the prefetch pool from data_param().prefetch(),
// allocates one Batch per slot and places every batch on the free queue.
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(), prefetch_full_(), prefetch_current_() {
  for (int slot = 0; slot < prefetch_.size(); ++slot) {
    Batch<Dtype>* const fresh_batch = new Batch<Dtype>();
    // The pool entry takes ownership; the queue only holds the raw pointer.
    prefetch_[slot].reset(fresh_batch);
    prefetch_free_.push(fresh_batch);
  }
}
// Set-up for prefetching data layers: runs the base set-up, touches every
// prefetch buffer from the calling thread, then starts the prefetch thread.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);

  // The cpu_data / gpu_data calls below are made before the prefetch thread
  // exists, so that the prefetch thread does not accidentally issue
  // cudaMalloc calls at the same time as the main thread. On some GPUs
  // skipping this seems to cause failures.

  // First pass: request the mutable CPU data of every batch.
  for (int n = 0; n < prefetch_.size(); ++n) {
    Batch<Dtype>& slot = *prefetch_[n];
    slot.data_.mutable_cpu_data();
    if (this->output_labels_) {
      slot.label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  // Second pass (builds with GPU support, and only when running in GPU
  // mode): request the mutable GPU data of every batch as well.
  if (Caffe::mode() == Caffe::GPU) {
    for (int n = 0; n < prefetch_.size(); ++n) {
      Batch<Dtype>& slot = *prefetch_[n];
      slot.data_.mutable_gpu_data();
      if (this->output_labels_) {
        slot.label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  // Launch the internal prefetch thread.
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}

(3)InternalThreadEntry內部預取線程入口

// Body of the internal prefetch thread: repeatedly takes a batch from the
// free queue, fills it through load_batch(), optionally pushes it to the
// GPU, and hands it over on the full queue until asked to stop.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  // In GPU mode, uploads go through a dedicated non-blocking CUDA stream.
  cudaStream_t upload_stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&upload_stream,
                                         cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {
      // Take a batch from the free queue and load data into it.
      Batch<Dtype>* filled = prefetch_free_.pop();
      load_batch(filled);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        filled->data_.data().get()->async_gpu_push(upload_stream);
        if (this->output_labels_) {
          filled->label_.data().get()->async_gpu_push(upload_stream);
        }
        // Wait for the asynchronous pushes before publishing the batch.
        CUDA_CHECK(cudaStreamSynchronize(upload_stream));
      }
#endif
      // Publish the loaded batch on the full queue.
      prefetch_full_.push(filled);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown; it ends the loop.
  }
#ifndef CPU_ONLY
  // Release the CUDA stream created at the top of the function.
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(upload_stream));
  }
#endif
}

(4)Forward_cpu前向傳播

// CPU forward pass: returns the previously served batch to the free queue,
// takes the next loaded batch from the full queue and exposes its contents
// through the top blobs.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Recycle the batch handed out by the previous call, if there was one.
  if (prefetch_current_) {
    prefetch_free_.push(prefetch_current_);
  }
  // Fetch the next loaded batch (presumably waits until one is available).
  prefetch_current_ = prefetch_full_.pop("Waiting for data");

  // Reshape the data top to the loaded data and pass it the batch's CPU
  // data pointer.
  Blob<Dtype>* const data_top = top[0];
  data_top->ReshapeLike(prefetch_current_->data_);
  data_top->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());

  if (!this->output_labels_) {
    return;
  }
  // Same treatment for the label top when labels are being output.
  Blob<Dtype>* const label_top = top[1];
  label_top->ReshapeLike(prefetch_current_->label_);
  label_top->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
}