在c++程序中調用caffe訓練完畢的模型進行分類

各位朋友從github下載caffe源碼後，會看到源碼中有一個examples文件夾，其中有一個名爲cpp_classification的文件夾，打開它，裏面有一個名爲classification.cpp的文件，這就是caffe提供給我們的、調用分類網絡進行前向計算並得到分類結果的接口。下面讓我們先來解析一下這個classification.cpp文件，按照慣例，先將源碼及注釋放出：

#include <caffe/caffe.hpp>
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif  // USE_OPENCV
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
 
#ifdef USE_OPENCV
using namespace caffe;  // NOLINT(build/namespaces)
using std::string;
 
/* Pair (label, confidence) representing a prediction: the class name and the
 * score reported for that class. */
typedef std::pair<string, float> Prediction;
 
/* Classifier wraps a trained Caffe network and classifies single images.
 * The constructor loads the network, the mean file and the label file;
 * Classify() then runs a forward pass and returns the best-scoring labels. */
class Classifier {
 public:
  /* model_file:   path of the network definition handed to Net<float>.
   * trained_file: path of the trained weights copied into the network.
   * mean_file:    path of the dataset mean, read as a binary BlobProto.
   * label_file:   path of a text file holding one label per line. */
  Classifier(const string& model_file,
             const string& trained_file,
             const string& mean_file,
             const string& label_file);

  /* Runs the network on img and returns up to N (label, score) pairs,
   * highest score first. N defaults to 5. */
  std::vector<Prediction> Classify(const cv::Mat& img, int N = 5);

 private:
  /* Loads the mean file and fills mean_ with its per-channel average. */
  void SetMean(const string& mean_file);

  /* Feeds img through the network and returns one score per output channel. */
  std::vector<float> Predict(const cv::Mat& img);

  /* Fills input_channels with cv::Mat headers that alias the network's
   * input blob, one per channel. */
  void WrapInputLayer(std::vector<cv::Mat>* input_channels);

  /* Converts, resizes and mean-subtracts img, then splits it into
   * input_channels. */
  void Preprocess(const cv::Mat& img,
                  std::vector<cv::Mat>* input_channels);

  /* The loaded Caffe network. */
  shared_ptr<Net<float> > net_;
  /* Width and height of the network's input blob. */
  cv::Size input_geometry_;
  /* Channel count of the network's input blob (checked to be 1 or 3). */
  int num_channels_;
  /* Mean image subtracted from every input, sized like input_geometry_. */
  cv::Mat mean_;
  /* Class names, in the order they appear in the label file. */
  std::vector<string> labels_;
};
 
/* Builds a ready-to-use classifier: selects the compute mode, loads the
 * network definition and its trained weights, validates the shape of the
 * network's input and output, then loads the mean file and the labels.
 * Any failed validation aborts through the CHECK macros. */
Classifier::Classifier(const string& model_file,
                       const string& trained_file,
                       const string& mean_file,
                       const string& label_file) {
  /* The compute mode is chosen at build time through CPU_ONLY. */
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network structure in TEST phase, then its trained weights. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  /* The rest of the class always uses input_blobs()[0] and
   * output_blobs()[0], so the network must have exactly one of each. */
  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  /* Remember the channel count and spatial size the network expects. */
  Blob<float>* const in_blob = net_->input_blobs()[0];
  num_channels_ = in_blob->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
    << "Input layer should have 1 or 3 channels.";
  const int in_width = in_blob->width();
  const int in_height = in_blob->height();
  input_geometry_ = cv::Size(in_width, in_height);

  /* Load the binaryproto mean file (needs num_channels_ and
   * input_geometry_, which are set above). */
  SetMean(mean_file);

  /* Load labels: every line of the file becomes one entry of labels_. */
  std::ifstream labels(label_file.c_str());
  CHECK(labels) << "Unable to open labels file " << label_file;
  for (string label; std::getline(labels, label); )
    labels_.push_back(label);

  /* There must be exactly one label per channel of the output blob. */
  Blob<float>* output_layer = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), output_layer->channels())
    << "Number of labels is different from the output layer dimension.";
}
 
/* Orders (score, index) pairs by descending score: returns true when lhs has
 * the strictly larger score. */
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
}

/* Return the indices of the top N values of vector v, largest value first.
 *
 * N is clamped to the range [0, v.size()]: asking for more entries than v
 * holds returns all indices, and a non-positive N returns an empty vector.
 * Without the clamp, pairs.begin() + N would leave the valid iterator range
 * and the result loop would read past the end of pairs. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  const int available = static_cast<int>(v.size());
  const int top = std::max(0, std::min(N, available));

  /* Pair every score with its index so the index survives the sort. */
  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  /* Only the first `top` positions need to be in sorted order. */
  std::partial_sort(pairs.begin(), pairs.begin() + top, pairs.end(), PairCompare);

  std::vector<int> result;
  result.reserve(top);
  for (int i = 0; i < top; ++i)
    result.push_back(pairs[i].second);
  return result;
}
 
/* Return the top N predictions for img as (label, score) pairs, best first.
 * N is capped at the number of known labels. */
std::vector<Prediction> Classifier::Classify(const cv::Mat& img, int N) {
  /* One score per output channel of the network. */
  const std::vector<float> scores = Predict(img);

  N = std::min<int>(labels_.size(), N);
  const std::vector<int> best = Argmax(scores, N);

  /* Translate each winning index into its label and score. */
  std::vector<Prediction> top;
  for (int rank = 0; rank < N; ++rank) {
    const int label_idx = best[rank];
    top.push_back(Prediction(labels_[label_idx], scores[label_idx]));
  }
  return top;
}
 
/* Load the mean file in binaryproto format. */
void Classifier::SetMean(const string& mean_file) {//設置數據集的平均值
  BlobProto blob_proto;
  ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);//用定義的均值文件路徑將均值文件讀入proto中
 
  /* Convert from BlobProto to Blob<float> */
  Blob<float> mean_blob;
  mean_blob.FromProto(blob_proto);//將proto中存儲的均值文件轉移到blob中
  CHECK_EQ(mean_blob.channels(), num_channels_)//覈驗均值的通道數是否等於輸入圖像的通道數，如果不相等的話則爲異常
    << "Number of channels of mean file doesn't match input layer.";
 
  /* The format of the mean file is planar 32-bit float BGR or grayscale. */
  std::vector<cv::Mat> channels;//將mean_blob中的數據轉化爲Mat時的存儲向量
  float* data = mean_blob.mutable_cpu_data();//指向均值blob的指針
  for (int i = 0; i < num_channels_; ++i) {
    /* Extract an individual channel. */
    cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);//存儲均值文件的每一個通道轉化得到的Mat
    channels.push_back(channel);//將均值文件的所有通道轉化成的Mat一個一個地存儲到channels中
    data += mean_blob.height() * mean_blob.width();//在均值文件上移動一個通道
  }
 
  /* Merge the separate channels into a single image. */
  cv::Mat mean;
  cv::merge(channels, mean);//將得到的所有通道合成爲一張圖
 
  /* Compute the global mean pixel value and create a mean image
   * filled with this value. */
  cv::Scalar channel_mean = cv::mean(mean);//求得均值文件的每個通道的平均值，記錄在channel_mean中
  mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);//用上面求得的各個通道的平均值初始化mean_，作爲數據集圖像的均值
}
 
/* Runs one forward pass on img and returns the values of the network's
 * output blob, one per output channel. */
std::vector<float> Classifier::Predict(const cv::Mat& img) {
  /* Shape the input blob for a single image of the expected size. */
  Blob<float>* const in_blob = net_->input_blobs()[0];
  in_blob->Reshape(1, num_channels_,
                   input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  /* Alias the input blob with per-channel Mats, then let Preprocess write
   * the prepared image through them. */
  std::vector<cv::Mat> planes;
  WrapInputLayer(&planes);
  Preprocess(img, &planes);

  net_->Forward();

  /* Copy the output layer to a std::vector. */
  Blob<float>* const out_blob = net_->output_blobs()[0];
  const float* const first = out_blob->cpu_data();
  std::vector<float> scores(first, first + out_blob->channels());
  return scores;
}
 
/* Wrap the input layer of the network in separate cv::Mat objects
 * (one per channel). This way we save one memcpy operation and we
 * don't need to rely on cudaMemcpy2D. The last preprocessing
 * operation will write the separate channels directly to the input
 * layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
  Blob<float>* const in_blob = net_->input_blobs()[0];

  const int cols = in_blob->width();
  const int rows = in_blob->height();
  const int plane_size = cols * rows;

  /* Each Mat is a header over one plane of the blob's CPU buffer. */
  float* plane = in_blob->mutable_cpu_data();
  for (int c = 0; c < in_blob->channels(); ++c, plane += plane_size) {
    input_channels->push_back(cv::Mat(rows, cols, CV_32FC1, plane));
  }
}
 
/* Prepares img for the network: matches the channel count, resizes to the
 * input geometry, converts to 32-bit float, subtracts the mean and splits
 * the result into input_channels. */
void Classifier::Preprocess(const cv::Mat& img,
                            std::vector<cv::Mat>* input_channels) {
  /* Convert the input image to the input image format of the network.
   * Combinations not listed here pass the image through unchanged. */
  const int src_channels = img.channels();
  cv::Mat converted;
  if (num_channels_ == 1 && src_channels == 3) {
    cv::cvtColor(img, converted, cv::COLOR_BGR2GRAY);
  } else if (num_channels_ == 1 && src_channels == 4) {
    cv::cvtColor(img, converted, cv::COLOR_BGRA2GRAY);
  } else if (num_channels_ == 3 && src_channels == 4) {
    cv::cvtColor(img, converted, cv::COLOR_BGRA2BGR);
  } else if (num_channels_ == 3 && src_channels == 1) {
    cv::cvtColor(img, converted, cv::COLOR_GRAY2BGR);
  } else {
    converted = img;
  }

  /* Resize only when the size differs from what the network expects. */
  cv::Mat resized;
  if (converted.size() == input_geometry_) {
    resized = converted;
  } else {
    cv::resize(converted, resized, input_geometry_);
  }

  /* Convert to 32-bit float with the matching channel count. */
  cv::Mat as_float;
  resized.convertTo(as_float, num_channels_ == 3 ? CV_32FC3 : CV_32FC1);

  cv::Mat centered;
  cv::subtract(as_float, mean_, centered);

  /* This operation will write the separate BGR planes directly to the
   * input layer of the network because it is wrapped by the cv::Mat
   * objects in input_channels. */
  cv::split(centered, *input_channels);

  /* Check that the first channel Mat still points at the input blob's
   * CPU data, i.e. that the split wrote into the blob itself. */
  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
        == net_->input_blobs()[0]->cpu_data())
    << "Input channels are not wrapping the input layer of the network.";
}
 
int main(int argc, char** argv) {//主函數
  if (argc != 6) {/*覈驗命令行參數是否爲6，這6個參數分別爲classification編譯生成的可執行文件，測試模型時記錄網絡結構的prototxt文件路徑，
  訓練完畢的caffemodel文件路徑，記錄數據集均值的文件路徑，記錄類別標籤的文件路徑，需要送入網絡進行分類的圖片文件路徑*/
    std::cerr << "Usage: " << argv[0]
              << " deploy.prototxt network.caffemodel"
              << " mean.binaryproto labels.txt img.jpg" << std::endl;
    return 1;
  }
 
  ::google::InitGoogleLogging(argv[0]);//InitGoogleLogging做了一些初始化glog的工作
 
  //取四個參數
  string model_file   = argv[1];
  string trained_file = argv[2];
  string mean_file    = argv[3];
  string label_file   = argv[4];
  Classifier classifier(model_file, trained_file, mean_file, label_file);//進行檢測網絡的初始化
 
  string file = argv[5];//取得需要進行檢測的圖片的路徑
 
  std::cout << "---------- Prediction for "
            << file << " ----------" << std::endl;
 
  cv::Mat img = cv::imread(file, -1);//讀入圖片
  CHECK(!img.empty()) << "Unable to decode image " << file;
  std::vector<Prediction> predictions = classifier.Classify(img);//進行網絡的前向計算，並且取到概率最大的前N類對應的類別名稱
 
  /* Print the top N predictions. */
  for (size_t i = 0; i < predictions.size(); ++i) {//打印出概率最大的前N類並給出概率
    Prediction p = predictions[i];
    std::cout << std::fixed << std::setprecision(4) << p.second << " - \""
              << p.first << "\"" << std::endl;
  }
}
#else
/* Fallback entry point used when USE_OPENCV is not defined: the classifier
 * code is compiled out, so the program only reports the missing dependency
 * through a fatal log message. */
int main(int argc, char** argv) {
  LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV.";
}
#endif  // USE_OPENCV

以上是classification.cpp的源碼。在這個文件中有一個類Classifier，這個類主要由兩個部分組成，第一個部分是它的構造函數Classifier：

Classifier::Classifier(const string& model_file,
                       const string& trained_file,
                       const string& mean_file,
                       const string& label_file)

構造函數的主要作用是對操作網絡進行前傳得到分類結果的類對象進行初始化，初始化工作包括如下部分：
設置caffe的工作模式(CPU/GPU)->讀取網絡結構->讀取訓練得到的網絡參數->獲取網絡規定的單張輸入圖片的尺寸(寬與高)->讀取數據集的均值文件->讀取定義的所有類別標籤(類別名稱)

值得一提的是，在構造函數中讀取數據集的均值文件時候，使用了一個SetMean函數，該函數的主要作用是將均值文件(通常是binaryproto格式)讀到proto中，再由FromProto函數將proto中的均值文件讀取到blob中。

在構造函數中，還進行了其他的一些覈驗工作，比如檢驗網絡是否只有一個輸入blob(進行模型調用時每次只送入單張圖像)，檢驗網絡是否只有一個輸出blob，檢驗輸入圖像的通道數是否爲3或者1，檢驗網絡最後輸出的通道數是否等於標籤文檔中定義的標籤的數目。同時，在SetMean函數中，檢驗了均值blob的通道數是否等於輸入圖像的通道數(假如輸入圖像是三通道的，那麼B，G，R通道對應各自的均值)。

除了構造函數，第二部分是進行網絡前傳得到分類結果的Classify函數：

std::vector<Prediction> Classify(const cv::Mat& img, int N = 5)

Classify函數接受單張圖片，並得到概率最大的前N類結果，在這裏N默認爲5，Classify函數的核心爲Predict函數：

std::vector<float> Predict(const cv::Mat& img)

Predict函數進行網絡的前傳，如果網絡用softmax分類器的話，則返回的是網絡輸入對應於每一個類別的概率，這些概率存儲在一個vector<float>中。在這裏筆者舉個例子，分類網絡中分類器採用softmax分類，且類別有10類，那麼，輸出的blob就會有10個通道，每個通道的長寬都爲1(因爲是10個數，這10個數表徵輸入屬於10類中每一類的概率，這10個數之和應該爲1)，而這10個float數就會存儲在Predict返回的vector<float>中。Predict函數主要進行了以下的工作：
進行網絡輸入blob的初始化->進行網絡中各層的初始化->將輸入圖像的各個通道放入網絡的輸入blob中->進行網絡的前向傳播->獲取輸入圖片屬於每一個類別的概率

其中，上述各步驟中的第三步非常巧妙，caffe是在WrapInputLayer函數中首先將網絡的輸入blob同一個vector<Mat>* input_channels關聯起來，再在Preprocess函數中將輸入圖像逐通道放入input_channels中，這時，輸入圖像就被寫入到了輸入blob中。

參考文獻：

https://blog.csdn.net/jiongnima/article/details/70197866?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task

在c++程序中調用caffe訓練完畢的模型進行分類

如何使用 JS 判斷用戶是否處於活躍狀態

Mono 支持LoongArch架構

lightdb秒級增加列和刪除列（not null帶默認值）

lightdb數據庫超時相關控制參數

通過HPA+CronHPA組合應對業務複雜彈性伸縮場景

❤️‍🔥 Solon Cloud Event 新的事務特性與應用

lightdb mysql 8.0兼容之不可見主鍵

使用 JS 實現在瀏覽器控制檯打印圖片 console.image()

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（四）使用域名訪問網站應用

caffe不同lr_policy參數設置方法

權重衰減（weight decay）與L2正則化

caffe常用網絡層及參數說明

不同車牌類型尺寸

進程與線程（代碼對比）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結