最近在用C++做手寫體識別,踩了許多坑。網上使用SVM的教程大多比較坑:看了半天沒怎麼涉及原理,代碼又比較亂,也沒什麼說明,害我搞了一下午,很是心煩。所幸最後終於找到了方法,所以想把這段比較痛苦的經歷記錄下來,造福後人。如果是想從本文弄懂SVM原理的話,那比較抱歉,本文只講怎麼用。
說明
實驗環境是:VS2017 + OpenCV3.4.0+win10;
關於配置OpenCV.3.4.0, 整個過程的步驟比較簡單,結合代碼,從讀取數據,訓練和預測三個方面來展開。
讀取數據
先從官網下載好四個數據集,鏈接可以看這裏。
在讀取的時候,我對mnist數據集的圖像像素進行了二值化,將大於0的像素值置爲255。
關於讀取步驟不多講,直接上代碼。
- 傳入文件名,讀取標籤集,將標籤數據轉爲Mat矩陣。讀取數據的格式需要指定爲CV_32SC1。這一步很重要!!其他版本可能不同,但是如果是其他類型的需要convertTo來轉換,不然在訓練數據的時候會報錯。
/**
 * @brief Reads an MNIST idx1 label file into a newly allocated N x 1 matrix.
 *
 * The file begins with two 32-bit big-endian integers (magic number and item
 * count) followed by one unsigned byte per label. Every label is stored
 * unchanged as a 32-bit signed integer (CV_32SC1), so a digit file keeps its
 * original class ids instead of being collapsed to 0/255.
 *
 * @param filename   Path of the idx1 label file.
 * @param trainLabel Output. On success it is pointed at a Mat allocated with
 *                   `new`; the caller owns it and must `delete` it. On any
 *                   failure (file cannot be opened, bad header, truncated
 *                   data) a message is written to std::cerr and the pointer
 *                   is left untouched.
 */
void read_Mnist_Label(string filename, Mat* &trainLabel)
{
    ifstream file(filename, ios::binary);
    if (!file.is_open())
    {
        std::cerr << "read_Mnist_Label: cannot open " << filename << '\n';
        return;
    }

    int magic_number = 0;
    int number_of_images = 0;
    file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
    file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
    // Header integers are stored big-endian in the file; swap to host order.
    magic_number = ReverseInt(magic_number);
    number_of_images = ReverseInt(number_of_images);
    cout << "magic number = " << magic_number << endl;
    cout << "number of images = " << number_of_images << endl;

    // 2049 (0x00000801) is the magic number of an idx1 unsigned-byte file.
    const int kLabelFileMagic = 2049;
    if (!file || magic_number != kLabelFileMagic || number_of_images < 0)
    {
        std::cerr << "read_Mnist_Label: invalid header in " << filename << '\n';
        return;
    }

    // Pull all label bytes in with a single read instead of one call per byte.
    Mat raw(number_of_images, 1, CV_8UC1);
    if (number_of_images > 0)
    {
        file.read(reinterpret_cast<char*>(raw.data), number_of_images);
        if (file.gcount() != number_of_images)
        {
            std::cerr << "read_Mnist_Label: truncated data in " << filename << '\n';
            return;
        }
    }

    // The SVM trainer expects integer class labels, hence CV_32SC1.
    trainLabel = new Mat(number_of_images, 1, CV_32SC1);
    if (number_of_images > 0)
    {
        raw.convertTo(*trainLabel, CV_32SC1);
    }
}
- 傳入文件名,讀取訓練數據和測試數據集,將數據轉爲Mat矩陣。讀取數據的格式需要指定爲CV_32F,浮點型。這一步很重要!!其他版本可能不同,不然的話訓練會報莫名的錯誤。
/**
 * @brief Reads an MNIST idx3 image file into a newly allocated float matrix.
 *
 * The file begins with four 32-bit big-endian integers (magic number, image
 * count, rows, columns) followed by one unsigned byte per pixel. Each image
 * becomes one row of an N x (rows*cols) CV_32F matrix, and pixels are
 * binarised: every non-zero pixel becomes 255, zero stays 0.
 *
 * @param filename    Path of the idx3 image file.
 * @param trainImages Output. On success it is pointed at a Mat allocated with
 *                    `new`; the caller owns it and must `delete` it. On any
 *                    failure (file cannot be opened, bad header, truncated
 *                    data) a message is written to std::cerr and the pointer
 *                    is left untouched.
 */
void read_Mnist_Images(string filename, Mat* &trainImages)
{
    ifstream file(filename, ios::binary);
    if (!file.is_open())
    {
        std::cerr << "read_Mnist_Images: cannot open " << filename << '\n';
        return;
    }

    int magic_number = 0;
    int number_of_images = 0;
    int n_rows = 0;
    int n_cols = 0;
    file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
    file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
    file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
    file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
    // Header integers are stored big-endian in the file; swap to host order.
    magic_number = ReverseInt(magic_number);
    number_of_images = ReverseInt(number_of_images);
    n_rows = ReverseInt(n_rows);
    n_cols = ReverseInt(n_cols);
    cout << "magic number = " << magic_number << endl;
    cout << "number of images = " << number_of_images << endl;
    cout << "rows = " << n_rows << endl;
    cout << "cols = " << n_cols << endl;

    // 2051 (0x00000803) is the magic number of an idx3 unsigned-byte file.
    const int kImageFileMagic = 2051;
    if (!file || magic_number != kImageFileMagic ||
        number_of_images < 0 || n_rows <= 0 || n_cols <= 0)
    {
        std::cerr << "read_Mnist_Images: invalid header in " << filename << '\n';
        return;
    }

    // Pull every pixel in with a single read instead of one call per byte.
    const int pixels_per_image = n_rows * n_cols;
    Mat raw(number_of_images, pixels_per_image, CV_8UC1);
    const std::streamsize byte_count = static_cast<std::streamsize>(raw.total());
    if (byte_count > 0)
    {
        file.read(reinterpret_cast<char*>(raw.data), byte_count);
        if (file.gcount() != byte_count)
        {
            std::cerr << "read_Mnist_Images: truncated data in " << filename << '\n';
            return;
        }
    }

    // The SVM trainer expects floating-point samples, hence CV_32F.
    trainImages = new Mat(number_of_images, pixels_per_image, CV_32F);
    if (byte_count > 0)
    {
        // (raw > 0) is an 8-bit mask holding 255 where the pixel is non-zero
        // and 0 elsewhere, which is exactly the binarised image.
        Mat(raw > 0).convertTo(*trainImages, CV_32F);
    }
}
其中,MNIST文件頭裏的整數是以大端(高位字節在前)方式存儲的,而x86主機是小端模式,所以讀出來之後需要把四個字節的順序反轉過來。
/**
 * @brief Reverses the byte order of a 32-bit integer.
 *
 * Used to convert the big-endian integers of an MNIST file header into the
 * host's little-endian representation (the operation is its own inverse).
 * The swap is done on an unsigned copy so that no bit is ever shifted into
 * the sign bit of a signed int.
 *
 * @param i Value whose four bytes should be swapped (int assumed 32 bits).
 * @return  The value with byte 0 <-> byte 3 and byte 1 <-> byte 2 exchanged.
 */
int ReverseInt(int i)
{
    const unsigned int v = static_cast<unsigned int>(i);
    const unsigned int swapped = ((v & 0x000000FFu) << 24) |
                                 ((v & 0x0000FF00u) << 8) |
                                 ((v & 0x00FF0000u) >> 8) |
                                 ((v & 0xFF000000u) >> 24);
    return static_cast<int>(swapped);
}
使用方法如下:
// Training: load the data sets.
// Read the training samples (one image per row).
Mat* trainingDataMat = nullptr;
read_Mnist_Images("mnist_dataset/train-images.idx3-ubyte", trainingDataMat);
// Read the responses (labels) of the training samples.
Mat* responsesMat = nullptr;
read_Mnist_Label("mnist_dataset/train-labels.idx1-ubyte", responsesMat);
// Read the test images and their labels.
// NOTE(review): the directory is spelled "mnist_dataSet" here but
// "mnist_dataset" above; this only resolves to the same folder on a
// case-insensitive file system - confirm the intended name.
Mat* testImage = nullptr;
Mat* testLabel = nullptr;
read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);
訓練
我是直接使用opencv內置的函數,所以整個過程關於訓練部分代碼量比較少。我CPU爲 i5,大概訓練時間爲2min。
主要有以下幾個步驟。
- 聲明SVM分類器
- 設置SVM參數
- 提取數據後開始訓練
- 保存訓練結果
- 創建SVM分類器,然後設置參數。
// Create the classifier and configure its parameters.
Ptr<SVM> SVM_params = SVM::create();
SVM_params->setType(SVM::C_SVC); // C_SVC is for classification; EPS_SVR / NU_SVR are the regression types
SVM_params->setKernel(SVM::RBF);
// The commented-out setters do not affect this project; only gamma and C matter here.
//SVM_params->setDegree(0); // kernel parameter "degree", used by the polynomial kernel
SVM_params->setGamma(0.50625); // kernel parameter "gamma", used by the polynomial/RBF/sigmoid kernels
//SVM_params->setCoef0(0); // kernel parameter "coef0", used by the polynomial/sigmoid kernels
SVM_params->setC(12.5); // optimisation parameter C, used by C_SVC, EPS_SVR and NU_SVR
//SVM_params->setNu(0); // optimisation parameter nu, used by NU_SVC, ONE_CLASS and NU_SVR
//SVM_params->setP(0); // optimisation parameter p (epsilon of the loss function) for EPS_SVR
// Termination criteria: stop after 1000 iterations or once the tolerance 0.01 is reached.
SVM_params->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
- 兩行代碼完成訓練。
// Bundle samples (one per row, ROW_SAMPLE) with their responses, then train.
Ptr<TrainData> tData = TrainData::create(*trainingDataMat, ROW_SAMPLE, *responsesMat);
SVM_params->train(tData);// run the training
-
保存模型
// Write the trained model to svm.xml so it can be reloaded without retraining.
SVM_params->save("svm.xml");
預測
當訓練過後,可直接加載分類器模型,然後提取數據進行預測。
- 加載模型
// Replace the classifier with the model previously saved to svm.xml.
SVM_params = SVM::load("svm.xml");
- 預測
int count = 0; // 統計正確個數
Mat* testImage = nullptr;
Mat* testLabel = nullptr;
read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);
int height = testImage->size().height; // 測試圖片的數量
int width = testImage->size().width; // 圖片的維度
for (int i = 0; i < height; i++) { // 遍歷所有測試圖片
Mat image(1, width, CV_32F); // 單張圖片
for (int j = 0; j < width; j++) { //
image.at<float>(0, j) = testImage->at<float>(i, j);
}
//cout << image.size().height << " " << image.size().width << " " << endl;
//cout << image.cols << " " << image.rows << " " << endl;
//cout << SVM_params->getVarCount() << " " << endl;
if (SVM_params->predict(image)) {
count++;
}
}
cout << "訓練預測的準確率爲:" << (double)count / height << endl;
system("pause");
完整源碼
#include <stdio.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <string>

#include <opencv2/opencv.hpp>
#include <opencv/cv.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml/ml.hpp>
using namespace std;
using namespace cv;
using namespace ml;
// File-scope matrices. None of them is referenced by any function in this
// listing; they are kept only in case other code depends on them.
Mat dealimage;
Mat src;
Mat yangben_gray;
Mat yangben_thresh;
/**
 * @brief Reverses the byte order of a 32-bit integer.
 *
 * Used to convert the big-endian integers of an MNIST file header into the
 * host's little-endian representation (the operation is its own inverse).
 * The swap is done on an unsigned copy so that no bit is ever shifted into
 * the sign bit of a signed int.
 *
 * @param i Value whose four bytes should be swapped (int assumed 32 bits).
 * @return  The value with byte 0 <-> byte 3 and byte 1 <-> byte 2 exchanged.
 */
int ReverseInt(int i)
{
    const unsigned int v = static_cast<unsigned int>(i);
    const unsigned int swapped = ((v & 0x000000FFu) << 24) |
                                 ((v & 0x0000FF00u) << 8) |
                                 ((v & 0x00FF0000u) >> 8) |
                                 ((v & 0xFF000000u) >> 24);
    return static_cast<int>(swapped);
}
/**
 * @brief Reads an MNIST idx1 label file into a newly allocated N x 1 matrix.
 *
 * The file begins with two 32-bit big-endian integers (magic number and item
 * count) followed by one unsigned byte per label. Every label is stored
 * unchanged as a 32-bit signed integer (CV_32SC1), so a digit file keeps its
 * original class ids instead of being collapsed to 0/255.
 *
 * @param filename   Path of the idx1 label file.
 * @param trainLabel Output. On success it is pointed at a Mat allocated with
 *                   `new`; the caller owns it and must `delete` it. On any
 *                   failure (file cannot be opened, bad header, truncated
 *                   data) a message is written to std::cerr and the pointer
 *                   is left untouched.
 */
void read_Mnist_Label(string filename, Mat* &trainLabel)
{
    ifstream file(filename, ios::binary);
    if (!file.is_open())
    {
        std::cerr << "read_Mnist_Label: cannot open " << filename << '\n';
        return;
    }

    int magic_number = 0;
    int number_of_images = 0;
    file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
    file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
    // Header integers are stored big-endian in the file; swap to host order.
    magic_number = ReverseInt(magic_number);
    number_of_images = ReverseInt(number_of_images);
    cout << "magic number = " << magic_number << endl;
    cout << "number of images = " << number_of_images << endl;

    // 2049 (0x00000801) is the magic number of an idx1 unsigned-byte file.
    const int kLabelFileMagic = 2049;
    if (!file || magic_number != kLabelFileMagic || number_of_images < 0)
    {
        std::cerr << "read_Mnist_Label: invalid header in " << filename << '\n';
        return;
    }

    // Pull all label bytes in with a single read instead of one call per byte.
    Mat raw(number_of_images, 1, CV_8UC1);
    if (number_of_images > 0)
    {
        file.read(reinterpret_cast<char*>(raw.data), number_of_images);
        if (file.gcount() != number_of_images)
        {
            std::cerr << "read_Mnist_Label: truncated data in " << filename << '\n';
            return;
        }
    }

    // The SVM trainer expects integer class labels, hence CV_32SC1.
    trainLabel = new Mat(number_of_images, 1, CV_32SC1);
    if (number_of_images > 0)
    {
        raw.convertTo(*trainLabel, CV_32SC1);
    }
}
/**
 * @brief Reads an MNIST idx3 image file into a newly allocated float matrix.
 *
 * The file begins with four 32-bit big-endian integers (magic number, image
 * count, rows, columns) followed by one unsigned byte per pixel. Each image
 * becomes one row of an N x (rows*cols) CV_32F matrix, and pixels are
 * binarised: every non-zero pixel becomes 255, zero stays 0.
 *
 * @param filename    Path of the idx3 image file.
 * @param trainImages Output. On success it is pointed at a Mat allocated with
 *                    `new`; the caller owns it and must `delete` it. On any
 *                    failure (file cannot be opened, bad header, truncated
 *                    data) a message is written to std::cerr and the pointer
 *                    is left untouched.
 */
void read_Mnist_Images(string filename, Mat* &trainImages)
{
    ifstream file(filename, ios::binary);
    if (!file.is_open())
    {
        std::cerr << "read_Mnist_Images: cannot open " << filename << '\n';
        return;
    }

    int magic_number = 0;
    int number_of_images = 0;
    int n_rows = 0;
    int n_cols = 0;
    file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
    file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
    file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
    file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
    // Header integers are stored big-endian in the file; swap to host order.
    magic_number = ReverseInt(magic_number);
    number_of_images = ReverseInt(number_of_images);
    n_rows = ReverseInt(n_rows);
    n_cols = ReverseInt(n_cols);
    cout << "magic number = " << magic_number << endl;
    cout << "number of images = " << number_of_images << endl;
    cout << "rows = " << n_rows << endl;
    cout << "cols = " << n_cols << endl;

    // 2051 (0x00000803) is the magic number of an idx3 unsigned-byte file.
    const int kImageFileMagic = 2051;
    if (!file || magic_number != kImageFileMagic ||
        number_of_images < 0 || n_rows <= 0 || n_cols <= 0)
    {
        std::cerr << "read_Mnist_Images: invalid header in " << filename << '\n';
        return;
    }

    // Pull every pixel in with a single read instead of one call per byte.
    const int pixels_per_image = n_rows * n_cols;
    Mat raw(number_of_images, pixels_per_image, CV_8UC1);
    const std::streamsize byte_count = static_cast<std::streamsize>(raw.total());
    if (byte_count > 0)
    {
        file.read(reinterpret_cast<char*>(raw.data), byte_count);
        if (file.gcount() != byte_count)
        {
            std::cerr << "read_Mnist_Images: truncated data in " << filename << '\n';
            return;
        }
    }

    // The SVM trainer expects floating-point samples, hence CV_32F.
    trainImages = new Mat(number_of_images, pixels_per_image, CV_32F);
    if (byte_count > 0)
    {
        // (raw > 0) is an 8-bit mask holding 255 where the pixel is non-zero
        // and 0 elsewhere, which is exactly the binarised image.
        Mat(raw > 0).convertTo(*trainImages, CV_32F);
    }
}
int main()
{
cout << "訓練數據請輸入 1, 直接使用訓練模型預測輸入2" << endl;
string flag = "";
while (1) {
cin >> flag;
if (flag == "1" || flag == "2")
break;
else {
cout << "輸入1,2" << endl;
}
}
// 創建分類器並設置參數
Ptr<SVM> SVM_params = SVM::create();
if (flag == "1") {
// 訓練 加載模型
// 讀取訓練樣本的數據
Mat* trainingDataMat = nullptr;
read_Mnist_Images("mnist_dataset/train-images.idx3-ubyte", trainingDataMat);
//訓練樣本的響應值
Mat* responsesMat = nullptr;
read_Mnist_Label("mnist_dataset/train-labels.idx1-ubyte", responsesMat);
////===============================創建SVM模型===============================////
cout << SVM_params->getVarCount() << " " << endl;
SVM_params->setType(SVM::C_SVC); //C_SVC用於分類,C_SVR用於迴歸
SVM_params->setKernel(SVM::RBF); //LINEAR線性核函數。SIGMOID爲高斯核函數
// 註釋掉部分對本項目不影響,影響因子只有兩個
//SVM_params->setDegree(0); //核函數中的參數degree,針對多項式核函數;
SVM_params->setGamma(0.50625); //核函數中的參數gamma,針對多項式/RBF/SIGMOID核函數;
//SVM_params->setCoef0(0); //核函數中的參數,針對多項式/SIGMOID核函數;
SVM_params->setC(12.5); //SVM最優問題參數,設置C-SVC,EPS_SVR和NU_SVR的參數;
//SVM_params->setNu(0); //SVM最優問題參數,設置NU_SVC, ONE_CLASS 和NU_SVR的參數;
//SVM_params->setP(0); //SVM最優問題參數,設置EPS_SVR 中損失函數p的值.
//結束條件,即訓練1000次或者誤差小於0.01結束
SVM_params->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
//Mat* responsesTransfer = new Mat(responsesMat->size().height, 1, CV_32FC1);
//responsesMat->convertTo(*responsesMat, CV_32SC1); 類型爲CV_32SC1,此處省略是因爲讀取的時候已指明該格式了。
//trainingDataMat->convertTo(*trainingDataMat, CV_32F); 此處需要注意訓練數據類型爲 CV_32F
//訓練數據和標籤的結合
cout << "開始訓練" << endl;
cout << "訓練數據長度" << trainingDataMat->size().width << " 高度 " << trainingDataMat->size().height << endl;
cout << "標籤數據長度" << responsesMat->size().width << " 高度 " << responsesMat->size().height << endl;
Ptr<TrainData> tData = TrainData::create(*trainingDataMat, ROW_SAMPLE, *responsesMat);
// 訓練分類器
SVM_params->train(tData);//訓練
SVM_params->save("svm.xml");
cout << SVM_params->getVarCount() << " " << endl;
//保存模型
SVM_params->save("svm.xml");
cout << "訓練好了!!!" << endl;
delete trainingDataMat;
delete responsesMat;
trainingDataMat = NULL;
responsesMat = NULL;
}
else if (flag == "2") {
cout << "訓練模型參數加載" << endl;
SVM_params = SVM::load("svm.xml");
//cout << SVM_params.empty() << endl;
}
cout << "-------SVM 開始預測-------------------------------" << endl;
int count = 0; // 統計正確率
Mat* testImage = nullptr;
Mat* testLabel = nullptr;
read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);
int height = testImage->size().height; // 測試圖片的數量
int width = testImage->size().width; // 圖片的維度
for (int i = 0; i < height; i++) { // 遍歷所有測試圖片
Mat image(1, width, CV_32F); // 單張圖片
for (int j = 0; j < width; j++) { //
image.at<float>(0, j) = testImage->at<float>(i, j);
}
//cout << image.size().height << " " << image.size().width << " " << endl;
//cout << image.cols << " " << image.rows << " " << endl;
//cout << SVM_params->getVarCount() << " " << endl;
if (SVM_params->predict(image) == testLabel[i]) {
count++;
}
}
cout << "訓練預測的準確率爲:" << (double)count / height << endl;
system("pause");
//waitKey(0);
return 0;
}