最近在用C++做手寫體識別，踩了許多坑。。網上使用SVM的教程遇到的都比較坑，看了半天沒怎麼涉及原理，而代碼又比較亂，沒怎麼介紹，害我搞了一下午，所以就很煩，所幸最後終於找到了方法，所以想把這段比較痛苦的經歷記錄下來，造福後人。如果是想從本文弄懂原理的話，那比較抱歉。

說明

實驗環境是：VS2017 + OpenCV3.4.0+win10;

關於 OpenCV 3.4.0 的配置，這裏不再展開。整個過程的步驟比較簡單，下面結合代碼，從讀取數據、訓練和預測三個方面來展開。

讀取數據

先從官網下載好四個數據集，鏈接可以看這裏。

在讀取的時候，我對mnist數據集進行了二值化，將大於0的數據置爲255。

關於讀取步驟不多講，直接上代碼。

傳入文件名，讀取標籤集，將標籤數據轉爲Mat矩陣。讀取數據的格式需要指定爲CV_32SC1。這一步很重要！！其他版本可能不同，但是如果是其他類型的需要convertTo來轉換，不然在訓練數據的時候會報錯。

// Reads an MNIST idx1-ubyte label file into a newly allocated N x 1 CV_32SC1 matrix.
//
// filename   - path of the label file.
// trainLabel - output parameter. On success it is pointed at a heap-allocated Mat
//              that the caller must delete. It is left untouched when the file
//              cannot be opened or the header cannot be used.
//
// Each label byte is stored unchanged as a 32-bit int, one label per row.
void read_Mnist_Label(string filename, Mat* &trainLabel)
{
	ifstream file(filename, ios::binary);
	if (!file.is_open())
	{
		cerr << "cannot open label file: " << filename << endl;
		return;
	}

	// The header is two 32-bit integers; ReverseInt swaps their byte order.
	int magic_number = 0;
	int number_of_images = 0;
	file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
	file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
	magic_number = ReverseInt(magic_number);
	number_of_images = ReverseInt(number_of_images);
	cout << "magic number = " << magic_number << endl;
	cout << "number of images = " << number_of_images << endl;

	if (!file || number_of_images < 0)
	{
		cerr << "invalid label file header: " << filename << endl;
		return;
	}

	trainLabel = new Mat(number_of_images, 1, CV_32SC1);

	for (int i = 0; i < number_of_images; i++)
	{
		unsigned char label = 0;
		file.read(reinterpret_cast<char*>(&label), sizeof(label));
		// The matrix is CV_32SC1, so elements must be accessed as int; writing
		// through at<float> would store a float bit pattern instead of the label.
		// The label is kept as read: thresholding it (as is done for pixels)
		// would collapse all non-zero classes into one.
		trainLabel->at<int>(i, 0) = label;
	}
}

傳入文件名，讀取訓練數據和測試數據集，將數據轉爲Mat矩陣。讀取數據的格式需要指定爲CV_32F，浮點型。這一步很重要！！其他版本可能不同，不然的話訓練會報莫名的錯誤。


// Reads an MNIST idx3-ubyte image file into a newly allocated CV_32F matrix with
// one image per row (number_of_images x (n_rows * n_cols)).
//
// filename    - path of the image file.
// trainImages - output parameter. On success it is pointed at a heap-allocated Mat
//               that the caller must delete. It is left untouched when the file
//               cannot be opened or the header cannot be used.
//
// Pixels are binarized while loading: every non-zero byte becomes 255, zero stays 0.
void read_Mnist_Images(string filename, Mat* &trainImages)
{
	ifstream file(filename, ios::binary);
	if (!file.is_open())
	{
		cerr << "cannot open image file: " << filename << endl;
		return;
	}

	// The header is four 32-bit integers; ReverseInt swaps their byte order.
	int magic_number = 0;
	int number_of_images = 0;
	int n_rows = 0;
	int n_cols = 0;
	file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
	file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
	file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
	file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
	magic_number = ReverseInt(magic_number);
	number_of_images = ReverseInt(number_of_images);
	n_rows = ReverseInt(n_rows);
	n_cols = ReverseInt(n_cols);

	cout << "magic number = " << magic_number << endl;
	cout << "number of images = " << number_of_images << endl;
	cout << "rows = " << n_rows << endl;
	cout << "cols = " << n_cols << endl;

	if (!file || number_of_images < 0 || n_rows < 0 || n_cols < 0)
	{
		cerr << "invalid image file header: " << filename << endl;
		return;
	}

	const int pixels_per_image = n_rows * n_cols;
	trainImages = new Mat(number_of_images, pixels_per_image, CV_32F);

	// Read one whole image per call instead of one byte per call.
	vector<unsigned char> buffer;
	for (int i = 0; i < number_of_images; i++)
	{
		// Reset to zeros first so that a short or failed read leaves zeros,
		// which is what the byte-by-byte version produced.
		buffer.assign(pixels_per_image, 0);
		file.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size()));

		float* row = trainImages->ptr<float>(i);
		for (int p = 0; p < pixels_per_image; p++)
		{
			row[p] = (buffer[p] > 0) ? 255.0f : 0.0f;
		}
	}
}

其中，MNIST 文件頭中的整數是按大端（高位字節在前）存儲的，而 x86 等平臺是小端，所以讀出後需要用下面的函數反轉字節序。

// Returns i with its four bytes in reverse order (byte 0 <-> byte 3, byte 1 <-> byte 2).
// Assumes a 32-bit int.
int ReverseInt(int i)
{
	// Work on an unsigned copy so that no shift moves a bit into the sign
	// position of a signed int.
	const unsigned int v = static_cast<unsigned int>(i);
	const unsigned int swapped =
		((v & 0x000000FFu) << 24) |
		((v & 0x0000FF00u) << 8) |
		((v & 0x00FF0000u) >> 8) |
		((v & 0xFF000000u) >> 24);
	return static_cast<int>(swapped);
}

使用方法如下：

	// Training path: load the data sets
      // Read the training samples
    Mat* trainingDataMat = nullptr;
	read_Mnist_Images("mnist_dataset/train-images.idx3-ubyte", trainingDataMat);

        // Responses (labels) of the training samples
	Mat* responsesMat = nullptr;
	read_Mnist_Label("mnist_dataset/train-labels.idx1-ubyte", responsesMat);


	// Test images and their labels.
	// NOTE(review): the directory is spelled "mnist_dataSet" here but
	// "mnist_dataset" above; the two differ on a case-sensitive file system.
	Mat* testImage = nullptr;
	Mat* testLabel = nullptr;

	read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
	read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);

訓練

我是直接使用opencv內置的函數，所以整個過程關於訓練部分代碼量比較少。我CPU爲 i5，大概訓練時間爲2min。

主要有以下幾個步驟。

聲明SVM分類器
設置SVM參數
提取數據後開始訓練
保存訓練結果

創建SVM分類器，然後設置參數。

// Create the classifier and set its parameters
Ptr<SVM> SVM_params = SVM::create();

SVM_params->setType(SVM::C_SVC);     // C_SVC: C-support-vector classification
SVM_params->setKernel(SVM::RBF);   

// The commented-out setters do not matter for this project; only gamma and C are set
//SVM_params->setDegree(0);            // kernel parameter degree, used by the polynomial kernel
SVM_params->setGamma(0.50625);       // kernel parameter gamma, used by the polynomial/RBF/sigmoid kernels
//SVM_params->setCoef0(0);             // kernel parameter coef0, used by the polynomial/sigmoid kernels
SVM_params->setC(12.5);                // optimization parameter C, used by C_SVC, EPS_SVR and NU_SVR
//SVM_params->setNu(0);                // optimization parameter nu, used by NU_SVC, ONE_CLASS and NU_SVR
//SVM_params->setP(0);                 // optimization parameter p of the EPS_SVR loss function
// Termination criteria: at most 1000 iterations, or stop once the 0.01 tolerance is reached
SVM_params->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));

兩行代碼完成訓練。

// Bundle the samples (ROW_SAMPLE layout) with their responses, then train on them.
Ptr<TrainData> tData = TrainData::create(*trainingDataMat, ROW_SAMPLE, *responsesMat);
SVM_params->train(tData);// train

保存模型

	// Write the trained model to "svm.xml".
	SVM_params->save("svm.xml");

預測

當訓練過後，可直接加載分類器模型，然後提取數據進行預測。

加載模型

// Replace the classifier with the model stored in "svm.xml".
SVM_params = SVM::load("svm.xml");

預測

    int count = 0;   // 統計正確個數
	Mat* testImage = nullptr;
	Mat* testLabel = nullptr;

	read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
	read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);

	int height = testImage->size().height;  // 測試圖片的數量
	int width = testImage->size().width;    // 圖片的維度

	for (int i = 0; i < height; i++) { // 遍歷所有測試圖片
		Mat image(1, width, CV_32F);  // 單張圖片
		for (int j = 0; j < width; j++) { //
			image.at<float>(0, j) = testImage->at<float>(i, j);
		}
		//cout << image.size().height << " " << image.size().width << " " << endl;
		//cout << image.cols << " " << image.rows << " " << endl;
		//cout << SVM_params->getVarCount() << " " << endl;
		if (SVM_params->predict(image)) {
			count++;
		}

	}
	cout << "訓練預測的準確率爲:" << (double)count / height << endl;
	system("pause");

完整源碼

#include <stdio.h>  
#include <time.h>  
#include <cstdlib>
#include <fstream>
#include <iostream> 
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>  
#include <opencv/cv.h>  
#include <opencv2/core/core.hpp>  
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/ml/ml.hpp>  



using namespace std;
using namespace cv;
using namespace ml;

// NOTE(review): these four file-scope matrices are not referenced by any code
// visible in this listing; confirm they are unused before removing them.
Mat dealimage;
Mat src;
Mat yangben_gray;
Mat yangben_thresh;

// Returns i with its four bytes in reverse order (byte 0 <-> byte 3, byte 1 <-> byte 2).
// Assumes a 32-bit int.
int ReverseInt(int i)
{
	// Work on an unsigned copy so that no shift moves a bit into the sign
	// position of a signed int.
	const unsigned int v = static_cast<unsigned int>(i);
	const unsigned int swapped =
		((v & 0x000000FFu) << 24) |
		((v & 0x0000FF00u) << 8) |
		((v & 0x00FF0000u) >> 8) |
		((v & 0xFF000000u) >> 24);
	return static_cast<int>(swapped);
}

// Reads an MNIST idx1-ubyte label file into a newly allocated N x 1 CV_32SC1 matrix.
//
// filename   - path of the label file.
// trainLabel - output parameter. On success it is pointed at a heap-allocated Mat
//              that the caller must delete. It is left untouched when the file
//              cannot be opened or the header cannot be used.
//
// Each label byte is stored unchanged as a 32-bit int, one label per row.
void read_Mnist_Label(string filename, Mat* &trainLabel)
{
	ifstream file(filename, ios::binary);
	if (!file.is_open())
	{
		cerr << "cannot open label file: " << filename << endl;
		return;
	}

	// The header is two 32-bit integers; ReverseInt swaps their byte order.
	int magic_number = 0;
	int number_of_images = 0;
	file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
	file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
	magic_number = ReverseInt(magic_number);
	number_of_images = ReverseInt(number_of_images);
	cout << "magic number = " << magic_number << endl;
	cout << "number of images = " << number_of_images << endl;

	if (!file || number_of_images < 0)
	{
		cerr << "invalid label file header: " << filename << endl;
		return;
	}

	trainLabel = new Mat(number_of_images, 1, CV_32SC1);

	for (int i = 0; i < number_of_images; i++)
	{
		unsigned char label = 0;
		file.read(reinterpret_cast<char*>(&label), sizeof(label));
		// The matrix is CV_32SC1, so elements must be accessed as int; writing
		// through at<float> would store a float bit pattern instead of the label.
		// The label is kept as read: thresholding it (as is done for pixels)
		// would collapse all non-zero classes into one.
		trainLabel->at<int>(i, 0) = label;
	}
}

// Reads an MNIST idx3-ubyte image file into a newly allocated CV_32F matrix with
// one image per row (number_of_images x (n_rows * n_cols)).
//
// filename    - path of the image file.
// trainImages - output parameter. On success it is pointed at a heap-allocated Mat
//               that the caller must delete. It is left untouched when the file
//               cannot be opened or the header cannot be used.
//
// Pixels are binarized while loading: every non-zero byte becomes 255, zero stays 0.
void read_Mnist_Images(string filename, Mat* &trainImages)
{
	ifstream file(filename, ios::binary);
	if (!file.is_open())
	{
		cerr << "cannot open image file: " << filename << endl;
		return;
	}

	// The header is four 32-bit integers; ReverseInt swaps their byte order.
	int magic_number = 0;
	int number_of_images = 0;
	int n_rows = 0;
	int n_cols = 0;
	file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
	file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
	file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
	file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
	magic_number = ReverseInt(magic_number);
	number_of_images = ReverseInt(number_of_images);
	n_rows = ReverseInt(n_rows);
	n_cols = ReverseInt(n_cols);

	cout << "magic number = " << magic_number << endl;
	cout << "number of images = " << number_of_images << endl;
	cout << "rows = " << n_rows << endl;
	cout << "cols = " << n_cols << endl;

	if (!file || number_of_images < 0 || n_rows < 0 || n_cols < 0)
	{
		cerr << "invalid image file header: " << filename << endl;
		return;
	}

	const int pixels_per_image = n_rows * n_cols;
	trainImages = new Mat(number_of_images, pixels_per_image, CV_32F);

	// Read one whole image per call instead of one byte per call.
	vector<unsigned char> buffer;
	for (int i = 0; i < number_of_images; i++)
	{
		// Reset to zeros first so that a short or failed read leaves zeros,
		// which is what the byte-by-byte version produced.
		buffer.assign(pixels_per_image, 0);
		file.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size()));

		float* row = trainImages->ptr<float>(i);
		for (int p = 0; p < pixels_per_image; p++)
		{
			row[p] = (buffer[p] > 0) ? 255.0f : 0.0f;
		}
	}
}


int main()
{
	cout << "訓練數據請輸入 1, 直接使用訓練模型預測輸入2" << endl;
	string flag = "";

	while (1) {
		cin >> flag;
		if (flag == "1" || flag == "2")
			break;
		else {
			cout << "輸入1，2" << endl;
		}
	}

	// 創建分類器並設置參數
	Ptr<SVM> SVM_params = SVM::create();

	if (flag == "1") {
		// 訓練 加載模型
		// 讀取訓練樣本的數據
		Mat* trainingDataMat = nullptr;
		read_Mnist_Images("mnist_dataset/train-images.idx3-ubyte", trainingDataMat);

		//訓練樣本的響應值  
		Mat* responsesMat = nullptr;
		read_Mnist_Label("mnist_dataset/train-labels.idx1-ubyte", responsesMat);

		////===============================創建SVM模型===============================////
		cout << SVM_params->getVarCount() << " " << endl;
		SVM_params->setType(SVM::C_SVC);     //C_SVC用於分類，C_SVR用於迴歸
		SVM_params->setKernel(SVM::RBF);  //LINEAR線性核函數。SIGMOID爲高斯核函數

		// 註釋掉部分對本項目不影響，影響因子只有兩個
		//SVM_params->setDegree(0);            //核函數中的參數degree,針對多項式核函數;
		SVM_params->setGamma(0.50625);       //核函數中的參數gamma,針對多項式/RBF/SIGMOID核函數; 
		//SVM_params->setCoef0(0);             //核函數中的參數,針對多項式/SIGMOID核函數；
		SVM_params->setC(12.5);              //SVM最優問題參數，設置C-SVC，EPS_SVR和NU_SVR的參數；
		//SVM_params->setNu(0);                //SVM最優問題參數，設置NU_SVC， ONE_CLASS 和NU_SVR的參數； 
		//SVM_params->setP(0);                 //SVM最優問題參數，設置EPS_SVR 中損失函數p的值. 
		//結束條件，即訓練1000次或者誤差小於0.01結束
		SVM_params->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
		//Mat* responsesTransfer = new Mat(responsesMat->size().height, 1, CV_32FC1);

		//responsesMat->convertTo(*responsesMat, CV_32SC1);      類型爲CV_32SC1，此處省略是因爲讀取的時候已指明該格式了。
		//trainingDataMat->convertTo(*trainingDataMat, CV_32F);  此處需要注意訓練數據類型爲 CV_32F

		//訓練數據和標籤的結合
		cout << "開始訓練" << endl;
		cout << "訓練數據長度" << trainingDataMat->size().width << " 高度 " << trainingDataMat->size().height << endl;
		cout << "標籤數據長度" << responsesMat->size().width << " 高度 " << responsesMat->size().height << endl;

		Ptr<TrainData> tData = TrainData::create(*trainingDataMat, ROW_SAMPLE, *responsesMat);

		// 訓練分類器
		SVM_params->train(tData);//訓練
		SVM_params->save("svm.xml");
		cout << SVM_params->getVarCount() << " " << endl;

		//保存模型
		SVM_params->save("svm.xml");
		cout << "訓練好了！！！" << endl;
		delete trainingDataMat;
		delete responsesMat;
		trainingDataMat = NULL;
		responsesMat = NULL;
	}
	else if (flag == "2") {
		cout << "訓練模型參數加載" << endl;
		SVM_params = SVM::load("svm.xml");
		//cout << SVM_params.empty() << endl;
	}
	


	cout << "-------SVM 開始預測-------------------------------" << endl;

	int count = 0;   // 統計正確率
	Mat* testImage = nullptr;
	Mat* testLabel = nullptr;

	read_Mnist_Images("mnist_dataSet/t10k-images.idx3-ubyte", testImage);
	read_Mnist_Label("mnist_dataSet/t10k-labels.idx1-ubyte", testLabel);

	int height = testImage->size().height;  // 測試圖片的數量
	int width = testImage->size().width;    // 圖片的維度

	for (int i = 0; i < height; i++) { // 遍歷所有測試圖片
		Mat image(1, width, CV_32F);  // 單張圖片
		for (int j = 0; j < width; j++) { //
			image.at<float>(0, j) = testImage->at<float>(i, j);
		}
		//cout << image.size().height << " " << image.size().width << " " << endl;
		//cout << image.cols << " " << image.rows << " " << endl;
		//cout << SVM_params->getVarCount() << " " << endl;
		if (SVM_params->predict(image) == testLabel[i]) {
			count++;
		}

	}
	cout << "訓練預測的準確率爲:" << (double)count / height << endl;
	system("pause");
	//waitKey(0);
	return 0;
}

使用OpenCV3.4的SVM實現mnist手寫體的訓練和預測

說明

讀取數據

訓練

預測

完整源碼

爲什麼要⽤ Foundry

【筆記】動手學深度學習-預備知識

py發送email

MySQL 分庫分表方案，總結太全了。。

Qt/C++音視頻開發71-指定mjpeg/h264格式採集本地攝像頭/存儲文件到mp4/設備推流/採集推流

WPF開源輕便、快速的桌面啓動器

公司來了個新同事，把 DDD 運用得爐火純青！

經典排序算法C++全實現：插入、選擇、冒泡、快排、歸併、基數，堆排、希爾...

TCP傳輸中的“三次握手”建立連接和“四次握手”釋放連接過程

使用OpenCV3.4的SVM實現mnist手寫體的訓練和預測

pytorch 學習 | 使用pytorch動手實現LSTM模塊

pytorch學習筆記 | Focal loss的原理與pytorch實現

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結