目錄
0.參考博客
[1]寫好了train和detect部分的代碼(python和C++):
https://blog.csdn.net/hongbin_xu/article/details/79845290
[2]利用OpenCV裏面提供的HOG和SVM工具做的行人檢測(沒有訓練部分):
https://blog.csdn.net/lindamtd/article/details/80693720
[3]從原理上講解了HOG的生成原理,主要是要理解HOG的特徵向量的維度的計算方法:
https://blog.csdn.net/chaipp0607/article/details/70888899
[4]對detectMultiScale函數多尺度行人檢測的解析:
http://blog.sina.com.cn/s/blog_844b767a0102wq9q.html
[5]OpenCV的cv::ml::SVM模塊的參數優化思路:
https://blog.csdn.net/justin_kang/article/details/79015601
[6]對於HOG的深刻理解,瞭解HOG的本質:
https://www.cnblogs.com/zhazhiqiang/p/3595266.html
https://www.cnblogs.com/wjgaas/p/3597248.html
https://www.leiphone.com/news/201708/ZKsGd2JRKr766wEd.html
[7]80行python代碼實現HOG,非常值得參考:
https://blog.csdn.net/ppp8300885/article/details/71078555
[8]理解PSO算法,利用MatLab實現PSO算法,主要是抓住更新位置和更新速度的方法:
https://blog.csdn.net/weixin_40679412/article/details/80571854
1. 手寫HOG&HOI特徵提取器
Histogram of Oriented Gradient,簡稱HOG,顧名思義就是,方向梯度直方圖。
Histogram of Intensity,簡稱HOI,也就是習慣說的,灰度直方圖,在這裏你可以叫它,亮度直方圖。
其實,這兩個的本質都是:統計圖像局部區域的梯度方向信息或者亮度信息來作爲該局部圖像區域的表徵
HOG:局部歸一化的梯度方向直方圖,是一種對圖像局部重疊區域的密集型描述符, 它通過計算局部區域的梯度方向直方圖來構成特徵
HOI:看完相關的HOI論文後,瞭解HOI也就是在HOG的框架上將局部區域的特徵提取方法,換爲亮度直方圖,nothing new!
那麼我們首先實現HOG的提取方法,然後將中間局部區域的特徵提取方法替換爲亮度直方圖,就可以得到hog和hoi了!
對於HOG特徵向量的提取,我們參考博文[7]的python實現方法,用C++復現和封裝!
1.1. 梯度方向直方圖
中間: 一個網格用箭頭表示梯度 右邊: 這個網格用數字表示的梯度
爲這些8*8的網格創建直方圖,直方圖包含了9個bin來對應0,20,40,...160這些角度。
下面這張圖解釋了這個過程。我們用了上一張圖裏面的那個網格的梯度幅值和方向。根據方向選擇用哪個bin, 根據幅值來確定這個bin的大小。先來看藍色圓圈圈出來的像素點,它的角度是80,幅值是2,所以它在第五個bin裏面加了2,再來看紅色的圈圓圈圈出來的像素點,它的角度是10,幅值是4,因爲角度10介於0-20度的中間(正好一半),所以把幅值一分爲二地放到0和20兩個bin裏面去。
1.2. HOG特徵提取算法的整個實現過程
首先定義HOG.h頭文件
#pragma once
#ifndef _HOG_H
#define _HOG_H
// FIX: forward slash in the include path — a backslash is non-portable
// (and "\o" is treated as an escape sequence by some preprocessors).
#include<opencv2/opencv.hpp>
#include<iostream>
#include<math.h>
#define PI 3.14159
// Hand-written HOG feature extractor; see HOG.cpp for the pipeline
// (grayscale -> Sobel -> per-cell histograms -> block normalisation).
class MYHOGDescriptor
{
public:
	// bin_size: orientation bins per cell histogram;
	// cell_size: cell edge length in pixels;
	// saleBlock: block edge length in cells;
	// stride: block sliding step, in units of cell_size.
	MYHOGDescriptor(int bin_size, int cell_size, int saleBlock, int stride);
	~MYHOGDescriptor();
	// Compute one normalised feature vector per block position of `img`.
	std::vector<std::vector<double>> compute(cv::Mat &img);
	// Render the cell histograms back into a visualisation image.
	cv::Mat getHOGpic();
private:
	// Orientation histogram for a single cell.
	std::vector<double> cal_cell_gradient(cv::Mat &cell_magnitude, cv::Mat &cell_angle);
	// Largest value in a 3-D vector (used to normalise the visualisation).
	double findMax3D(std::vector<std::vector<std::vector<double>>> vec);
	// In-place normalisation of a block's concatenated histograms.
	void normalizeVector(std::vector<double> &vec);
private:
	int _height;      // image rows
	int _width;       // image columns
	int _angle_unit;  // degrees covered by one bin: 360 / bin_size
	int _he;          // cell rows: height / cell_size
	int _wi;          // cell columns: width / cell_size
	int _block_size;  // block edge in pixels: scaleBlock * cell_size
	std::vector<std::vector<std::vector<double> > > _cell_gradient_vector; // unnormalised per-cell histograms
	int _bin_size;    // orientation bins per cell histogram
	int _cell_size;   // cell edge length in pixels
	int _scaleBlock;  // block edge length in cells
	int _stride;      // block sliding step, in cells
};
#endif // !_HOG_H
接着是HOG.cpp
#include"HOG.h"
using namespace cv;
using namespace std;
// Construct a HOG extractor with the given layout parameters; the
// derived quantities (_angle_unit, _block_size) are computed up front.
// Initialiser order follows the member declaration order in HOG.h.
MYHOGDescriptor::MYHOGDescriptor(int bin_size, int cell_size, int scaleBlock, int stride)
	: _angle_unit(360 / bin_size),         // degrees per histogram bin (e.g. 360/8 = 45)
	  _block_size(scaleBlock * cell_size), // block edge length in pixels
	  _bin_size(bin_size),
	  _cell_size(cell_size),
	  _scaleBlock(scaleBlock),
	  _stride(stride)
{
}
// Destructor: the class holds no dynamically allocated resources, so
// there is nothing to release explicitly.
MYHOGDescriptor::~MYHOGDescriptor()
{
};
// Compute the HOG feature vectors for `img`.
// Pipeline: grayscale -> Sobel x/y gradients -> per-pixel magnitude and
// angle -> per-cell orientation histograms (cal_cell_gradient) ->
// sliding blocks of cells, one normalised vector per block position.
// Returns a vector holding one feature vector per block.
std::vector<std::vector<double>> MYHOGDescriptor::compute(cv::Mat &img)
{
	_height = img.rows; // image height in pixels
	_width = img.cols;  // image width in pixels
	_he = _height / _cell_size; // number of cell rows
	_wi = _width / _cell_size;  // number of cell columns
	std::vector<std::vector<std::vector<double> > > tmp_cell_gradient_vector(_he, std::vector<std::vector<double> >(_wi, std::vector<double>(_bin_size)));
	_cell_gradient_vector.swap(tmp_cell_gradient_vector); // (re)initialise via vector::swap
	Mat MatTemp;
	cvtColor(img, MatTemp, CV_BGR2GRAY); // convert to a single-channel grayscale image
	// Sobel derivatives along x and y (5x5 kernel, double precision).
	int scale = 1;
	int delta = 0;
	Mat gradient_values_x = Mat::zeros(MatTemp.size(), CV_64FC1);
	Mat gradient_values_y = Mat::zeros(MatTemp.size(), CV_64FC1);
	Sobel(MatTemp, gradient_values_x, CV_64FC1, 1, 0, 5, scale, delta, BORDER_DEFAULT); // d/dx
	Sobel(MatTemp, gradient_values_y, CV_64FC1, 0, 1, 5, scale, delta, BORDER_DEFAULT); // d/dy
	// Per-pixel gradient magnitude and direction.
	Mat gradient_magnitude = Mat::zeros(MatTemp.size(), CV_64FC1);
	Mat gradient_angle = Mat::zeros(MatTemp.size(), CV_64FC1);
	// NOTE(review): this computes |0.5*gx + 0.5*gy| (after abs below),
	// an approximation — not the Euclidean magnitude sqrt(gx^2 + gy^2).
	addWeighted(gradient_values_x, 0.5, gradient_values_y, 0.5, 0, gradient_magnitude);
	gradient_magnitude = abs(gradient_magnitude);
	phase(gradient_values_x, gradient_values_y, gradient_angle, true); // angles in degrees, [0, 360)
	for (int i = 0; i < _cell_gradient_vector.size(); i++)
	{
		for (int j = 0; j < _cell_gradient_vector[0].size(); j++)
		{
			// Slice out this cell's magnitude and angle patches...
			Mat cell_magnitude = gradient_magnitude(Range(i * _cell_size, (i + 1) * _cell_size), Range(j * _cell_size, (j + 1) * _cell_size));
			Mat cell_angle = gradient_angle(Range(i * _cell_size, (i + 1) * _cell_size), Range(j * _cell_size, (j + 1) * _cell_size));
			// ...and accumulate the cell's orientation histogram.
			_cell_gradient_vector[i][j] = cal_cell_gradient(cell_magnitude, cell_angle);
		}
	}
	vector<vector<double>> hog_vector;
	// Slide a block of _scaleBlock x _scaleBlock cells over the cell
	// grid with step _stride (stride is measured in cells).
	int xconfig = _cell_gradient_vector.size() - (_scaleBlock - 2) - _stride;
	int yconfig = _cell_gradient_vector[0].size() - (_scaleBlock - 2) - _stride;
	for (int i = 0; i < xconfig; i = i + _stride)
	{
		for (int j = 0; j < yconfig; j = j + _stride)
		{
			vector<double> block_vector;
			for (int z = 0; z < _scaleBlock; z++)
			{
				for (int k = 0; k < _scaleBlock; k++)
				{
					// Concatenate the histograms of every cell in this block.
					block_vector.insert(block_vector.end(), _cell_gradient_vector[i + z][j + k].begin(), _cell_gradient_vector[i + z][j + k].end());
				}
			}
			normalizeVector(block_vector); // normalise the block as a whole
			hog_vector.push_back(block_vector); // one finished block vector
		}
	}
	return hog_vector;
}
// Build the orientation histogram for one cell: each pixel's gradient
// magnitude is split linearly between the two angle bins nearest its
// gradient direction (with wrap-around at 360 degrees).
vector<double> MYHOGDescriptor::cal_cell_gradient(Mat &cell_magnitude, Mat &cell_angle)
{
	vector<double> histogram(_bin_size);
	for (int row = 0; row < cell_magnitude.rows; ++row)
	{
		for (int col = 0; col < cell_magnitude.cols; ++col)
		{
			// cv::phase produced angles in [0, 360).
			const double strength = cell_magnitude.at<double>(row, col);
			const double direction = cell_angle.at<double>(row, col);
			const int lower_bin = int(direction / _angle_unit) % _bin_size;
			const int upper_bin = (lower_bin + 1) % _bin_size;
			// Degrees past the lower bin's boundary decide the split.
			const int offset = int(direction) % _angle_unit;
			const double upper_weight = double(offset) / double(_angle_unit);
			histogram[lower_bin] += strength * (1 - upper_weight);
			histogram[upper_bin] += strength * upper_weight;
		}
	}
	return histogram;
}
// Render the most recent compute() result as an image: for every cell,
// draw one line segment per histogram bin, oriented along the bin's
// angle, with length and brightness scaled by the (globally
// normalised) bin magnitude. Must be called after compute().
Mat MYHOGDescriptor::getHOGpic()
{
	Mat hog_image = Mat::zeros(Size(_width,_height), CV_8U);
	vector<vector<vector<double> > > cell_gradient(_cell_gradient_vector);
	int cell_width = _cell_size / 2; // half a cell: maximum segment half-length
	double max_mag = findMax3D(cell_gradient); // global max for scaling to [0, 1]
	for (int x = 0; x < cell_gradient.size(); x++)
	{
		for (int y = 0; y < cell_gradient[0].size(); y++)
		{
			vector<double> cell_grad = vector<double>(_bin_size);
			double angle = 0.0;
			int angle_gap = _angle_unit; // degrees between consecutive bins
			for (int k = 0; k < cell_grad.size(); k++)
			{
				cell_grad[k] = cell_gradient[x][y][k] / max_mag; // normalise this bin
				double magnitude = cell_grad[k];
				double angle_radian = angle*PI / 180; // bin angle in radians
				// Segment endpoints around the cell's top-left corner.
				// NOTE(review): x/y index cell rows/cols, so the Point()
				// calls below swap them into (col,row) pixel order.
				int x1 = int(x * _cell_size + magnitude * cell_width * cos(angle_radian));
				int y1 = int(y * _cell_size + magnitude * cell_width * sin(angle_radian));
				int x2 = int(x * _cell_size - magnitude * cell_width * cos(angle_radian));
				int y2 = int(y * _cell_size - magnitude * cell_width * sin(angle_radian));
				// Brightness grows with sqrt(magnitude).
				line(hog_image, Point(y1, x1), Point(y2, x2), Scalar(int(255 * sqrt(magnitude))));
				angle += angle_gap;
			}
		}
	}
	return hog_image;
}
// Return the largest value in a 3-D vector (0.0 for an empty input).
// FIX: the original seeded the search with -999, which is wrong if every
// element is below -999; track "first element seen" instead. The inner
// bounds now follow each sub-vector, so jagged inputs are safe too.
// NOTE(review): passed by value to match the declaration in HOG.h — a
// const reference would avoid copying the whole tensor.
double MYHOGDescriptor::findMax3D(vector<vector<vector<double> > > vec)
{
	double best = 0.0;
	bool seen = false;
	for (size_t i = 0; i < vec.size(); i++)
	{
		for (size_t j = 0; j < vec[i].size(); j++)
		{
			for (size_t k = 0; k < vec[i][j].size(); k++)
			{
				if (!seen || best < vec[i][j][k])
				{
					best = vec[i][j][k];
					seen = true;
				}
			}
		}
	}
	return best;
}
// In-place min-max normalisation of `vec` into [0, 1].
// The 1e-8 term keeps the denominator non-zero when all elements are
// equal. FIX: guard the empty case (*max_element on an empty range is
// undefined behaviour) and keep the extremes in double precision — the
// original narrowed them to float, losing accuracy.
void MYHOGDescriptor::normalizeVector(vector<double> &vec)
{
	if (vec.empty())
	{
		return; // nothing to normalise
	}
	const double ymax = 1.0;
	const double ymin = 0.0;
	const double dMaxValue = *max_element(vec.begin(), vec.end()); // largest element
	const double dMinValue = *min_element(vec.begin(), vec.end()); // smallest element
	for (size_t f = 0; f < vec.size(); ++f)
	{
		vec[f] = (ymax - ymin)*(vec[f] - dMinValue) / (dMaxValue - dMinValue + 1e-8) + ymin;
	}
}
對單個block中的所有cell方向梯度直方圖做歸一化的時候,也可以嘗試用其他的歸一化方法,效果會有所不同
這裏我只是用了最簡單的Min-Max Normalization,簡單縮放至[0,1]之間!也可以嘗試用Z-score規範化(標準差標準化 / 零均值標準化),只是這樣的話,特徵向量的值中會有正有負!
// Alternative in-place normalisation: Z-score standardisation (subtract
// the mean, divide by the sample standard deviation). Unlike the
// min-max variant above, the result can contain negative values.
// FIX: guard the degenerate cases — fewer than two elements (the
// sample variance divides by size-1) and zero standard deviation (all
// elements equal) — both of which produced NaN/inf in the original.
void MYHOGDescriptor::normalizeVector(vector<double> &vec)
{
	if (vec.size() < 2)
	{
		return; // sample variance is undefined for 0 or 1 elements
	}
	double sum = 0;
	for (size_t i = 0; i < vec.size(); i++)
	{
		sum += vec[i];
	}
	const double mean = sum / vec.size();
	double var = 0;
	for (size_t i = 0; i < vec.size(); i++)
	{
		var += (vec[i] - mean)*(vec[i] - mean);
	}
	var = var / (vec.size() - 1); // unbiased sample variance
	const double std = sqrt(var);
	if (std == 0)
	{
		return; // all elements are equal: nothing to scale
	}
	for (size_t i = 0; i < vec.size(); i++)
	{
		vec[i] = (vec[i] - mean) / std;
	}
}
接着是,調用HOG.h中MYHOGDescriptor類的實例
#include"HOG.h"
using namespace cv;
using namespace std;
int main()
{
Mat img = imread("203.png");
////創建HOG特徵提取器,設置HOG特徵提取器參數
int bin_size=8;//角度分區,最小單元特徵向量維度數
int cell_size=8;//圖像分塊,行像素點數量
int scaleBlock=2;//行cell數量
int stride=1;//block,滑動步長,單位是cell_size,即滑動一步的步長爲一個cell_size
MYHOGDescriptor hogi(bin_size, cell_size, scaleBlock, stride);
//計算圖片對應的HOG特徵
vector<vector<double>> hog_vector = hogi.compute(img);
//得到HOG映射的特徵圖
Mat hog_image = hogi.getHOGpic();
resize(hog_image, hog_image, Size(0, 0), 3, 3);
imshow("hog_image", hog_image);
resize(img, img, Size(0, 0), 3, 3);
imshow("img", img);
waitKey(0);
return 0;
}
2. cv::ml::SVM模塊使用
機器學習裏面一個非常經典的分類器,支持向量機Support Vector Machine,簡稱SVM
OpenCV裏面有做好的cv::ml::SVM模塊,前面做好了HOG特徵描述子,這裏直接調用就可以了
cv::ml::SVM模塊的使用可以參考博文[1]和[2],寫的非常好!這裏我就直接上代碼了!
首先是SVM.h
#pragma once
#ifndef _SVM_H
#define _SVM_H
#include"HOG.h"
#include <fstream>
#include <stdio.h>
#include<string>
// Kind of input handed to demo(); only TXT (an image list) is handled here.
typedef enum { PNG, MP4, TXT } InputFileType;
// Feature extraction + cv::ml::SVM training/testing + a sliding-window
// detection demo, wired together over dataset index files.
class HOGSVM
{
public:
	// cpatch: classification dataset root; depatch: detection dataset
	// root; ft: feature-extraction mode.
	// NOTE(review): FeatureType is said to come from HOG.h — confirm,
	// as the HOG.h listing shown above does not declare it.
	HOGSVM(std::string cpatch,std::string depatch,FeatureType ft);
	~HOGSVM();
	// Read the image lists from the Train/Test pos.txt and neg.txt files;
	// training entries fill TrainImagePath/TrainImageClass, test entries
	// fill TestImagePath/TestImageClass. pos = class +1, neg = class -1.
	void dataPrepare();
	void detectPIC(std::string filename); // run detection over an image list
	// Dispatch a demo run by input type; extension point for other
	// media such as mp4 files.
	void demo(std::string filename, InputFileType type);
	// Read the images in TrainImagePath and extract feature vectors
	// (per FeatureType) to build the training data set.
	void DataOfTrain(int bin_size, int cell_size, int scaleBlock, int stride);
	// Read the images in TestImagePath and extract feature vectors
	// (per FeatureType) to build the test data set.
	void DataOfTest(int bin_size, int cell_size, int scaleBlock, int stride);
	// Train the SVM and persist it (plus the linear detector export).
	void TrainClassifier(int bin_size, int cell_size, int scaleBlock, int stride, int kerneltype, int maxCount);
	// Evaluate the saved classifier; returns accuracy in percent.
	float TestClassifier(int bin_size, int cell_size, int scaleBlock, int stride, int kerneltype, int maxCount);
private:
	std::ifstream trainingData; // stream for the training index files
	std::ifstream testingData;  // stream for the test index files
	std::vector<std::string> TrainImagePath;  // training image paths
	std::vector<int> TrainImageClass;         // +1 / -1 per training image
	std::vector<std::string> TestImagePath;   // test image paths
	std::vector<int> TestImageClass;          // +1 / -1 per test image
	cv::Mat featureVectorOfTrain; // one feature row per training sample
	cv::Mat classOfTrain;         // label column for training samples
	cv::Mat featureVectorOfTest;  // one feature row per test sample
	cv::Mat classOfTest;          // label column for test samples
	int lenOfHogFeature;   // length of one feature vector
	int numOfTrainData = 0; // total number of training images
	int numOfTestData = 0;  // total number of test images
	std::string CLASSFILEPATH;  // classification data root directory
	std::string DETECTFILEPATH; // detection data root directory
	FeatureType MYHOG; // extraction mode (HOG, HOI, HOGI, OPENCV), from HOG.h
};
#endif // !_SVM_H
SVM.cpp
#include"SVM.h"
using namespace cv;
using namespace std;
// Record the dataset roots and the requested feature-extraction mode.
HOGSVM::HOGSVM(std::string cpatch, std::string depatch, FeatureType ft)
	: CLASSFILEPATH(cpatch),  // classification data root
	  DETECTFILEPATH(depatch), // detection data root
	  MYHOG(ft)                // extractor mode
{
}
// Destructor: no explicit cleanup needed — the stream, vector and Mat
// members release their own resources.
HOGSVM::~HOGSVM()
{
}
void HOGSVM::dataPrepare()
{
double dur;
clock_t start, end;
start = clock();
string buffer;
trainingData.open(string(CLASSFILEPATH) + "Train/pos.txt", ios::in);//ios::in 表示以只讀的方式讀取文件
if (trainingData.fail())//文件打開失敗:返回0
{
cout << "open fail!" << endl;
}
while (getline(trainingData, buffer, '\n'))
{
//cout << buffer << endl;
numOfTrainData++;
TrainImageClass.push_back(1);
TrainImagePath.push_back(string(CLASSFILEPATH) + buffer);
}
trainingData.close();
trainingData.open(string(CLASSFILEPATH) + "Train/neg.txt", ios::in);//ios::in 表示以只讀的方式讀取文件
if (trainingData.fail())//文件打開失敗:返回0
{
cout << "open fail!" << endl;
}
while (getline(trainingData, buffer, '\n'))
{
//cout << buffer << endl;
numOfTrainData++;
TrainImageClass.push_back(-1);
TrainImagePath.push_back(string(CLASSFILEPATH) + buffer);
}
trainingData.close();
cout << "訓練數據讀入完成!" << endl;
testingData.open(string(CLASSFILEPATH) + "Test/pos.txt", ios::in);//ios::in 表示以只讀的方式讀取文件
if (testingData.fail())//文件打開失敗:返回0
{
cout << "open fail!" << endl;
}
while (getline(testingData, buffer, '\n'))
{
//cout << buffer << endl;
numOfTestData++;
TestImageClass.push_back(1);
TestImagePath.push_back(string(CLASSFILEPATH) + buffer);
}
testingData.close();
testingData.open(string(CLASSFILEPATH) + "Test/neg.txt", ios::in);//ios::in 表示以只讀的方式讀取文件
if (testingData.fail())//文件打開失敗:返回0
{
cout << "open fail!" << endl;
}
while (getline(testingData, buffer, '\n'))
{
//cout << buffer << endl;
numOfTestData++;
TestImageClass.push_back(-1);
TestImagePath.push_back(string(CLASSFILEPATH) + buffer);
}
testingData.close();
cout << "測試數據讀入完成!" << endl;
end = clock();
dur = (double)(end - start);
printf("dataPrepare 函數 Use Time: %f\n", (dur / CLOCKS_PER_SEC));
}
//int bin_size = 8;//角度分區,最小單元特徵向量維度數
//int cell_size = 8;//圖像分塊,行像素點數量
//int scaleBlock = 2;//行cell數量
//int stride = 1;//block,滑動步長,單位是cell_size,即滑動一步的步長爲一個cell_size
// Build the training data set: read every image in TrainImagePath,
// extract one feature vector per image (own extractor or OpenCV's
// HOGDescriptor depending on MYHOG), and fill featureVectorOfTrain /
// classOfTrain row by row.
// NOTE(review): assumes every training image is width x height
// (32 x 64); other sizes would change the descriptor length — confirm
// against the dataset.
void HOGSVM::DataOfTrain(int bin_size, int cell_size, int scaleBlock, int stride)
{
	if (MYHOG != OPENCV)
	{
		cout << "採用自己的HOGI特徵提取器!" << endl;
	}
	else
	{
		cout << "採用OpenCV的HOG特徵提取器!" << endl;
	}
	double dur;
	clock_t start, end;
	start = clock();
	//////////////////////////////// extract the HOG features of each sample ///////////////////////////////////////////////////
	cout << "正在生成訓練數據集..........." << endl;
	int height = 64; // expected sample height in pixels
	int width = 32;  // expected sample width in pixels
	int he = height / cell_size; // cell rows
	int wi = width / cell_size;  // cell columns
	int blocksize = scaleBlock*cell_size; // block edge in pixels
	// Block positions per axis, and number of values per block.
	int xconfig = he - (scaleBlock - 2) - stride;
	int yconfig = wi - (scaleBlock - 2) - stride;
	int zconfig = pow(scaleBlock, 2)*bin_size;
	cout << "HOGI特徵尺寸 --> " << "[ " << xconfig << " x " << yconfig << " x " << zconfig << " ]" << endl;
	lenOfHogFeature = xconfig*yconfig*zconfig;
	// Feature matrix: one row per sample.
	int numOfSample = numOfTrainData;
	featureVectorOfTrain = Mat::zeros(numOfSample, lenOfHogFeature, CV_32FC1);
	// Label column: +1 = pos, -1 = neg.
	classOfTrain = Mat::zeros(numOfSample, 1, CV_32SC1);
	cv::Mat convertedImg;
	cv::Mat trainImg;
	for (vector<string>::size_type i = 0; i < TrainImagePath.size(); i++)
	{
		cv::Mat trainImg = cv::imread(TrainImagePath[i]); // NOTE(review): shadows the outer trainImg
		if (trainImg.empty())
		{
			cout << "can not load the image:" << TrainImagePath[i] << endl;
			continue; // NOTE(review): leaves an all-zero row with label 0
		}
		// Extract the feature vector for this sample.
		vector<float> descriptors;
		if (MYHOG != OPENCV)
		{
			// Hand-written extractor. NOTE(review): MYHOGIDescriptor and
			// this compute(img, mode) overload differ from the
			// MYHOGDescriptor listing above — presumably the full source
			// extends that class; confirm.
			MYHOGIDescriptor hogi(bin_size, cell_size, scaleBlock, stride);
			vector<vector<double>> hogi_vector = hogi.compute(trainImg, MYHOG);
			// Flatten the per-block vectors into one descriptor.
			for (int i = 0; i < hogi_vector.size(); i++) // NOTE(review): shadows the outer loop index i
			{
				vector<double> vector = hogi_vector[i];
				descriptors.insert(descriptors.end(), vector.begin(), vector.end());
			}
		}
		else
		{
			// OpenCV extractor with the equivalent window/block/stride/cell layout.
			cv::HOGDescriptor hog(cv::Size(width, height), cv::Size(blocksize, blocksize), cv::Size(stride*cell_size, stride*cell_size), cv::Size(cell_size, cell_size), bin_size);
			hog.compute(trainImg, descriptors);
		}
		// Copy the descriptor into row i of the feature matrix.
		for (vector<float>::size_type j = 0; j < descriptors.size(); j++)
		{
			featureVectorOfTrain.at<float>(i, j) = descriptors[j];
		}
		classOfTrain.at<int>(i, 0) = TrainImageClass[i];
	}
	cout << "size of featureVectorOfTrain: " << featureVectorOfTrain.size() << endl;
	cout << "size of classOfTrain: " << classOfTrain.size() << endl;
	cout << "訓練數據集生成完畢!" << endl;
	end = clock();
	dur = (double)(end - start);
	printf("DataOfTrain 函數 Use Time: %f\n", (dur / CLOCKS_PER_SEC));
}
//int bin_size = 8;//角度分區,最小單元特徵向量維度數
//int cell_size = 8;//圖像分塊,行像素點數量
//int scaleBlock = 2;//行cell數量
//int stride = 1;//block,滑動步長,單位是cell_size,即滑動一步的步長爲一個cell_size
// Build the test data set: mirror of DataOfTrain, but over
// TestImagePath / TestImageClass into featureVectorOfTest / classOfTest.
void HOGSVM::DataOfTest(int bin_size, int cell_size, int scaleBlock, int stride)
{
	double dur;
	clock_t start, end;
	start = clock();
	//////////////////////////////// extract the HOG features of each sample ///////////////////////////////////////////////////
	cout << "正在生成測試數據集..........." << endl;
	// Label column: +1 = pos, -1 = neg.
	int numOfSample = numOfTestData;
	classOfTest = Mat::zeros(numOfSample, 1, CV_32SC1);
	cv::Mat convertedImg;
	cv::Mat testImg;
	int height = 64; // expected sample height in pixels
	int width = 32;  // expected sample width in pixels
	int he = height / cell_size; // cell rows
	int wi = width / cell_size;  // cell columns
	int blocksize = scaleBlock*cell_size; // block edge in pixels
	// Block positions per axis, and number of values per block.
	int xconfig = he - (scaleBlock - 2) - stride;
	int yconfig = wi - (scaleBlock - 2) - stride;
	int zconfig = pow(scaleBlock, 2)*bin_size;
	// Feature matrix: one row per sample.
	lenOfHogFeature = xconfig*yconfig*zconfig;
	featureVectorOfTest = Mat::zeros(numOfSample, lenOfHogFeature, CV_32FC1);
	for (vector<string>::size_type i = 0; i < TestImagePath.size(); i++)
	{
		cv::Mat testImg = cv::imread(TestImagePath[i]); // NOTE(review): shadows the outer testImg
		if (testImg.empty())
		{
			cout << "can not load the image:" << TestImagePath[i] << endl;
			continue; // NOTE(review): leaves an all-zero row with label 0
		}
		vector<float> descriptors;
		if (MYHOG != OPENCV)
		{
			// Hand-written extractor (see the matching note in DataOfTrain).
			MYHOGIDescriptor hogi(bin_size, cell_size, scaleBlock, stride);
			vector<vector<double>> hogi_vector = hogi.compute(testImg, MYHOG);
			// Flatten the per-block vectors into one descriptor.
			for (int i = 0; i < hogi_vector.size(); i++) // NOTE(review): shadows the outer loop index i
			{
				vector<double> vector = hogi_vector[i];
				descriptors.insert(descriptors.end(), vector.begin(), vector.end());
			}
		}
		else
		{
			// OpenCV extractor with the equivalent window/block/stride/cell layout.
			cv::HOGDescriptor hog(cv::Size(width, height), cv::Size(blocksize, blocksize), cv::Size(stride*cell_size, stride*cell_size), cv::Size(cell_size, cell_size), bin_size);
			hog.compute(testImg, descriptors);
		}
		// Copy the descriptor into row i of the feature matrix.
		for (vector<float>::size_type j = 0; j < descriptors.size(); j++)
		{
			featureVectorOfTest.at<float>(i, j) = descriptors[j];
		}
		classOfTest.at<int>(i, 0) = TestImageClass[i];
	}
	cout << "size of featureVectorOfTest: " << featureVectorOfTest.size() << endl;
	cout << "size of classOfTest: " << classOfTest.size() << endl;
	cout << "測試數據集生成完畢!" << endl;
	end = clock();
	dur = (double)(end - start);
	printf("DataOfTest 函數 Use Time: %f\n", (dur / CLOCKS_PER_SEC));
}
// Train a C-SVC SVM on featureVectorOfTrain / classOfTrain, save the
// model XML, and additionally export the linear decision function
// (w = alpha * SV, followed by rho) as a text file usable with
// cv::HOGDescriptor::setSVMDetector().
// kerneltype: 0 = LINEAR, otherwise RBF. maxCount: iteration limit.
void HOGSVM::TrainClassifier(int bin_size, int cell_size, int scaleBlock, int stride, int kerneltype, int maxCount)
{
	double dur;
	clock_t start, end;
	start = clock();
	cout << "開始訓練分類器!" << endl;
	/////////////////////////////////// SVM training ///////////////////////////////////////////////////
	// Configure the SVM (note the cv::Ptr smart handle).
	cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create();
	svm->setType(cv::ml::SVM::C_SVC);
	if (kerneltype == 0)
	{
		svm->setKernel(cv::ml::SVM::LINEAR);
	}
	else
	{
		svm->setKernel(cv::ml::SVM::RBF);
	}
	svm->setTermCriteria(cv::TermCriteria(CV_TERMCRIT_ITER, maxCount, FLT_EPSILON));
	// Train with one sample per matrix row.
	svm->train(featureVectorOfTrain, cv::ml::ROW_SAMPLE, classOfTrain);
	// Persist the full model (parameters, support vectors, alpha, rho);
	// the file name encodes the training parameters.
	svm->save(string(CLASSFILEPATH) +to_string(bin_size)+"_"+to_string(cell_size)+"_"+to_string(scaleBlock)+"_"+to_string(stride)+"_"+to_string(kerneltype)+"_"+ to_string(maxCount)+"_classifier.xml");
	/*
	The saved XML contains a support-vector array, an alpha array, and a
	scalar rho. Multiplying alpha by the support-vector matrix yields a
	row vector; negating it and appending rho produces a linear detector
	that can directly replace OpenCV's default people detector via
	cv::HOGDescriptor::setSVMDetector().
	*/
	// Support vectors.
	cv::Mat supportVector = svm->getSupportVectors();
	// alpha weights and rho offset of decision function 0.
	cv::Mat alpha;
	cv::Mat svIndex;
	float rho = svm->getDecisionFunction(0, alpha, svIndex);
	// Convert alpha to 32-bit float so the matrix product below is valid.
	cv::Mat alpha2;
	alpha.convertTo(alpha2, CV_32FC1);
	// w = alpha * SV: a 1 x lenOfHogFeature row vector.
	cv::Mat result(1, lenOfHogFeature, CV_32FC1);
	result = alpha2 * supportVector;
	// Why a sign flip can be needed: svm.predict evaluates
	// alpha*sv*x - rho (negative => positive class), while the HOG
	// detector evaluates rho + alpha*sv*x with x negated. Disabled here.
	//for (int i = 0;i < 3780;i++)
	//result.at<float>(0, i) *= -1;
	// Export w followed by rho — these are the actual decision-function
	// coefficients that HOG-based detection consumes directly.
	FILE *fp = fopen((string(CLASSFILEPATH) + to_string(bin_size) + "_" + to_string(cell_size) + "_" + to_string(scaleBlock) + "_" + to_string(stride) + "_" + to_string(kerneltype) + "_" + to_string(maxCount) + "_HOG_SVM.txt").c_str(), "wb");
	for (int i = 0; i<lenOfHogFeature; i++)
	{
		fprintf(fp, "%f \n", result.at<float>(0, i));
	}
	fprintf(fp, "%f", rho);
	fclose(fp);
	cout << "訓練結束!" << endl;
	featureVectorOfTrain.release();
	classOfTrain.release();
	end = clock();
	dur = (double)(end - start);
	printf("TrainClassifier 函數 Use Time: %f\n", (dur / CLOCKS_PER_SEC));
}
// Load the classifier XML whose name matches the given parameter set,
// predict every row of featureVectorOfTest, print the tally, and
// return the accuracy in percent.
float HOGSVM::TestClassifier(int bin_size, int cell_size, int scaleBlock, int stride, int kerneltype, int maxCount)
{
	double dur;
	clock_t start, end;
	start = clock();
	cout << "開始測試分類準確率!" << endl;
	// The file name encodes the training parameters (see TrainClassifier).
	string modelpath = string(CLASSFILEPATH) + to_string(bin_size) + "_" + to_string(cell_size) + "_" + to_string(scaleBlock) + "_" + to_string(stride) + "_" + to_string(kerneltype) + "_" + to_string(maxCount) + "_classifier.xml";
	Ptr<ml::SVM> svm = Algorithm::load<ml::SVM>(modelpath);
	int featureVectorrows = featureVectorOfTest.rows;
	int featureVectorcols = featureVectorOfTest.cols;
	float TrueNum = 0;
	for (int i = 0; i < featureVectorrows; i++)
	{
		// Copy row i into a 1 x N sample matrix for predict().
		Mat testFeatureMat = Mat::zeros(1, featureVectorcols, CV_32FC1);
		for (int j = 0; j<featureVectorcols; j++)
		{
			testFeatureMat.at<float>(0, j) = featureVectorOfTest.at<float>(i, j);
		}
		testFeatureMat.convertTo(testFeatureMat, CV_32F);
		int response = (int)svm->predict(testFeatureMat);
		if (response == classOfTest.at<int>(i, 0)) // prediction matches the label
		{
			TrueNum++;
		}
	}
	std::cout << "TrueNum: " << TrueNum << endl;
	std::cout << "TotalNum: " << featureVectorrows << endl;
	std::cout << "Result: " << TrueNum / float(featureVectorrows) * 100 << "%" << endl;
	featureVectorOfTest.release();
	classOfTest.release();
	end = clock();
	dur = (double)(end - start);
	printf("TestClassifier 函數 Use Time: %f\n", (dur / CLOCKS_PER_SEC));
	return TrueNum / float(featureVectorrows) * 100;
}
// Run the trained HOG+SVM detector over every image listed in
// DETECTFILEPATH/filename, draw the detections, and display them.
// Press 'q'/'Q' to stop early.
void HOGSVM::detectPIC(string filename)
{
	ifstream f;
	// Open the list of test image paths (read-only).
	f.open(string(DETECTFILEPATH) + filename, ios::in);
	if (f.fail())
	{
		fprintf(stderr, "ERROR: the specified file could not be loaded\n");
		return;
	}
	// Load the linear decision-function coefficients saved by
	// TrainClassifier (NOT the same thing as the svm->save() XML).
	// NOTE(review): TrainClassifier writes "<params>_HOG_SVM.txt" but this
	// loads plain "HOG_SVM.txt" — confirm the file name.
	vector<float> detector;
	ifstream fileIn(string(CLASSFILEPATH) + "HOG_SVM.txt", ios::in);
	float val = 0.0f;
	// BUG FIX: the original looped on !fileIn.eof(), which pushes the last
	// value twice (eofbit is only set after a failed read). Looping on the
	// extraction itself reads each value exactly once.
	while (fileIn >> val)
	{
		detector.push_back(val);
	}
	fileIn.close();
	// Configure the HOG descriptor; block size = scaleBlock * cell_size.
	cv::HOGDescriptor hog(cv::Size(32, 64), cv::Size(16, 16), cv::Size(8, 8), cv::Size(8, 8), 8);
	hog.setSVMDetector(detector);
	//hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
	cv::namedWindow("people detector", 1);
	// Detect each listed image in turn.
	string buffer;
	while (getline(f, buffer, '\n'))
	{
		cv::Mat img = cv::imread(string(DETECTFILEPATH) + buffer);
		cout << buffer << endl;
		if (!img.data)
			continue;
		vector<cv::Rect> found, found_filtered;
		// Multi-scale sliding-window detection.
		hog.detectMultiScale(img, found, -8, cv::Size(16, 16), cv::Size(0, 0), 1.05, 5);
		cout << "found.size() :" << found.size() << endl;
		size_t i, j;
		// Drop any rectangle fully contained inside another detection.
		for (i = 0; i < found.size(); i++)
		{
			cv::Rect r = found[i];
			for (j = 0; j < found.size(); j++)
				if (j != i && (r & found[j]) == r)
					break;
			if (j == found.size())
				found_filtered.push_back(r);
		}
		// Adjust the rectangles slightly before drawing.
		for (i = 0; i < found_filtered.size(); i++)
		{
			cv::Rect r = found_filtered[i];
			// the HOG detector returns slightly larger rectangles than the real objects.
			// so we slightly shrink the rectangles to get a nicer output.
			r.x += cvRound(r.width*0.5);
			rectangle(img, r.tl(), r.br(), cv::Scalar(0, 0, 255), 1);
		}
		resize(img, img, Size(0, 0), 3, 3);
		imshow("people detector", img);
		int c = cv::waitKey(30);
		if (c == 'q' || c == 'Q' || !f)
			break;
	}
}
// Dispatch a demo run according to the input file type; only TXT image
// lists are handled for now — other types are silently ignored.
void HOGSVM::demo(string filename, InputFileType type)
{
	switch (type)
	{
	case TXT:
		detectPIC(filename);
		break;
	default:
		break;
	}
}
最後寫一個測試,分類訓練和分類測試的圖片路徑放在LSIFIR_equalHist\Classification\Train和LSIFIR_equalHist\Classification\Test的pos.txt和neg.txt中,複製同一個文件夾下的pos.lst和neg.lst,可以修改需要訓練和測試的圖片數量
pos.txt文件部分截圖如下
#include"SVM.h"
using namespace cv;
using namespace std;
// Root folders of the classification and detection datasets.
string CLASSFILEPATH = "E:/LSIFIR_equalHist/Classification/";
string DETECTFILEPATH = "E:/LSIFIR_equalHist/Detection/";
FeatureType MYHOG = HOGI; // feature-extraction mode (declared in HOG.h)
void OneTrainNTest();
void MultiTest();
int main()
{
	OneTrainNTest(); // train once, then evaluate on the test set
	//MultiTest();   // alternative: run the sliding-window detector demo
	return 0;
}
// Run the sliding-window detector demo over the Train/pos.txt list.
void MultiTest()
{
	// FIX: the original allocated HOGSVM with `new` and never deleted
	// it — a memory leak; a stack object is destroyed automatically.
	HOGSVM hogsvmer(CLASSFILEPATH, DETECTFILEPATH, MYHOG);
	string filename = "Train/pos.txt";
	hogsvmer.demo(filename, TXT);
}
// End-to-end run: load the data lists, build the train/test feature
// sets, train the classifier once, and report its accuracy.
void OneTrainNTest()
{
	int bin_size = 8;   // orientation bins per cell histogram
	int cell_size = 8;  // cell edge length in pixels
	int scaleBlock = 2; // block edge length in cells
	int stride = 1;     // block sliding step, in units of cell_size
	int kerneltype = 0; // 0 = linear kernel, otherwise RBF
	int maxCount = 1000000; // SVM termination criterion: max iterations
	// FIX: the original allocated HOGSVM with `new` and never deleted
	// it — a memory leak; a stack object is destroyed automatically.
	HOGSVM hogsvmer(CLASSFILEPATH, DETECTFILEPATH, MYHOG);
	hogsvmer.dataPrepare();
	hogsvmer.DataOfTrain(bin_size, cell_size, scaleBlock, stride);
	hogsvmer.DataOfTest(bin_size, cell_size, scaleBlock, stride);
	hogsvmer.TrainClassifier(bin_size, cell_size, scaleBlock, stride, kerneltype, maxCount);
	float target = hogsvmer.TestClassifier(bin_size, cell_size, scaleBlock, stride, kerneltype, maxCount);
	(void)target; // accuracy in percent; kept as a hook for PSO tuning
}
訓練分類效果
用2001張pos和2001張neg圖片作分類訓練,分類準確率89.6%,一般般
檢測的效果也是很一般,漏檢率和誤檢率都很高!
3. 手寫PSO粒子羣優化方法
粒子羣優化Partical Swarm Optimization,簡稱PSO
參考了一些論文,可以採用優化方法對SVM和HOG進行參數尋優,那麼最後的檢測效果也相應可以提高!
當然,這個只是論文上玩一玩,實際的使用,也還是費時費力不討好!
PSO算法最重要是要抓住粒子位置和速度的更新方法
這裏寫一個簡單的多維度PSO方法,源碼中會把PSO封裝進去,這裏只介紹簡單的構造PSO的代碼
首先是PSO.h
#pragma once
#ifndef _PSO_H
#define _PSO_H
#include<iostream>
#include<vector>
#include<utility>
#include<string>
#include<iomanip>
/*
Usage example:

int particlesize = 50;
std::pair<int, int> temprange[6] =
{
	make_pair(-10,10),
	make_pair(-10,10),
	make_pair(-10,10),
	make_pair(-10,10),
	make_pair(-10,10),
	make_pair(-10,10),
};
vector<pair<int, int>> xrange;
xrange.insert(xrange.begin(), temprange, temprange + 6);
PSO *pso = new PSO(xrange, particlesize);
pso->search();
delete pso;
*/
// One particle of the swarm: current fitness, position x, velocity v,
// and the number of dimensions (narvs).
typedef struct partical
{
	double fitness = 0; // FIX: default-initialised (was left indeterminate)
	std::vector<double> x;
	std::vector<double> v;
	int narvs = 0;
}partical;
// Minimising particle-swarm optimiser over a box-constrained domain.
class PSO
{
public:
	// input_range: per-dimension [min, max] bounds; psize: swarm size.
	PSO(std::vector<std::pair<int, int>> &input_range, int psize)
	{
		particlesize = psize;
		narvs = input_range.size();
		// FIX: the original swapped input_range into xrange, silently
		// emptying the caller's vector; copy it instead.
		xrange = input_range;
		init();
	}
	~PSO()
	{
	}
	void update(); // one iteration: fitness, bests, velocity/position updates
	void search(); // iterate update() until convergence or maxnum rounds
	double targetfunction(std::vector<double> &x); // objective to minimise
	void BubbleSort(std::vector<partical> &arr);   // ascending sort by fitness
private:
	void init(); // random initialisation of the swarm
public:
	std::vector<partical> p;            // the swarm
	std::vector<partical> personalbest; // best position seen by each particle
	partical globalbest;                // best position seen by any particle
	std::vector<double > ff;            // global-best fitness per iteration
	partical finalbest;                 // result after search() completes
private:
	double E = 0.000001; // convergence threshold on the global best fitness
	int maxnum = 800;    // maximum number of iterations
	int c1 = 2;          // cognitive (personal-best) acceleration constant
	int c2 = 2;          // social (global-best) acceleration constant
	double w = 0.6;      // inertia weight
	int vmax = 5;        // per-dimension velocity clamp
	int particlesize;    // number of particles
	int narvs;           // number of dimensions per particle
	std::vector<std::pair<int, int>> xrange; // per-dimension position bounds
};
#endif // !_PSO_H
然後是PSO.cpp
#include"PSO.h"
using namespace std;
// One PSO iteration:
// 1. re-evaluate every particle and refresh its personal best,
// 2. take the best of the personal bests as the global best,
// 3. update each particle's velocity (inertia + cognitive + social
//    terms, clamped to [-vmax, vmax]) and position (clamped to xrange).
void PSO::update()
{
	for (int i = 0; i < p.size(); i++)
	{
		p[i].fitness = targetfunction(p[i].x);
		if (p[i].fitness < personalbest[i].fitness) // minimisation: smaller is better
		{
			personalbest[i] = p[i];
		}
	}
	// Sort a copy of the personal bests; the head is the global best.
	vector<partical >temp(personalbest);
	BubbleSort(temp);
	globalbest = temp[0];
	for (int i = 0; i < p.size(); i++)
	{
		for (int j = 0; j < p[i].narvs; j++)
		{
			// Velocity update: inertia + random pull towards this
			// particle's own best + random pull towards the global best.
			p[i].v[j] = w*p[i].v[j]
				+ c1*((rand() % 100) / (double)(100))*(personalbest[i].x[j] - p[i].x[j])
				+ c2*((rand() % 100) / (double)(100))*(globalbest.x[j] - p[i].x[j]);
			if (p[i].v[j] > vmax) // clamp the velocity to [-vmax, vmax]
			{
				p[i].v[j] = vmax;
			}
			else if(p[i].v[j] < -vmax)
			{
				p[i].v[j] = -vmax;
			}
			p[i].x[j] = p[i].x[j] + p[i].v[j]; // position update
			if (p[i].x[j] > xrange[j].second) // clamp to the search range
			{
				p[i].x[j] = xrange[j].second;
			}
			else if (p[i].x[j] < xrange[j].first)
			{
				p[i].x[j] = xrange[j].first;
			}
		}
	}
}
void PSO::search()
{
int k = 1;
while (k <= maxnum)
{
update();
ff.push_back(globalbest.fitness);
cout.precision(4);
cout << "PSO --> " << "fitness: " << globalbest.fitness <<"\t";
for (int i = 0; i < globalbest.narvs; i++)
{
cout << to_string(i) << " : " << globalbest.x[i] <<"\t";
}
cout << endl;
if (globalbest.fitness < E)
{
break;
}
k++;
}
finalbest = globalbest;
cout << "FIN --> " << "fitness: " << globalbest.fitness << "\t";
for (int i = 0; i < globalbest.narvs; i++)
{
cout << to_string(i) << " : " << globalbest.x[i] << "\t";
}
cout << endl;
}
// Benchmark objective: the mean of squared coordinates (a scaled
// sphere function); its global minimum is 0 at the origin.
// FIX: guard the empty case — the original divided by x.size()
// unconditionally, a division by zero for a zero-dimensional particle.
double PSO::targetfunction(vector<double>& x)
{
	if (x.empty())
	{
		return 0;
	}
	double fitness = 0;
	for (size_t i = 0; i < x.size(); i++)
	{
		fitness += x[i] * x[i]; // x*x instead of pow(x, 2): same value, cheaper
	}
	return fitness / x.size();
}
// Sort particles in ascending order of fitness (stable bubble sort).
// FIX: the original compared `i < arr.size() - 1` with an unsigned
// size; for an empty vector that wraps to SIZE_MAX and the inner loop
// indexes out of bounds. Using a signed count avoids the underflow.
void PSO::BubbleSort(std::vector<partical>& arr)
{
	const int n = (int)arr.size();
	for (int i = 0; i < n - 1; i++)
	{
		for (int j = 0; j < n - i - 1; j++)
		{
			if (arr[j].fitness > arr[j + 1].fitness)
			{
				std::swap(arr[j], arr[j + 1]); // std::swap moves instead of copying thrice
			}
		}
	}
}
// Create the initial swarm: random positions inside each dimension's
// bounds, small random velocities in [0, 1), then seed the personal
// bests (one per particle) and the global best.
void PSO::init()
{
	for (int i = 0; i < particlesize; i++)
	{
		partical temp;
		temp.narvs = narvs;
		for (int j = 0; j < temp.narvs; j++)
		{
			temp.v.push_back((rand() % 100) / (double)(100));
			// BUG FIX: the original sampled first + second * u, which for
			// a range like [-10, 10] only covers [-10, 0). Sampling
			// first + (second - first) * u spans the whole range.
			temp.x.push_back(xrange[j].first + (xrange[j].second - xrange[j].first) * (rand() % 100) / (double)(100));
		}
		// Evaluate once, after all coordinates exist (the original
		// recomputed the fitness inside the dimension loop).
		temp.fitness = targetfunction(temp.x);
		p.push_back(temp);
	}
	// Global best = minimum-fitness particle of the initial swarm.
	vector<partical> temp1(p);
	BubbleSort(temp1);
	globalbest = temp1[0];
	// Personal bests start as each particle's initial state.
	vector<partical> temp2(p);
	temp2.swap(personalbest);
}
這裏是對一個簡單的target函數(各維度平方和/維數)進行優化
測試實例,初始化50個粒子,6個維度,每個維度的限制條件均爲[-10,10]
#include"PSO.h"
using namespace std;
int main()
{
int particlesize = 50;
std::pair<int, int> temprange[6] =
{
make_pair(-10,10),
make_pair(-10,10),
make_pair(-10,10),
make_pair(-10,10),
make_pair(-10,10),
make_pair(-10,10),
};
vector<pair<int, int>> xrange;
xrange.insert(xrange.begin(), temprange, temprange + 6);
PSO *pso = new PSO(xrange, particlesize);
pso->search();
delete pso;
//vector<double> ff(pso->ff);
//for (int i = 0; i < ff.size(); i++)
//{
// cout << i << " : " << ff[i] << endl;
//}
//partical finalbest = pso->finalbest;
//for (int i = 0; i < finalbest.narvs; i++)
//{
// cout << finalbest.x[i] << endl;
//}
return 0;
}
結果
4. 數據集和源碼下載
自行下載數據集和源碼,有疑問可以私聊博主
全部源碼打包(CSDN下載)
https://download.csdn.net/download/jin739738709/11467578
數據集下載(百度雲)
鏈接:https://pan.baidu.com/s/16kXSOYeo4SHRW7JNV0PErA
提取碼:fq1i