很多都是參考網上的大神分析
放幾個KCF中主要的cpp和hpp文件,回去接着看代碼
fhog.cpp
#include "fhog.hpp"
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#endif
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
/**********************************************************************************
函數功能:計算image的hog特徵,結果在map結構中的map大小爲sizeXsizeYNUM_SECTOR3(Getting feature map for the selected subimage)
函數參數:選中的子圖,cell的大小,返回的特徵圖
RESULT:Error status
**********************************************************************************/
int getFeatureMaps( const IplImage image, const int k, CvLSVMFeatureMapCaskade **map )
{
//總體過程是:
//1.計算每個像素的水平梯度dx和垂直梯度dy
//2.計算每個像素的通道間最大梯度大小r及其最鄰近梯度方向的索引值
//3.計算每個block(2+4+2)(2+4+2)的梯度直方圖(分爲9和18bin)存於map中//每個block的特徵是以一個cell爲中心,根據像素的位置以及像素點的梯度強度進行加權獲得的
int sizeX, sizeY;
int p, px, stringSize;
int height, width, numChannels;
int i, j, kk, c, ii, jj, d;
float * datadx, *datady;
int ch;
float magnitude, x, y, tx, ty;
IplImage * dx, *dy;
int *nearest;
float *w, a_x, b_x;
// 橫向和縱向的3長度{-1,0,1}矩陣
float kernel[3] = { -1.f, 0.f, 1.f };
CvMat kernel_dx = cvMat( 1, 3, CV_32F, kernel ); // 1*3的矩陣
CvMat kernel_dy = cvMat( 3, 1, CV_32F, kernel ); // 3*1的矩陣
float * r;//記錄每個像素點的每個通道的最大梯度
int * alfa;//記錄每個像素的梯度方向的索引值,分別爲9份時的索引值和18份時的索引值。
float boundary_x[NUM_SECTOR + 1]; // boundary_x[10]
float boundary_y[NUM_SECTOR + 1];
float max, dotProd;
int maxi;
height = image->height;
width = image->width;
numChannels = image->nChannels;
// 採樣圖像大小的Ipl圖像
dx = cvCreateImage( cvSize( image->width, image->height ),
IPL_DEPTH_32F, 3 );
dy = cvCreateImage( cvSize( image->width, image->height ),
IPL_DEPTH_32F, 3 );
// 向下取整的(邊界大小/4),k = cell_size
sizeX = width / k; // 將圖像分割成多個元胞(cell),x方向上cell的個數
sizeY = height / k; // y方向上cell的個數
px = 3 * NUM_SECTOR; // 三通道?NUM_SECTOR=9 Hog特徵中的9個角度範圍
p = px; // p=27
stringSize = sizeX * p; // stringSize = 27*sizeX
allocFeatureMapObject( map, sizeX, sizeY, p ); // 爲map初始化內存sizeX*sizeY*p=sizeY*stringSize
// image:輸入圖像.
// dx:輸出圖像.
// kernel_dx:卷積核, 單通道浮點矩陣. 如果想要應用不同的核於不同的通道,先用 cvSplit 函數分解圖像到單個色彩通道上,然後單獨處理。
// cvPoint(-1, 0):核的錨點表示一個被濾波的點在覈內的位置。 錨點應該處於核內部。缺省值 (-1,-1) 表示錨點在覈中心。
// 函數 cvFilter2D 對圖像進行線性濾波,支持 In-place 操作。當核運算部分超出輸入圖像時,函數從最近鄰的圖像內部象素差值得到邊界外面的象素值。
cvFilter2D( image, dx, &kernel_dx, cvPoint( -1, 0 ) ); // 起點在(x-1,y),按x方向濾波
cvFilter2D( image, dy, &kernel_dy, cvPoint( 0, -1 ) ); // 起點在(x,y-1),按y方向濾波
// 初始化cos和sin函數
float arg_vector;
// 計算梯度角的邊界,並存儲在boundary__y中
for( i = 0; i <= NUM_SECTOR; i++ )
{
arg_vector = ( ( float ) i ) * ( ( float ) ( PI ) / ( float ) ( NUM_SECTOR ) ); // 每個角的角度
boundary_x[i] = cosf( arg_vector ); // 每個角度對應的餘弦值
boundary_y[i] = sinf( arg_vector ); // 每個角度對應的正弦值
}/*for(i = 0; i <= NUM_SECTOR; i++) */
r = ( float * ) malloc( sizeof( float ) * ( width * height ) );
alfa = ( int * ) malloc( sizeof( int ) * ( width * height * 2 ) );
//2.
for( j = 1; j < height - 1; j++ )
{
// 記錄每一行的首地址
datadx = ( float* ) ( dx->imageData + dx->widthStep * j );
datady = ( float* ) ( dy->imageData + dy->widthStep * j );
for( i = 1; i < width - 1; i++ ) // 遍歷一行中的非邊界像素
{
c = 0; // 第一顏色通道
x = ( datadx[i * numChannels + c] );
y = ( datady[i * numChannels + c] );
r[j * width + i] = sqrtf( x * x + y * y ); // 計算0通道的梯度大小
// 使用向量大小最大的通道替代儲存值
for( ch = 1; ch < numChannels; ch++ ) // 計算其他兩個通道
{
tx = ( datadx[i * numChannels + ch] );
ty = ( datady[i * numChannels + ch] );
magnitude = sqrtf( tx * tx + ty * ty ); // 計算幅值
if( magnitude > r[j * width + i] ) // 找出每個像素點的梯度的最大值(有三個顏色空間對應的梯度),並記錄通道數以及水平梯度以及垂直梯度
{
r[j * width + i] = magnitude; // r表示最大幅值
c = ch; // c表示這個幅值來自的通道序號
x = tx; // x表示這個幅值對應的座標處的x梯度
y = ty; // y表示這個幅值對應的座標處的y梯度
}
}/*for(ch = 1; ch < numChannels; ch++)*/
// 使用sqrt(cos*x*cos*x+sin*y*sin*y)最大的替換掉
max = boundary_x[0] * x + boundary_y[0] * y;
maxi = 0;
// 假設像素點的梯度方向爲a,梯度方向爲t,梯度大小爲r,則dotProd=r*cosa*cost+r*sina*sint=r*cos(a-t)
for( kk = 0; kk < NUM_SECTOR; kk++ ) // 遍歷9個HOG劃分的角度範圍
{
dotProd = boundary_x[kk] * x + boundary_y[kk] * y; // 計算兩個向量的點乘
// 若dotProd最大,則說明t最接近a
if( dotProd > max )
{
max = dotProd;
maxi = kk;
}
// 若-dotProd最大,則說明t最接近a+pi
else
{
if( -dotProd > max )
{
max = -dotProd; // 取相反數
maxi = kk + NUM_SECTOR; // ?
}
}
}
// 看起來有點像儲存cos和sin的週期值
alfa[j * width * 2 + i * 2] = maxi % NUM_SECTOR; //
alfa[j * width * 2 + i * 2 + 1] = maxi;
}/*for(i = 0; i < width; i++)*/
}/*for(j = 0; j < height; j++)*/
// nearest=[-1,-1,1,1];
nearest = ( int * ) malloc( sizeof( int ) * k );
w = ( float* ) malloc( sizeof( float ) * ( k * 2 ) );
// 給nearest初始化,爲了方便以後利用相鄰的cell的特徵計算block(8*8,每個block以一個cell爲中心,以半個cell爲邊界厚度)的屬性
for( i = 0; i < k / 2; i++ )
{
nearest[i] = -1;
}/*for(i = 0; i < k / 2; i++)*/
for( i = k / 2; i < k; i++ )
{
nearest[i] = 1;
}/*for(i = k / 2; i < k; i++)*/
//給w初始化?不明白w的作用,可能是cell(4*4)中每個像素貢獻給直方圖的權值(1/8+3/8+5/8+7/8+7/8+5/8+3/8+1/8)*(1/8+3/8+5/8+7/8+7/8+5/8+3/8+1/8)=4*4
for( j = 0; j < k / 2; j++ )
{
b_x = k / 2 + j + 0.5f;
a_x = k / 2 - j - 0.5f;
w[j * 2] = 1.0f / a_x * ( ( a_x * b_x ) / ( a_x + b_x ) );
w[j * 2 + 1] = 1.0f / b_x * ( ( a_x * b_x ) / ( a_x + b_x ) );
}/*for(j = 0; j < k / 2; j++)*/
for( j = k / 2; j < k; j++ )
{
a_x = j - k / 2 + 0.5f;
b_x = -j + k / 2 - 0.5f + k;
w[j * 2] = 1.0f / a_x * ( ( a_x * b_x ) / ( a_x + b_x ) );
w[j * 2 + 1] = 1.0f / b_x * ( ( a_x * b_x ) / ( a_x + b_x ) );
}/*for(j = k / 2; j < k; j++)*/
//3.
for( i = 0; i < sizeY; i++ )
{
for( j = 0; j < sizeX; j++ )
{
for( ii = 0; ii < k; ii++ )
{
for( jj = 0; jj < k; jj++ )
{
//第i行的第j個cell的第ii行第jj個像素
if( ( i * k + ii > 0 ) &&
( i * k + ii < height - 1 ) &&
( j * k + jj > 0 ) &&
( j * k + jj < width - 1 ) )//要跳過厚度爲1的邊界像素,因爲邊界的梯度值不準確,但這樣會導致含有邊界的cell統計不完整
{
d = ( k * i + ii ) * width + ( j * k + jj );
( *map )->map[i * stringSize + j * ( *map )->numFeatures + alfa[d * 2]] +=
r[d] * w[ii * 2] * w[jj * 2];//第i行第j個cell的第alfa[d * 2]個梯度方向(0-8)
( *map )->map[i * stringSize + j * ( *map )->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2];//第i行第j個cell的第alfa[d * 2+1]個梯度方向(9-26)
if( ( i + nearest[ii] >= 0 ) &&
( i + nearest[ii] <= sizeY - 1 ) )
{
( *map )->map[( i + nearest[ii] ) * stringSize + j * ( *map )->numFeatures + alfa[d * 2]] +=
r[d] * w[ii * 2 + 1] * w[jj * 2];
( *map )->map[( i + nearest[ii] ) * stringSize + j * ( *map )->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2];
}
if( ( j + nearest[jj] >= 0 ) &&
( j + nearest[jj] <= sizeX - 1 ) )
{
( *map )->map[i * stringSize + ( j + nearest[jj] ) * ( *map )->numFeatures + alfa[d * 2]] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
( *map )->map[i * stringSize + ( j + nearest[jj] ) * ( *map )->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
}
if( ( i + nearest[ii] >= 0 ) &&
( i + nearest[ii] <= sizeY - 1 ) &&
( j + nearest[jj] >= 0 ) &&
( j + nearest[jj] <= sizeX - 1 ) )
{
( *map )->map[( i + nearest[ii] ) * stringSize + ( j + nearest[jj] ) * ( *map )->numFeatures + alfa[d * 2]] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
( *map )->map[( i + nearest[ii] ) * stringSize + ( j + nearest[jj] ) * ( *map )->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
}
}
}/*for(jj = 0; jj < k; jj++)*/
}/*for(ii = 0; ii < k; ii++)*/
}/*for(j = 1; j < sizeX - 1; j++)*/
}/*for(i = 1; i < sizeY - 1; i++)*/
// 釋放變量
cvReleaseImage( &dx );
cvReleaseImage( &dy );
free( w );
free( nearest );
free( r );
free( alfa );
return LATENT_SVM_OK;
}
/*****************************************************************************
函數功能:特徵圖標準化與截斷(Feature map Normalization and Truncation)
函數參數:特徵圖,截斷閾值
函數輸出:標準化與截斷之後的特徵圖
RESULT:Error status
*****************************************************************************/
int normalizeAndTruncate( CvLSVMFeatureMapCaskade map, const float alfa )
{
//計算步驟:
//1.分別計算每個block(除去邊界)的9分特性的9個特性的平方和
//2.分別計算每個block在各個方向上的9分特性的2範數
//3.用各個屬性(共27個)除以各個方向上的2範數,得到歸一化的274個屬性
int i, j, ii;
int sizeX, sizeY, p, pos, pp, xp, pos1, pos2;
float * partOfNorm; // norm of C(i, j)
float * newData;
float valOfNorm;//大小爲block的總數,計算每個block的前九個特徵的2範數
sizeX = map->sizeX;
sizeY = map->sizeY;
partOfNorm = ( float * ) malloc( sizeof( float ) * ( sizeX * sizeY ) );
p = NUM_SECTOR;//每個cell的bin的數目
xp = NUM_SECTOR * 3;//每個block的總特徵數(9+18)
pp = NUM_SECTOR * 12;
for( i = 0; i < sizeX * sizeY; i++ )
{
valOfNorm = 0.0f;
pos = i * map->numFeatures;//第i個block的第一個特徵點索引號
for( j = 0; j < p; j++ )
{
valOfNorm += map->map[pos + j] * map->map[pos + j];//計算第i個block的前9個特徵的平方和
}/*for(j = 0; j < p; j++)*/
partOfNorm[i] = valOfNorm;
}/*for(i = 0; i < sizeX * sizeY; i++)*/
sizeX -= 2;//去掉第一列和最後一列的block
sizeY -= 2;//去掉一第行和最後一行的block
newData = ( float * ) malloc( sizeof( float ) * ( sizeX * sizeY * pp ) );
//normalization
for( i = 1; i <= sizeY; i++ )
{
for( j = 1; j <= sizeX; j++ )
{
//右下
valOfNorm = sqrtf(
partOfNorm[( i ) *( sizeX + 2 ) + ( j )] +
partOfNorm[( i ) *( sizeX + 2 ) + ( j + 1 )] +
partOfNorm[( i + 1 )*( sizeX + 2 ) + ( j )] +
partOfNorm[( i + 1 )*( sizeX + 2 ) + ( j + 1 )] ) + FLT_EPSILON;//計算該block右下四個block的9分屬性的2範數
pos1 = ( i ) * ( sizeX + 2 ) * xp + ( j ) * xp;//第i行第j列的block的屬性的第一個值的索引值
pos2 = ( i - 1 ) * ( sizeX ) * pp + ( j - 1 ) * pp;//除掉邊框後的第i-1行第j-列的block的newdata的首地址
for( ii = 0; ii < p; ii++ )
{
newData[pos2 + ii] = map->map[pos1 + ii] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for( ii = 0; ii < 2 * p; ii++ )
{
newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
//右上
valOfNorm = sqrtf(
partOfNorm[( i ) *( sizeX + 2 ) + ( j )] +
partOfNorm[( i ) *( sizeX + 2 ) + ( j + 1 )] +
partOfNorm[( i - 1 )*( sizeX + 2 ) + ( j )] +
partOfNorm[( i - 1 )*( sizeX + 2 ) + ( j + 1 )] ) + FLT_EPSILON;
for( ii = 0; ii < p; ii++ )
{
newData[pos2 + ii + p] = map->map[pos1 + ii] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for( ii = 0; ii < 2 * p; ii++ )
{
newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
//左下
valOfNorm = sqrtf(
partOfNorm[( i ) *( sizeX + 2 ) + ( j )] +
partOfNorm[( i ) *( sizeX + 2 ) + ( j - 1 )] +
partOfNorm[( i + 1 )*( sizeX + 2 ) + ( j )] +
partOfNorm[( i + 1 )*( sizeX + 2 ) + ( j - 1 )] ) + FLT_EPSILON;
for( ii = 0; ii < p; ii++ )
{
newData[pos2 + ii + p * 2] = map->map[pos1 + ii] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for( ii = 0; ii < 2 * p; ii++ )
{
newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
//左上
valOfNorm = sqrtf(
partOfNorm[( i ) *( sizeX + 2 ) + ( j )] +
partOfNorm[( i ) *( sizeX + 2 ) + ( j - 1 )] +
partOfNorm[( i - 1 )*( sizeX + 2 ) + ( j )] +
partOfNorm[( i - 1 )*( sizeX + 2 ) + ( j - 1 )] ) + FLT_EPSILON;
for( ii = 0; ii < p; ii++ )
{
newData[pos2 + ii + p * 3] = map->map[pos1 + ii] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for( ii = 0; ii < 2 * p; ii++ )
{
newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
}/*for(j = 1; j <= sizeX; j++)*/
}/*for(i = 1; i <= sizeY; i++)*/
//truncation
for( i = 0; i < sizeX * sizeY * pp; i++ )
{
if( newData[i] > alfa ) newData[i] = alfa;
}/*for(i = 0; i < sizeX * sizeY * pp; i++)*/
//swop data
map->numFeatures = pp;
map->sizeX = sizeX;
map->sizeY = sizeY;
free( map->map );
free( partOfNorm );
map->map = newData;
return LATENT_SVM_OK;
}
/*****************************************************************************
 Feature map dimensionality reduction (Feature map reduction).
 In each cell the 108-dimensional vector is projected down to
 NUM_SECTOR * 3 + 4 = 31 features by summing over the analytically chosen
 directions of the original paper.
 Params: map - feature map, modified in place
 RESULT: error status
*****************************************************************************/
int PCAFeatureMaps( CvLSVMFeatureMapCaskade *map )
{
    // Per cell:
    // 1. each of the 18 sensitive bins summed over the 4 normalizations
    // 2. each of the 9 insensitive bins summed over the 4 normalizations
    // 3. per normalization, the sum over all 18 sensitive bins
    int row, col, u, v, outIdx;
    int sizeX, sizeY, srcStride, dstStride, nSectors, nDirs, srcBase, dstBase;
    float *reduced;
    float acc;
    float scaleX, scaleY;

    sizeX = map->sizeX;
    sizeY = map->sizeY;
    srcStride = map->numFeatures;   // 12 * NUM_SECTOR features per cell on input
    dstStride = NUM_SECTOR * 3 + 4; // 31 features per cell on output
    nDirs = 4;                      // number of normalization directions
    nSectors = NUM_SECTOR;

    // normalization constants of the projection
    scaleX = 1.0f / sqrtf( ( float ) ( nSectors * 2 ) );
    scaleY = 1.0f / sqrtf( ( float ) ( nDirs ) );

    reduced = ( float * ) malloc( sizeof( float ) * ( sizeX * sizeY * dstStride ) );

    for( row = 0; row < sizeY; row++ )
    {
        for( col = 0; col < sizeX; col++ )
        {
            srcBase = ( row * sizeX + col ) * srcStride; // first input feature of this cell
            dstBase = ( row * sizeX + col ) * dstStride; // first output feature of this cell
            outIdx = 0;

            // 18 sensitive bins, each averaged over the 4 normalizations
            for( v = 0; v < nSectors * 2; v++ )
            {
                acc = 0;
                for( u = 0; u < nDirs; u++ )
                {
                    acc += map->map[srcBase + nDirs * nSectors + u * nSectors * 2 + v];
                }
                reduced[dstBase + outIdx] = acc * scaleY;
                outIdx++;
            }
            // 9 insensitive bins, each averaged over the 4 normalizations
            for( v = 0; v < nSectors; v++ )
            {
                acc = 0;
                for( u = 0; u < nDirs; u++ )
                {
                    acc += map->map[srcBase + u * nSectors + v];
                }
                reduced[dstBase + outIdx] = acc * scaleY;
                outIdx++;
            }
            // 4 energy features: per normalization, sum of the 18 sensitive bins
            for( u = 0; u < nDirs; u++ )
            {
                acc = 0;
                for( v = 0; v < 2 * nSectors; v++ )
                {
                    acc += map->map[srcBase + nDirs * nSectors + u * nSectors * 2 + v];
                }
                reduced[dstBase + outIdx] = acc * scaleX;
                outIdx++;
            }
        }
    }

    // swap data into the map
    map->numFeatures = dstStride;
    free( map->map );
    map->map = reduced;
    return LATENT_SVM_OK;
}
// modified from "lsvmc_routine.cpp"
// Allocates a feature-map object holding sizeX * sizeY cells with
// numFeatures floats per cell, all zero-initialized.
// RESULT: error status.
int allocFeatureMapObject( CvLSVMFeatureMapCaskade **obj, const int sizeX,
                           const int sizeY, const int numFeatures )
{
    const int total = sizeX * sizeY * numFeatures;
    *obj = ( CvLSVMFeatureMapCaskade * ) malloc( sizeof( CvLSVMFeatureMapCaskade ) );
    ( *obj )->sizeX = sizeX;
    ( *obj )->sizeY = sizeY;
    ( *obj )->numFeatures = numFeatures;
    ( *obj )->map = ( float * ) malloc( sizeof( float ) * total );
    for( int idx = 0; idx < total; idx++ )
    {
        ( *obj )->map[idx] = 0.0f;
    }
    return LATENT_SVM_OK;
}
// Releases a feature-map object created by allocFeatureMapObject and nulls
// the caller's pointer. Returns LATENT_SVM_MEM_NULL if it was already freed.
int freeFeatureMapObject( CvLSVMFeatureMapCaskade **obj )
{
    CvLSVMFeatureMapCaskade *ptr = *obj;
    if( ptr == NULL )
        return LATENT_SVM_MEM_NULL;
    free( ptr->map );
    free( ptr );
    *obj = NULL;
    return LATENT_SVM_OK;
}
kcftracker.cpp
#ifndef _KCFTRACKER_HEADERS
#include "kcftracker.hpp"
#include "ffttools.hpp"
#include "recttools.hpp"
#include "fhog.hpp"
#include "labdata.hpp"
#endif
// Constructor
/*
 Configures the tracker parameters for the chosen feature set
 (raw pixels / HOG / HOG + Lab) and window/scale options.
 Fix: the original left _labfeatures uninitialized when hog == false and
 lab == false; it is now assigned on every path.
*/
KCFTracker::KCFTracker( bool hog, bool fixed_window, bool multiscale, bool lab )
{
    // Parameters shared by every configuration
    lambda = 0.0001;             // regularization
    padding = 2.5;               // search area surrounding the target, relative to its size
    //output_sigma_factor = 0.1;
    output_sigma_factor = 0.125; // bandwidth of the Gaussian regression target

    if( hog )
    {
        // HOG — VOT settings (the TPAMI paper used interp_factor = 0.02, sigma = 0.5)
        interp_factor = 0.012; // linear interpolation factor for adaptation
        sigma = 0.6;           // gaussian kernel bandwidth
        cell_size = 4;         // HOG cell size
        _hogfeatures = true;
        _labfeatures = lab;
        if( lab )
        {
            interp_factor = 0.005;
            sigma = 0.4;
            //output_sigma_factor = 0.025;
            output_sigma_factor = 0.1;
            // wraps the static centroid table declared in labdata.hpp
            _labCentroids = cv::Mat( nClusters, 3, CV_32FC1, &data );
            cell_sizeQ = cell_size * cell_size;
        }
    }
    else
    {
        // RAW grayscale template
        interp_factor = 0.075;
        sigma = 0.2;
        cell_size = 1;
        _hogfeatures = false;
        if( lab )
        {
            printf( "Lab features are only used with HOG features.\n" );
        }
        _labfeatures = false; // always defined, even when lab was never requested
    }

    if( multiscale )
    {
        // multiscale implies a fixed template size
        template_size = 96;
        //template_size = 100;
        scale_step = 1.05;
        scale_weight = 0.95; // slight penalty on the other scales' responses
        if( !fixed_window )
        {
            //printf("Multiscale does not support non-fixed window.\n");
            fixed_window = true;
        }
    }
    else if( fixed_window )
    {
        // fixed window without multiscale
        template_size = 96;
        //template_size = 100;
        scale_step = 1;
    }
    else
    {
        template_size = 1;
        scale_step = 1;
    }
}
/*****************************************************************************
 *Purpose: initialize the tracker: build the regression target, extract the
 *         first template and train on the initial frame (Initialize tracker)
 *Params : initial target bounding box, first frame image
 *****************************************************************************/
void KCFTracker::init( const cv::Rect &roi, cv::Mat image )
{
_roi = roi; // _roi is a protected member of the Tracker base class
assert( roi.width >= 0 && roi.height >= 0 );
_tmpl = getFeatures( image, 1 ); // initial template; blended with new samples in train()
_prob = createGaussianPeak( size_patch[0], size_patch[1] ); // Gaussian regression target, built once and never changed
_alphaf = cv::Mat( size_patch[0], size_patch[1], CV_32FC2, float( 0 ) ); // ridge-regression coefficients, updated every frame in train()
//_num = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
//_den = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
train( _tmpl, 1.0 ); // train with initial frame
}
/*****************************************************************************
 *Purpose: estimate the target position and scale in the new frame
 *         (Update position based on the new frame)
 *Params : current full frame image
 *Returns: the updated bounding box _roi
 *****************************************************************************/
cv::Rect KCFTracker::update( cv::Mat image )
{
// Clamp the previous ROI so at least part of it stays inside the frame.
if( _roi.x + _roi.width <= 0 ) _roi.x = -_roi.width + 1;
if( _roi.y + _roi.height <= 0 ) _roi.y = -_roi.height + 1;
if( _roi.x >= image.cols - 1 ) _roi.x = image.cols - 2;
if( _roi.y >= image.rows - 1 ) _roi.y = image.rows - 2;
// Center of the tracking box.
float cx = _roi.x + _roi.width / 2.0f;
float cy = _roi.y + _roi.height / 2.0f;
float peak_value; // detection response at the unchanged scale
cv::Point2f res = detect( _tmpl, getFeatures( image, 0, 1.0f ), peak_value );
// Probe a slightly smaller and a slightly larger scale as well.
if( scale_step != 1 )
{
// Test at a smaller _scale
float new_peak_value;
cv::Point2f new_res = detect( _tmpl, getFeatures( image, 0, 1.0f / scale_step ), new_peak_value );
// Accept the other scale only if its down-weighted response still wins.
if( scale_weight * new_peak_value > peak_value )
{
res = new_res;
peak_value = new_peak_value;
_scale /= scale_step;
_roi.width /= scale_step;
_roi.height /= scale_step;
}
// Test at a bigger _scale
new_res = detect( _tmpl, getFeatures( image, 0, scale_step ), new_peak_value );
if( scale_weight * new_peak_value > peak_value )
{
res = new_res;
peak_value = new_peak_value;
_scale *= scale_step;
_roi.width *= scale_step;
_roi.height *= scale_step;
}
}
// Convert the response-map displacement to pixels (cell size and _scale),
// then clamp the ROI back into the frame.
_roi.x = cx - _roi.width / 2.0f + ( ( float ) res.x * cell_size * _scale );
_roi.y = cy - _roi.height / 2.0f + ( ( float ) res.y * cell_size * _scale );
if( _roi.x >= image.cols - 1 ) _roi.x = image.cols - 1;
if( _roi.y >= image.rows - 1 ) _roi.y = image.rows - 1;
if( _roi.x + _roi.width <= 0 ) _roi.x = -_roi.width + 2;
if( _roi.y + _roi.height <= 0 ) _roi.y = -_roi.height + 2;
assert( _roi.width >= 0 && _roi.height >= 0 );
// Retrain the model at the newly found position.
cv::Mat x = getFeatures( image, 0 );
train( x, interp_factor );
return _roi;
}
/*****************************************************************************
 *Purpose: locate the target in the current frame given the learned model
 *         (Detect object in the current frame)
 *Params : z - learned template, x - features of the current patch,
 *         peak_value - (out) maximal response score
 *Returns: sub-pixel displacement of the peak from the response-map center
 *****************************************************************************/
cv::Point2f KCFTracker::detect( cv::Mat z, cv::Mat x, float &peak_value )
{
    using namespace FFTTools;

    // Response map: real part of IDFT( alphaf .* DFT(k) ), where k is the
    // Gaussian correlation between the candidate patch and the template.
    cv::Mat k = gaussianCorrelation( x, z );
    cv::Mat res = ( real( fftd( complexMultiplication( _alphaf, fftd( k ) ), true ) ) );

    // minMaxLoc only accepts doubles for the peak, and integer points for the coordinates
    cv::Point2i maxLoc;
    double maxVal;
    cv::minMaxLoc( res, NULL, &maxVal, NULL, &maxLoc );
    peak_value = ( float ) maxVal;

    // Sub-pixel refinement of the peak along each axis (non-integer result).
    cv::Point2f p( ( float ) maxLoc.x, ( float ) maxLoc.y );
    if( maxLoc.x > 0 && maxLoc.x < res.cols - 1 )
    {
        p.x += subPixelPeak( res.at<float>( maxLoc.y, maxLoc.x - 1 ), peak_value, res.at<float>( maxLoc.y, maxLoc.x + 1 ) );
    }
    if( maxLoc.y > 0 && maxLoc.y < res.rows - 1 )
    {
        p.y += subPixelPeak( res.at<float>( maxLoc.y - 1, maxLoc.x ), peak_value, res.at<float>( maxLoc.y + 1, maxLoc.x ) );
    }

    // Express the displacement relative to the center of the response map.
    p.x -= ( res.cols ) / 2;
    p.y -= ( res.rows ) / 2;
    return p;
}
/*****************************************************************************
 *Purpose: update the model with one sample by linear interpolation between
 *         the old model and the newly computed one
 *         (train tracker with a single image)
 *Params : x - feature patch, train_interp_factor - learning rate
 *****************************************************************************/
void KCFTracker::train( cv::Mat x, float train_interp_factor )
{
    using namespace FFTTools;

    // Ridge regression in the Fourier domain: alphaf = prob / (DFT(kxx) + lambda)
    cv::Mat kxx = gaussianCorrelation( x, x );
    cv::Mat newAlphaf = complexDivision( _prob, ( fftd( kxx ) + lambda ) );

    // Running averages of the template and of the regression coefficients.
    _tmpl = ( 1 - train_interp_factor ) * _tmpl + ( train_interp_factor ) * x;
    _alphaf = ( 1 - train_interp_factor ) * _alphaf + ( train_interp_factor ) * newAlphaf;

    /* MOSSE-style numerator/denominator variant kept from the original source:
    cv::Mat kf = fftd(gaussianCorrelation(x, x));
    cv::Mat num = complexMultiplication(kf, _prob);
    cv::Mat den = complexMultiplication(kf, kf + lambda);
    _tmpl = (1 - train_interp_factor) * _tmpl + (train_interp_factor) * x;
    _num = (1 - train_interp_factor) * _num + (train_interp_factor) * num;
    _den = (1 - train_interp_factor) * _den + (train_interp_factor) * den;
    _alphaf = complexDivision(_num, _den);*/
}
/*****************************************************************************
 *Purpose: evaluates a Gaussian kernel with bandwidth SIGMA for all relative
 *         shifts between input images X and Y, which must both be MxN and
 *         periodic (i.e. pre-processed with a cosine window).
 *Params : the two feature patches x1, x2
 *Returns: the kernel response map
 *Fix    : the original computed x1.mul(x1)/x2.mul(x2) and their sums into
 *         unused locals (testx1/testx2/ax1/ax2) and then recomputed the same
 *         sums inline; the squared norms are now computed once and reused.
 *****************************************************************************/
cv::Mat KCFTracker::gaussianCorrelation( cv::Mat x1, cv::Mat x2 )
{
    using namespace FFTTools;
    cv::Mat c = cv::Mat( cv::Size( size_patch[1], size_patch[0] ), CV_32F, cv::Scalar( 0 ) );
    // HOG features: correlate channel by channel and sum the responses
    if( _hogfeatures )
    {
        cv::Mat caux;
        cv::Mat x1aux;
        cv::Mat x2aux;
        for( int i = 0; i < size_patch[2]; i++ )
        {
            x1aux = x1.row( i ); // Procedure do deal with cv::Mat multichannel bug
            x1aux = x1aux.reshape( 1, size_patch[0] ); // lay channel i back out as a 2-D cell grid
            x2aux = x2.row( i ).reshape( 1, size_patch[0] );
            // element-wise spectrum product (second operand conjugated)
            cv::mulSpectrums( fftd( x1aux ), fftd( x2aux ), caux, 0, true );
            caux = fftd( caux, true );
            rearrange( caux );
            caux.convertTo( caux, CV_32F );
            c = c + real( caux );
        }
    }
    else // Gray features
    {
        cv::mulSpectrums( fftd( x1 ), fftd( x2 ), c, 0, true );
        c = fftd( c, true );
        rearrange( c );
        c = real( c );
    }
    // ||x1||^2 and ||x2||^2, each computed exactly once
    double sqSum1 = cv::sum( x1.mul( x1 ) )[0];
    double sqSum2 = cv::sum( x2.mul( x2 ) )[0];
    // d = max(0, (||x1||^2 + ||x2||^2 - 2 x1.x2) / N): squared distances per shift
    cv::Mat d;
    cv::max( ( ( sqSum1 + sqSum2 ) - 2. * c ) / ( size_patch[0] * size_patch[1] * size_patch[2] ), 0, d );
    // Gaussian kernel response
    cv::Mat k;
    cv::exp( ( -d / ( sigma * sigma ) ), k );
    return k;
}
/*****************************************************************************
 *Purpose: create the Gaussian regression target; called only for the first
 *         frame (Create Gaussian Peak. Function called only in the first frame)
 *Params : sizey, sizex - dimensions of the 2-D Gaussian
 *Returns: the DFT of the centered Gaussian
 *****************************************************************************/
cv::Mat KCFTracker::createGaussianPeak( int sizey, int sizex )
{
    cv::Mat_<float> res( sizey, sizex );
    const int centerY = ( sizey ) / 2;
    const int centerX = ( sizex ) / 2;
    // bandwidth grows with the patch size, shrunk by the padding factor
    const float output_sigma = std::sqrt( ( float ) sizex * sizey ) / padding * output_sigma_factor;
    const float mult = -0.5 / ( output_sigma * output_sigma );
    for( int i = 0; i < sizey; i++ )
    {
        const int offY = i - centerY;
        for( int j = 0; j < sizex; j++ )
        {
            const int offX = j - centerX;
            res( i, j ) = std::exp( mult * ( float ) ( offY * offY + offX * offX ) );
        }
    }
    return FFTTools::fftd( res );
}
/*****************************************************************************
 *Purpose: extract features from the target window (Obtain sub-window from
 *         image, with replication-padding and extract features)
 *Params : image - full frame; inithann - rebuild the Hanning window and the
 *         template geometry (first frame only); scale_adjust - extra scale
 *         factor applied to the extraction window
 *Returns: the windowed feature matrix (numFeatures x sizeX*sizeY for HOG)
 *****************************************************************************/
cv::Mat KCFTracker::getFeatures( const cv::Mat & image, bool inithann, float scale_adjust )
{
//Steps:
//1. derive the actual extraction window from the current ROI
//2. extract HOG features
//3. normalize and truncate them
//4. reduce their dimensionality
//5. optionally compute Lab features and append them to the HOG features
//6. weight everything with the Hanning window
cv::Rect extracted_roi;
//center point of ROI
float cx = _roi.x + _roi.width / 2;
float cy = _roi.y + _roi.height / 2;
// Template geometry is computed only once, when inithann is set (first frame).
if( inithann )
{
int padded_w = _roi.width * padding;
int padded_h = _roi.height * padding;
if( template_size > 1 ) // template_size = 96
{
// Fit largest dimension to the given template size:
// scale so the larger padded side becomes template_size
if( padded_w >= padded_h )
{
//fit to width
_scale = padded_w / ( float ) template_size;
}
else
{
_scale = padded_h / ( float ) template_size;
}
_tmpl_sz.width = padded_w / _scale;
_tmpl_sz.height = padded_h / _scale;
}
else
{
//No template size given, use ROI size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
// original code from paper:
/*if (sqrt(padded_w * padded_h) >= 100) { //Normal size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
}
else { //ROI is too big, track at half size
_tmpl_sz.width = padded_w / 2;
_tmpl_sz.height = padded_h / 2;
_scale = 2;
}*/
}
// Round _tmpl_sz down to a multiple of 2*cell_size, plus a 2-cell border
// for HOG (the border cells are dropped during normalization).
if( _hogfeatures )
{
// Round to cell size and also make it even
_tmpl_sz.width = ( ( ( int ) ( _tmpl_sz.width / ( 2 * cell_size ) ) ) * 2 * cell_size ) + cell_size * 2;
_tmpl_sz.height = ( ( ( int ) ( _tmpl_sz.height / ( 2 * cell_size ) ) ) * 2 * cell_size ) + cell_size * 2;
}
else
{
// Make number of pixels even (helps with some logic involving half-dimensions)
_tmpl_sz.width = ( _tmpl_sz.width / 2 ) * 2;
_tmpl_sz.height = ( _tmpl_sz.height / 2 ) * 2;
}
}
// Extraction window in image coordinates, scaled by _scale and scale_adjust.
extracted_roi.width = scale_adjust * _scale * _tmpl_sz.width;
extracted_roi.height = scale_adjust * _scale * _tmpl_sz.height;
// center roi with new size: top-left corner of the extraction window
extracted_roi.x = cx - extracted_roi.width / 2;
extracted_roi.y = cy - extracted_roi.height / 2;
// Crop the window; parts outside the image are replication-padded.
cv::Mat FeaturesMap;
cv::Mat z = RectTools::subwindow( image, extracted_roi, cv::BORDER_REPLICATE );
// Resize the crop to the template size if needed.
if( z.cols != _tmpl_sz.width || z.rows != _tmpl_sz.height )
{
cv::resize( z, z, _tmpl_sz );
}
// HOG features
if( _hogfeatures )
{
IplImage z_ipl = z; // legacy IplImage header over the patch
CvLSVMFeatureMapCaskade *map;
getFeatureMaps( &z_ipl, cell_size, &map ); // raw HOG map: sizeX*sizeY*3*NUM_SECTOR
normalizeAndTruncate( map, 0.2f ); // normalize + truncate; sizeX/sizeY shrink by 2, features grow 4x
PCAFeatureMaps( map ); // reduce to 31 features per cell
size_patch[0] = map->sizeY;
size_patch[1] = map->sizeX;
size_patch[2] = map->numFeatures;
FeaturesMap = cv::Mat( cv::Size( map->numFeatures, map->sizeX*map->sizeY ), CV_32F, map->map ); // Procedure do deal with cv::Mat multichannel bug
FeaturesMap = FeaturesMap.t( ); // one row per feature channel
freeFeatureMapObject( &map );
// Lab features
// NOTE(review): the original author reports Lab features do not always help
if( _labfeatures )
{
cv::Mat imgLab;
cvtColor( z, imgLab, CV_BGR2Lab );
unsigned char *input = ( unsigned char* ) ( imgLab.data );
// Sparse output vector: one column per non-border cell, one row per centroid
cv::Mat outputLab = cv::Mat( _labCentroids.rows, size_patch[0] * size_patch[1], CV_32F, float( 0 ) );
int cntCell = 0; // index of the current non-border cell
// Iterate through each cell
for( int cY = cell_size; cY < z.rows - cell_size; cY += cell_size )
{
for( int cX = cell_size; cX < z.cols - cell_size; cX += cell_size )
{
// Iterate through each pixel of cell (cX,cY): each pixel votes for the
// nearest Lab centroid by squared Euclidean distance
for( int y = cY; y < cY + cell_size; ++y )
{
for( int x = cX; x < cX + cell_size; ++x )
{
// Lab components for each pixel (3 interleaved channels)
float l = ( float ) input[( z.cols * y + x ) * 3];
float a = ( float ) input[( z.cols * y + x ) * 3 + 1];
float b = ( float ) input[( z.cols * y + x ) * 3 + 2];
// Iterate trough each centroid
float minDist = FLT_MAX;
int minIdx = 0;
float *inputCentroid = ( float* ) ( _labCentroids.data );
for( int k = 0; k < _labCentroids.rows; ++k )
{
float dist = ( ( l - inputCentroid[3 * k] ) * ( l - inputCentroid[3 * k] ) )
+ ( ( a - inputCentroid[3 * k + 1] ) * ( a - inputCentroid[3 * k + 1] ) )
+ ( ( b - inputCentroid[3 * k + 2] ) * ( b - inputCentroid[3 * k + 2] ) );
if( dist < minDist )
{
minDist = dist;
minIdx = k;
}
}
// Store result at output: a soft histogram normalized by pixels per cell
outputLab.at<float>( minIdx, cntCell ) += 1.0 / cell_sizeQ;
//((float*) outputLab.data)[minIdx * (size_patch[0]*size_patch[1]) + cntCell] += 1.0 / cell_sizeQ;
}
}
cntCell++;
}
}
// Update size_patch[2] and add features to FeaturesMap
size_patch[2] += _labCentroids.rows;
FeaturesMap.push_back( outputLab );
}
}
else //if not hog
{
FeaturesMap = RectTools::getGrayImage( z );
FeaturesMap -= ( float ) 0.5; // In Paper;
size_patch[0] = z.rows;
size_patch[1] = z.cols;
size_patch[2] = 1;
}
if( inithann )
{
createHanningMats( ); // build the Hanning window matching the feature layout
}
FeaturesMap = hann.mul( FeaturesMap ); // element-wise windowing
return FeaturesMap;
}
/*****************************************************************************
*函數功能:初始化cosine window,僅在第一幀調用(Initialize Hanning window. Function called only in the first frame.)
*函數參數:無
*****************************************************************************/
void KCFTracker::createHanningMats( )
{
cv::Mat hann1t = cv::Mat( cv::Size( size_patch[1], 1 ), CV_32F, cv::Scalar( 0 ) );
cv::Mat hann2t = cv::Mat( cv::Size( 1, size_patch[0] ), CV_32F, cv::Scalar( 0 ) );
for( int i = 0; i < hann1t.cols; i++ )
hann1t.at<float >( 0, i ) = 0.5 * ( 1 - std::cos( 2 * 3.14159265358979323846 * i / ( hann1t.cols - 1 ) ) );
for( int i = 0; i < hann2t.rows; i++ )
hann2t.at<float >( i, 0 ) = 0.5 * ( 1 - std::cos( 2 * 3.14159265358979323846 * i / ( hann2t.rows - 1 ) ) );
cv::Mat hann2d = hann2t * hann1t;
// HOG features
if( _hogfeatures )
{
cv::Mat hann1d = hann2d.reshape( 1, 1 ); // Procedure do deal with cv::Mat multichannel bug
hann = cv::Mat( cv::Size( size_patch[0] * size_patch[1], size_patch[2] ), CV_32F, cv::Scalar( 0 ) );
for( int i = 0; i < size_patch[2]; i++ )
{
for( int j = 0; j < size_patch[0] * size_patch[1]; j++ )
{
hann.at<float>( i, j ) = hann1d.at<float>( 0, j );
}
}
}
// Gray features
else
{
hann = hann2d;
}
}
/*****************************************************************************
 *Purpose: refine the peak location along one dimension by fitting a parabola
 *         through the three samples around the maximum
 *         (Calculate sub-pixel peak for one dimension)
 *Params : responses at left neighbor, peak, right neighbor
 *Returns: the fractional offset to add to the integer peak coordinate
 *****************************************************************************/
float KCFTracker::subPixelPeak( float left, float center, float right )
{
    const float denominator = 2 * center - right - left;
    if( denominator == 0 )
        return 0;
    return 0.5 * ( right - left ) / denominator;
}
kcftracker.hpp
#pragma once
#include "tracker.h"
#ifndef OPENCV_KCFTRACKER_HPP
#define OPENCV_KCFTRACKER_HPP
#endif
// KCF tracker: kernelized correlation filter with optional HOG and Lab
// color features, fixed-window and multi-scale modes.
class KCFTracker : public Tracker
{
public:
// Constructor
KCFTracker( bool hog = true, // use HOG features
bool fixed_window = true, // use a fixed window size
bool multiscale = true, // use multi-scale estimation
bool lab = true ); // use Lab color-space features (HOG only)
// Initialize tracker
virtual void init(const cv::Rect &roi, cv::Mat image);
// Update position based on the new frame
virtual cv::Rect update(cv::Mat image);
float interp_factor; // linear interpolation factor for adaptation; depends on the hog/lab choice
float sigma; // gaussian kernel bandwidth; depends on the hog/lab choice
float lambda; // regularization (0.0001)
int cell_size; // HOG cell size (4)
int cell_sizeQ; // cell size squared (16), cached to avoid repeated multiplication
float padding; // extra area surrounding the target (2.5)
float output_sigma_factor; // bandwidth of the gaussian regression target; depends on hog/lab
int template_size; // template size: the larger padded side is normalized to 96, the other scaled proportionally
float scale_step; // scale step for multi-scale estimation
float scale_weight; // down-weights detection scores of other scales (0.95) for added stability
protected:
// Detect object in the current frame.
cv::Point2f detect(cv::Mat z, cv::Mat x, float &peak_value);
// train tracker with a single image
void train(cv::Mat x, float train_interp_factor);
// Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y, which must both be MxN. They must also be periodic (ie., pre-processed with a cosine window).
cv::Mat gaussianCorrelation(cv::Mat x1, cv::Mat x2);
// Create Gaussian Peak. Function called only in the first frame.
cv::Mat createGaussianPeak(int sizey, int sizex);
// Obtain sub-window from image, with replication-padding and extract features
cv::Mat getFeatures(const cv::Mat & image, bool inithann, float scale_adjust = 1.0f);
// Initialize Hanning window. Function called only in the first frame.
void createHanningMats();
// Calculate sub-pixel peak for one dimension
float subPixelPeak(float left, float center, float right);
cv::Mat _alphaf; // ridge-regression coefficients; updated every frame, used by detect()
cv::Mat _prob; // gaussian regression target; fixed after init, used for training
cv::Mat _tmpl; // learned template; the z passed to detect()
cv::Mat _num; // numerator of the commented-out MOSSE-style update variant
cv::Mat _den; // denominator of the commented-out MOSSE-style update variant
cv::Mat _labCentroids; // Lab color centroids used to quantize Lab features
private:
int size_patch[3]; // feature map dimensions: sizeY, sizeX, numFeatures
cv::Mat hann; // Hanning window produced by createHanningMats()
cv::Size _tmpl_sz; // padded template size in pixels, rounded to cells
float _scale; // scale factor between _tmpl_sz and the padded ROI
int _gaussian_size; // unused in the visible code
bool _hogfeatures; // true when HOG features are enabled
bool _labfeatures; // true when Lab features are enabled
};