接着上一篇博客(http://blog.csdn.net/lanxuecc/article/details/52688605):在弱分類器訓練的主體函數 cvCreateCARTClassifier 中,我們看到它主要是調用 cvCreateMTStumpClassifier 函數來訓練得到弱分類器的結點。下面對這個函數加以註釋。
/*
 * cvCreateMTStumpClassifier
 *
 * Multithreaded stump classifier constructor.
 * Includes huge train data support through callback function.
 *
 * Scans all n features in groups of `portion` features, and for every feature
 * asks one of the findStumpThreshold_* routines for the best split.  The
 * feature/threshold pair with the smallest lerror + rerror is stored in a
 * newly allocated CvStumpClassifier, which is returned cast to CvClassifier*.
 *
 * Parameters:
 *   trainData              - CV_32FC1 matrix of precomputed feature values, or
 *                            NULL; when NULL, trainParams->getTrainData must be
 *                            set and supplies all feature values on demand
 *   flags                  - sample layout, tested with CV_IS_ROW_SAMPLE
 *   trainClasses           - CV_32FC1 row or column vector of class labels;
 *                            its length defines the total sample count m
 *   (typeMask)             - unused; present to keep a uniform trainer signature
 *   missedMeasurementsMask - must be NULL
 *   compIdx                - must be NULL
 *   sampleIdx              - optional CV_32FC1 vector holding the indices of the
 *                            samples that actually take part in training;
 *                            NULL means all m samples are used
 *   weights                - row or column vector of m sample weights
 *                            (read as float by the threshold routines)
 *   trainParams            - actually a CvMTStumpTrainParams*; the fields read
 *                            here are error, sortedIdx, getTrainData, numcomp,
 *                            userdata, portion and type
 *
 * Returns: the trained stump.  Its release member is set to
 * cvReleaseStumpClassifier.
 */
CV_BOOST_IMPL
CvClassifier* cvCreateMTStumpClassifier( CvMat* trainData,
                      int flags,
                      CvMat* trainClasses,
                      CvMat* /*typeMask*/,
                      CvMat* missedMeasurementsMask,
                      CvMat* compIdx,
                      CvMat* sampleIdx,
                      CvMat* weights,
                      CvClassifierTrainParams* trainParams )
{
    CvStumpClassifier* stump = NULL; /* resulting weak classifier (stump) */
    int m = 0;                       /* total number of samples */
    int n = 0;                       /* total number of features */
    uchar* data = NULL;              /* trainData data pointer */
    size_t cstep = 0;                /* byte stride between consecutive features in trainData */
    size_t sstep = 0;                /* byte stride between consecutive samples in trainData */
    int datan = 0;                   /* number of features precomputed in trainData */
    uchar* ydata = NULL;             /* trainClasses data pointer */
    size_t ystep = 0;                /* byte stride between consecutive labels */
    uchar* idxdata = NULL;           /* sampleIdx data pointer */
    size_t idxstep = 0;              /* byte stride between consecutive sampleIdx entries */
    int l = 0;                       /* number of samples actually used for training */
    uchar* wdata = NULL;             /* weights data pointer */
    size_t wstep = 0;                /* byte stride between consecutive weights */

    /* sortedIdx holds, for each precomputed feature (one row per feature),
       the sample indices ordered by that feature's value */
    uchar* sorteddata = NULL;        /* sortedIdx data pointer */
    int sortedtype = 0;              /* sortedIdx element type */
    size_t sortedcstep = 0;          /* byte stride between sortedIdx rows */
    size_t sortedsstep = 0;          /* byte size of one sortedIdx element */
    int sortedn = 0;                 /* sortedIdx rows (features with presorted indices) */
    int sortedm = 0;                 /* sortedIdx columns (samples per row) */

    char* filter = NULL;             /* per-sample flag: 1 if listed in sampleIdx, else 0 */
    int i = 0;

    int compidx = 0;                 /* first feature of the next unclaimed group (shared) */
    int stumperror;                  /* index into the findStumpThreshold_* tables; the
                                        findStumpThreshold_16s table is ordered
                                        misc, gini, entropy, sq */
    int portion;                     /* number of features handed out per group */

    /* private variables */
    CvMat mat;                       /* buffer for feature values fetched via getTrainData */
    CvValArray va;
    float lerror;                    /* error on the left side of the threshold */
    float rerror;                    /* error on the right side of the threshold */
    float left;                      /* left branch response */
    float right;                     /* right branch response */
    float threshold;                 /* split threshold */
    int optcompidx;                  /* index of the best feature found so far */

    float sumw;
    float sumwy;
    float sumwyy;

    /* loop temporaries */
    int t_compidx;
    int t_n;

    int ti;
    int tj;
    int tk;

    uchar* t_data;                   /* base pointer of the feature values in use */
    size_t t_cstep;                  /* feature stride of t_data */
    size_t t_sstep;                  /* sample stride of t_data */

    size_t matcstep;                 /* feature stride inside mat */
    size_t matsstep;                 /* sample stride inside mat */

    int* t_idx;                      /* working array of sample indices */
    /* end private variables */

    CV_Assert( trainParams != NULL );
    CV_Assert( trainClasses != NULL );
    CV_Assert( CV_MAT_TYPE( trainClasses->type ) == CV_32FC1 );
    CV_Assert( missedMeasurementsMask == NULL );
    CV_Assert( compIdx == NULL );
    /* weights is dereferenced unconditionally below, so reject NULL up front */
    CV_Assert( weights != NULL );

    stumperror = (int) ((CvMTStumpTrainParams*) trainParams)->error;

    /* class labels: accept either a row or a column vector */
    ydata = trainClasses->data.ptr;
    if( trainClasses->rows == 1 )
    {
        m = trainClasses->cols;
        ystep = CV_ELEM_SIZE( trainClasses->type );
    }
    else
    {
        m = trainClasses->rows;
        ystep = trainClasses->step;
    }

    /* sample weights: must have exactly m entries */
    wdata = weights->data.ptr;
    if( weights->rows == 1 )
    {
        CV_Assert( weights->cols == m );
        wstep = CV_ELEM_SIZE( weights->type );
    }
    else
    {
        CV_Assert( weights->rows == m );
        wstep = weights->step;
    }

    /* optional presorted sample indices, one row per precomputed feature */
    if( ((CvMTStumpTrainParams*) trainParams)->sortedIdx != NULL )
    {
        sortedtype =
            CV_MAT_TYPE( ((CvMTStumpTrainParams*) trainParams)->sortedIdx->type );
        assert( sortedtype == CV_16SC1 || sortedtype == CV_32SC1
                || sortedtype == CV_32FC1 );
        sorteddata = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->data.ptr;
        sortedsstep = CV_ELEM_SIZE( sortedtype );
        sortedcstep = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->step;
        sortedn = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->rows;
        sortedm = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->cols;
    }

    /* precomputed feature values */
    if( trainData == NULL )
    {
        /* nothing precomputed: every feature comes from the callback */
        assert( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL );
        n = ((CvMTStumpTrainParams*) trainParams)->numcomp;
        assert( n > 0 );
    }
    else
    {
        assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 );
        data = trainData->data.ptr;
        if( CV_IS_ROW_SAMPLE( flags ) )
        {
            /* one sample per row, one feature per column */
            cstep = CV_ELEM_SIZE( trainData->type );
            sstep = trainData->step;
            assert( m == trainData->rows );
            datan = n = trainData->cols;
        }
        else
        {
            /* one feature per row, one sample per column */
            sstep = CV_ELEM_SIZE( trainData->type );
            cstep = trainData->step;
            assert( m == trainData->cols );
            datan = n = trainData->rows;
        }
        /* with a callback present, the total feature count comes from numcomp
           and may exceed the precomputed count datan */
        if( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL )
        {
            n = ((CvMTStumpTrainParams*) trainParams)->numcomp;
        }
    }
    /* the precomputed features are a prefix of all features */
    assert( datan <= n );

    if( sampleIdx != NULL )
    {
        /* only a subset of the samples is used */
        assert( CV_MAT_TYPE( sampleIdx->type ) == CV_32FC1 );
        idxdata = sampleIdx->data.ptr;
        idxstep = ( sampleIdx->rows == 1 )
            ? CV_ELEM_SIZE( sampleIdx->type ) : sampleIdx->step;
        l = ( sampleIdx->rows == 1 ) ? sampleIdx->cols : sampleIdx->rows;

        /* sortedIdx rows are walked over sortedm entries, so build a membership
           mask to pick out the samples that are actually in use */
        if( sorteddata != NULL )
        {
            filter = (char*) cvAlloc( sizeof( char ) * m );
            memset( (void*) filter, 0, sizeof( char ) * m );
            for( i = 0; i < l; i++ )
            {
                filter[(int) *((float*) (idxdata + i * idxstep))] = (char) 1;
            }
        }
    }
    else
    {
        /* all samples are used */
        l = m;
    }

    /* allocate and zero the stump that will hold the result */
    stump = (CvStumpClassifier*) cvAlloc( sizeof( CvStumpClassifier) );
    memset( (void*) stump, 0, sizeof( CvStumpClassifier ) );

    /* features are processed in groups of `portion`; each thread repeatedly
       claims the next group */
    portion = ((CvMTStumpTrainParams*)trainParams)->portion;
    if( portion < 1 )
    {
        /* auto portion */
        portion = n;
#ifdef _OPENMP
        portion /= omp_get_max_threads();
#endif /* _OPENMP */
        /* With fewer features than threads the division yields 0; a zero
           portion would never advance compidx and the scan loop below would
           not terminate, so always hand out at least one feature. */
        if( portion < 1 )
        {
            portion = 1;
        }
    }

    stump->eval = cvEvalStumpClassifier;
    stump->tune = NULL;
    stump->save = NULL;
    stump->release = cvReleaseStumpClassifier;

    stump->lerror = FLT_MAX;
    stump->rerror = FLT_MAX;
    stump->left  = 0.0F;
    stump->right = 0.0F;

    compidx = 0;

    /* parallel region (only active when compiled with OpenMP) */
#ifdef _OPENMP
    #pragma omp parallel private(mat, va, lerror, rerror, left, right, threshold, \
                                 optcompidx, sumw, sumwy, sumwyy, t_compidx, t_n, \
                                 ti, tj, tk, t_data, t_cstep, t_sstep, matcstep,  \
                                 matsstep, t_idx)
#endif /* _OPENMP */
    {
        lerror = FLT_MAX;
        rerror = FLT_MAX;
        left  = 0.0F;
        right = 0.0F;
        threshold = 0.0F;
        optcompidx = 0;

        /* FLT_MAX in sumw tells the threshold routines to compute the sums */
        sumw   = FLT_MAX;
        sumwy  = FLT_MAX;
        sumwyy = FLT_MAX;

        t_compidx = 0;
        t_n = 0;

        ti = 0;
        tj = 0;
        tk = 0;

        t_data = NULL;
        t_cstep = 0;
        t_sstep = 0;

        matcstep = 0;
        matsstep = 0;

        t_idx = NULL;

        mat.data.ptr = NULL;

        /* more features than were precomputed: allocate a buffer that the
           callback fills with one group of feature values at a time */
        if( datan < n )
        {
            if( CV_IS_ROW_SAMPLE( flags ) )
            {
                mat = cvMat( m, portion, CV_32FC1, 0 );
                matcstep = CV_ELEM_SIZE( mat.type );
                matsstep = mat.step;
            }
            else
            {
                mat = cvMat( portion, m, CV_32FC1, 0 );
                matcstep = mat.step;
                matsstep = CV_ELEM_SIZE( mat.type );
            }
            mat.data.ptr = (uchar*) cvAlloc( sizeof( float ) * mat.rows * mat.cols );
        }

        /* working index array: needed for filtering presorted rows and for
           sorting features that have no presorted row */
        if( filter != NULL || sortedn < n )
        {
            t_idx = (int*) cvAlloc( sizeof( int ) * m );
            /* Always seed t_idx with the training sample indices.  Previously
               this was skipped when a filter and presorted rows both existed,
               which left t_idx uninitialized for a thread whose first group
               lies entirely past the presorted features. */
            if( idxdata != NULL )
            {
                for( ti = 0; ti < l; ti++ )
                {
                    t_idx[ti] = (int) *((float*) (idxdata + ti * idxstep));
                }
            }
            else
            {
                for( ti = 0; ti < l; ti++ )
                {
                    t_idx[ti] = ti;
                }
            }
        }

        /* claim the first group of features */
#ifdef _OPENMP
        #pragma omp critical(c_compidx)
#endif /* _OPENMP */
        {
            t_compidx = compidx;
            compidx += portion;
        }

        /* search for the best stump, one group at a time */
        while( t_compidx < n )
        {
            t_n = portion;
            if( t_compidx < datan )
            {
                /* group starts inside the precomputed features */
                t_n = ( t_n < (datan - t_compidx) ) ? t_n : (datan - t_compidx);
                t_data = data;
                t_cstep = cstep;
                t_sstep = sstep;
            }
            else
            {
                /* group lies beyond the precomputed features */
                t_n = ( t_n < (n - t_compidx) ) ? t_n : (n - t_compidx);
                t_cstep = matcstep;
                t_sstep = matsstep;
                /* bias the base pointer so that t_data + ti * t_cstep addresses
                   feature ti inside mat, whose first entry is feature t_compidx */
                t_data = mat.data.ptr - t_compidx * ((size_t) t_cstep );

                /* have the callback compute this group's feature values */
                ((CvMTStumpTrainParams*)trainParams)->getTrainData( &mat,
                        sampleIdx, compIdx, t_compidx, t_n,
                        ((CvMTStumpTrainParams*)trainParams)->userdata );
            }

            /* features that have a presorted index row */
            if( sorteddata != NULL )
            {
                if( filter != NULL )
                {
                    /* only a subset of samples is used: copy the presorted
                       indices that pass the filter into t_idx (an int array,
                       hence the _32s routines in every case below) */
                    switch( sortedtype )
                    {
                        case CV_16SC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                tk = 0;
                                for( tj = 0; tj < sortedm; tj++ )
                                {
                                    int curidx = (int) ( *((short*) (sorteddata
                                            + ti * sortedcstep + tj * sortedsstep)) );
                                    if( filter[curidx] != 0 )
                                    {
                                        t_idx[tk++] = curidx;
                                    }
                                }
                                /* a non-zero return means this feature improved
                                   on the best split found so far */
                                if( findStumpThreshold_32s[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        (uchar*) t_idx, sizeof( int ), tk,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        case CV_32SC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                tk = 0;
                                for( tj = 0; tj < sortedm; tj++ )
                                {
                                    int curidx = (int) ( *((int*) (sorteddata
                                            + ti * sortedcstep + tj * sortedsstep)) );
                                    if( filter[curidx] != 0 )
                                    {
                                        t_idx[tk++] = curidx;
                                    }
                                }
                                if( findStumpThreshold_32s[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        (uchar*) t_idx, sizeof( int ), tk,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        case CV_32FC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                tk = 0;
                                for( tj = 0; tj < sortedm; tj++ )
                                {
                                    int curidx = (int) ( *((float*) (sorteddata
                                            + ti * sortedcstep + tj * sortedsstep)) );
                                    if( filter[curidx] != 0 )
                                    {
                                        t_idx[tk++] = curidx;
                                    }
                                }
                                if( findStumpThreshold_32s[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        (uchar*) t_idx, sizeof( int ), tk,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        default:
                            assert( 0 );
                            break;
                    }
                }
                else
                {
                    /* all samples are used: pass the presorted row directly.
                       Arguments of the threshold routines:
                         t_data + ti * t_cstep, t_sstep   values of feature ti and
                                                          their per-sample stride
                         wdata, wstep                     sample weights and stride
                         ydata, ystep                     class labels and stride
                         sorteddata + ti * sortedcstep,
                         sortedsstep, sortedm             presorted index row of
                                                          feature ti, element size
                                                          and entry count
                         lerror, rerror                   best left/right errors
                         threshold, left, right           best threshold and
                                                          branch responses */
                    switch( sortedtype )
                    {
                        case CV_16SC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                if( findStumpThreshold_16s[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        sorteddata + ti * sortedcstep, sortedsstep, sortedm,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        case CV_32SC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                if( findStumpThreshold_32s[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        sorteddata + ti * sortedcstep, sortedsstep, sortedm,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        case CV_32FC1:
                            for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
                            {
                                if( findStumpThreshold_32f[stumperror](
                                        t_data + ti * t_cstep, t_sstep,
                                        wdata, wstep, ydata, ystep,
                                        sorteddata + ti * sortedcstep, sortedsstep, sortedm,
                                        &lerror, &rerror,
                                        &threshold, &left, &right,
                                        &sumw, &sumwy, &sumwyy ) )
                                {
                                    optcompidx = ti;
                                }
                            }
                            break;
                        default:
                            assert( 0 );
                            break;
                    }
                }
            }

            /* remaining features of the group have no presorted row: sort the
               sample indices by feature value first, then search the threshold */
            ti = MAX( t_compidx, MIN( sortedn, t_compidx + t_n ) );
            for( ; ti < t_compidx + t_n; ti++ )
            {
                va.data = t_data + ti * t_cstep;
                va.step = t_sstep;
                icvSortIndexedValArray_32s( t_idx, l, &va );
                if( findStumpThreshold_32s[stumperror](
                        t_data + ti * t_cstep, t_sstep,
                        wdata, wstep, ydata, ystep,
                        (uchar*)t_idx, sizeof( int ), l,
                        &lerror, &rerror,
                        &threshold, &left, &right,
                        &sumw, &sumwy, &sumwyy ) )
                {
                    optcompidx = ti;
                }
            }

            /* claim the next group of features */
#ifdef _OPENMP
            #pragma omp critical(c_compidx)
#endif /* _OPENMP */
            {
                t_compidx = compidx;
                compidx += portion;
            }
        }

        /* merge this thread's best split into the shared result */
#ifdef _OPENMP
        #pragma omp critical(c_beststump)
#endif /* _OPENMP */
        {
            if( lerror + rerror < stump->lerror + stump->rerror )
            {
                stump->lerror    = lerror;
                stump->rerror    = rerror;
                stump->compidx   = optcompidx;
                stump->threshold = threshold;
                stump->left      = left;
                stump->right     = right;
            }
        }

        /* free allocated memory */
        if( mat.data.ptr != NULL )
        {
            cvFree( &(mat.data.ptr) );
        }
        if( t_idx != NULL )
        {
            cvFree( &t_idx );
        }
    } /* end of parallel region */

    /* END */

    /* free allocated memory */
    if( filter != NULL )
    {
        cvFree( &filter );
    }

    /* for discrete class output, map each branch response to +1 (>= 0.5) or -1 */
    if( ((CvMTStumpTrainParams*) trainParams)->type == CV_CLASSIFICATION_CLASS )
    {
        stump->left = 2.0F * (stump->left >= 0.5F) - 1.0F;
        stump->right = 2.0F * (stump->right >= 0.5F) - 1.0F;
    }

    return (CvClassifier*) stump;
}
從上面函數的代碼中觀察到在遍歷特徵時會調用findStumpThreshold_16s、findStumpThreshold_32s、findStumpThreshold_32f數組中定義了的總共12個函數指針,根據參數的不同調用不同的函數,例如findStumpThreshold_16s中的四個函數指針如下:
/*
 * Table of threshold-search routines, each element being a function pointer
 * of type CvFindThresholdFunc.  cvCreateMTStumpClassifier indexes this table
 * with its stumperror value and uses it when the presorted sample indices
 * are stored as CV_16SC1.  Entry order: misc, gini, entropy, sq.
 */
CvFindThresholdFunc findStumpThreshold_16s[4] = {
icvFindStumpThreshold_misc_16s,
icvFindStumpThreshold_gini_16s,
icvFindStumpThreshold_entropy_16s,
icvFindStumpThreshold_sq_16s
};
例如函數指針 icvFindStumpThreshold_misc_16s,在源碼中直接搜索是找不到它的函數實現的;
其實這些函數是由這些數組聲明上方定義的一些宏來實現的。
舉個例子:
宏1
/* misclassification error
* err = MIN( wpos, wneg );
*
* Instantiates ICV_DEF_FIND_STUMP_THRESHOLD with the function-name suffix
* misc_<suffix> and the index element type `type`.  The statements passed as
* the third macro argument are the per-candidate error computation:
*   - wposl / wposr: 0.5 * (w + wy) for the left / right side (presumably the
*     weight of the positive samples, assuming labels are -1/+1 - confirm);
*   - curleft / curright are remapped with 0.5 * (1 + x);
*   - curlerror / currerror: the smaller of wpos and (w - wpos) on each side.
*/
#define ICV_DEF_FIND_STUMP_THRESHOLD_MISC( suffix, type ) \
ICV_DEF_FIND_STUMP_THRESHOLD( misc_##suffix, type, \
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr ); \
)
宏2
#define ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error ) \
CV_BOOST_IMPL int icvFindStumpThreshold_##suffix( \
uchar* data, size_t datastep, \
uchar* wdata, size_t wstep, \
uchar* ydata, size_t ystep, \
uchar* idxdata, size_t idxstep, int num, \
float* lerror, \
float* rerror, \
float* threshold, float* left, float* right, \
float* sumw, float* sumwy, float* sumwyy ) \
{ \
/* ……函數體此處省略,完整定義見下文…… */ \
}
根據宏在預編譯階段的解析原理,在預編譯階段聲明:
ICV_DEF_FIND_STUMP_THRESHOLD_MISC( 16s, short )時會被上述第一個宏代替,變成
ICV_DEF_FIND_STUMP_THRESHOLD( misc_16s, short, \
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr ); \
)
接着上述ICV_DEF_FIND_STUMP_THRESHOLD宏解析後被CV_BOOST_IMPL int icvFindStumpThreshold_##suffix替代變成:
CV_BOOST_IMPL int icvFindStumpThreshold_misc_16s( \
uchar* data, size_t datastep, \
uchar* wdata, size_t wstep, \
uchar* ydata, size_t ystep, \
uchar* idxdata, size_t idxstep, int num, \
float* lerror, \
float* rerror, \
float* threshold, float* left, float* right, \
float* sumw, float* sumwy, float* sumwyy )
{
。。。。。。。。。。
/*後面函數體中的所有"type"都被替換成"short"*/
/*函數體中的"error"被*/
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr );
/*替換*/
}
這樣就實現了icvFindStumpThreshold_misc_16s的函數定義,其他的聲明也是同樣的方法,這就很巧妙的將12個很相似的函數,用宏聲明的方式給分別定義了,而不用重複寫很多代碼。
所以 findStumpThreshold_16s、findStumpThreshold_32s、findStumpThreshold_32f 數組中函數指針所指向的函數,都是由下述宏實現的,只是傳入的參數和 error 的計算方法不同:
/*
 * ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error )
 *
 * Defines the function icvFindStumpThreshold_<suffix>, which searches one
 * feature for the threshold that minimizes left error + right error.
 *
 *   suffix - appended to the generated function name
 *   type   - element type used to read sample indices from idxdata
 *   error  - statements pasted into the scan loop; they may read wl, wyl, wyyl,
 *            wr, wyr, curleft, curright and must set curlerror and currerror
 *
 * Generated function arguments:
 *   data, datastep          - feature values and byte stride per sample
 *   wdata, wstep            - sample weights (float) and byte stride
 *   ydata, ystep            - sample labels (float) and byte stride
 *   idxdata, idxstep, num   - sample indices ordered by ascending feature
 *                             value (checked by an assert), byte stride, count
 *   lerror, rerror          - in/out: best left/right errors so far
 *   threshold, left, right  - out: written only when a better split is found
 *   sumw, sumwy, sumwyy     - in/out: totals of w, w*y and w*y*y; computed here
 *                             when *sumw == FLT_MAX, otherwise reused as given
 *
 * Returns 1 if a split with smaller lerror + rerror was found, else 0.
 *
 * Every line of the definition must end with a backslash: without the line
 * continuation the #define stops at the prototype and the body is left as
 * stray code at file scope.
 */
#define ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error ) \
CV_BOOST_IMPL int icvFindStumpThreshold_##suffix( \
        uchar* data, size_t datastep, \
        uchar* wdata, size_t wstep, \
        uchar* ydata, size_t ystep, \
        uchar* idxdata, size_t idxstep, int num, \
        float* lerror, \
        float* rerror, \
        float* threshold, float* left, float* right, \
        float* sumw, float* sumwy, float* sumwyy ) \
{ \
    int found = 0; \
    float wyl = 0.0F; \
    float wl = 0.0F; \
    float wyyl = 0.0F; \
    float wyr = 0.0F; \
    float wr = 0.0F; \
    \
    float curleft = 0.0F; \
    float curright = 0.0F; \
    float* prevval = NULL; \
    float* curval = NULL; \
    float curlerror = 0.0F; \
    float currerror = 0.0F; \
    float wposl; \
    float wposr; \
    \
    int i = 0; \
    int idx = 0; \
    \
    wposl = wposr = 0.0F; \
    if( *sumw == FLT_MAX ) \
    { \
        /* calculate sums */ \
        float *y = NULL; \
        float *w = NULL; \
        float wy = 0.0F; \
        \
        *sumw = 0.0F; \
        *sumwy = 0.0F; \
        *sumwyy = 0.0F; \
        for( i = 0; i < num; i++ ) \
        { \
            idx = (int) ( *((type*) (idxdata + i*idxstep)) ); \
            w = (float*) (wdata + idx * wstep); \
            *sumw += *w; \
            y = (float*) (ydata + idx * ystep); \
            wy = (*w) * (*y); \
            *sumwy += wy; \
            *sumwyy += wy * (*y); \
        } \
    } \
    \
    /* walk the num samples in sorted order; each distinct feature value */ \
    /* is a candidate split position */ \
    for( i = 0; i < num; i++ ) \
    { \
        idx = (int) ( *((type*) (idxdata + i*idxstep)) ); \
        curval = (float*) (data + idx * datastep); \
        /* for debug purpose */ \
        if( i > 0 ) assert( (*prevval) <= (*curval) ); \
        \
        /* right-side sums are the totals minus the left-side sums */ \
        wyr = *sumwy - wyl; \
        wr = *sumw - wl; \
        \
        if( wl > 0.0 ) curleft = wyl / wl; \
        else curleft = 0.0F; \
        \
        if( wr > 0.0 ) curright = wyr / wr; \
        else curright = 0.0F; \
        \
        error \
        \
        if( curlerror + currerror < (*lerror) + (*rerror) ) \
        { \
            (*lerror) = curlerror; \
            (*rerror) = currerror; \
            /* threshold: midpoint between this value and the previous */ \
            /* distinct value, or the value itself for the first sample */ \
            *threshold = *curval; \
            if( i > 0 ) { \
                *threshold = 0.5F * (*threshold + *prevval); \
            } \
            *left = curleft; \
            *right = curright; \
            found = 1; \
        } \
        \
        /* move this sample, and all following samples with the same */ \
        /* feature value, to the left side */ \
        do \
        { \
            wl += *((float*) (wdata + idx * wstep)); \
            wyl += (*((float*) (wdata + idx * wstep))) \
                * (*((float*) (ydata + idx * ystep))); \
            wyyl += *((float*) (wdata + idx * wstep)) \
                * (*((float*) (ydata + idx * ystep))) \
                * (*((float*) (ydata + idx * ystep))); \
        } \
        while( (++i) < num && \
            ( *((float*) (data + (idx = \
                (int) ( *((type*) (idxdata + i*idxstep))) ) * datastep)) \
                == *curval ) ); \
        /* the do/while overshoots by one; the for loop increments again */ \
        --i; \
        prevval = curval; \
    } /* for each value */ \
    \
    return found; \
}
這段代碼的邏輯一句話概括就是:遍歷某特徵的所有樣本,找到使分類的左右誤差之和最小的閾值。