Understanding OpenCV's PCA Source Code, with Training and Usage

/****************************************************************************************\
*                                          PCA                                           *
\****************************************************************************************/

PCA::PCA() {}

PCA::PCA(InputArray data, InputArray _mean, int flags, int maxComponents)
{
    operator()(data, _mean, flags, maxComponents);
}

PCA::PCA(InputArray data, InputArray _mean, int flags, double retainedVariance)
{
    computeVar(data, _mean, flags, retainedVariance);
}

PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComponents)
{
    Mat data = _data.getMat(), _mean = __mean.getMat();
    int covar_flags = CV_COVAR_SCALE;
    int i, len, in_count;
    Size mean_sz;

    CV_Assert( data.channels() == 1 );
    if( flags & CV_PCA_DATA_AS_COL )
    {
        len = data.rows;
        in_count = data.cols;
        covar_flags |= CV_COVAR_COLS;
        mean_sz = Size(1, len);
    }
    else
    {
        len = data.cols;
        in_count = data.rows;
        covar_flags |= CV_COVAR_ROWS;
        mean_sz = Size(len, 1);
    }

    int count = std::min(len, in_count), out_count = count;
    if( maxComponents > 0 )
        out_count = std::min(count, maxComponents);

    // "scrambled" way to compute PCA (when cols(A)>rows(A)):
    // B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y
    if( len <= in_count )
        covar_flags |= CV_COVAR_NORMAL;

    int ctype = std::max(CV_32F, data.depth());
    mean.create( mean_sz, ctype );

    Mat covar( count, count, ctype );

    if( _mean.data )
    {
        CV_Assert( _mean.size() == mean_sz );
        _mean.convertTo(mean, ctype);
        covar_flags |= CV_COVAR_USE_AVG;
    }

    calcCovarMatrix( data, covar, mean, covar_flags, ctype );
    eigen( covar, eigenvalues, eigenvectors );

    if( !(covar_flags & CV_COVAR_NORMAL) )
    {
        // CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A
        // CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A'
        Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
        if( data.type() != ctype || tmp_mean.data == mean.data )
        {
            data.convertTo( tmp_data, ctype );
            subtract( tmp_data, tmp_mean, tmp_data );
        }
        else
        {
            subtract( data, tmp_mean, tmp_mean );
            tmp_data = tmp_mean;
        }

        Mat evects1(count, len, ctype);
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1,
            (flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0);
        eigenvectors = evects1;

        // normalize eigenvectors
        for( i = 0; i < out_count; i++ )
        {
            Mat vec = eigenvectors.row(i);
            normalize(vec, vec);
        }
    }

    if( count > out_count )
    {
        // use clone() to physically copy the data and thus deallocate the original matrices
        eigenvalues = eigenvalues.rowRange(0,out_count).clone();
        eigenvectors = eigenvectors.rowRange(0,out_count).clone();
    }
    return *this;
}
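
The "scrambled" trick in the comments above is worth writing out (notation mine, for the row-sample layout). Let \(A\) be the centered \(n \times d\) data matrix with \(n < d\):

\[
C = A^{\top}A \in \mathbb{R}^{d \times d} \ \text{(full covariance, large)}, \qquad
G = AA^{\top} \in \mathbb{R}^{n \times n} \ \text{(small)}.
\]

If \(Gy = \lambda y\), then

\[
C\,(A^{\top}y) = A^{\top}(AA^{\top})\,y = A^{\top}G\,y = \lambda\,(A^{\top}y),
\]

so \(x = A^{\top}y\) is an eigenvector of \(C\) with the same eigenvalue. The gemm call above computes exactly \(x^{\top} = y^{\top}A\) for all small eigenvectors at once, and the normalize loop restores unit length, since \(A^{\top}y\) is generally not a unit vector. (The scaling from CV_COVAR_SCALE rescales eigenvalues but does not affect the eigenvector relation.)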

template <typename T>
int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance)
{
    CV_DbgAssert( eigenvalues.type() == DataType<T>::type );

    Mat g(eigenvalues.size(), DataType<T>::type);

    for(int ig = 0; ig < g.rows; ig++)
    {
        g.at<T>(ig, 0) = 0;
        for(int im = 0; im <= ig; im++)
        {
            g.at<T>(ig,0) += eigenvalues.at<T>(im,0);
        }
    }

    int L;

    for(L = 0; L < eigenvalues.rows; L++)
    {
        double energy = g.at<T>(L, 0) / g.at<T>(g.rows - 1, 0);
        if(energy > retainedVariance)
            break;
    }

    L = std::max(2, L);

    return L;
}
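
Reading the loop literally (my notation, not from the source): with the eigenvalues sorted in decreasing order, the function returns the first zero-based index \(L\) at which the cumulative energy through component \(L+1\) exceeds the requested ratio \(r\), clamped to at least 2:

\[
L \;=\; \max\Bigl(2,\ \min\Bigl\{\,k \ge 0 \;:\; \frac{\sum_{i=1}^{k+1} \lambda_i}{\sum_{i=1}^{n} \lambda_i} > r \Bigr\}\Bigr)
\]

The caller then keeps the first \(L\) eigenvalue/eigenvector pairs. Note that the prefix sums in g are built with a quadratic double loop; a single running sum would give the same result.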

PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double retainedVariance)
{
    Mat data = _data.getMat(), _mean = __mean.getMat();
    int covar_flags = CV_COVAR_SCALE;
    int i, len, in_count;
    Size mean_sz;

    CV_Assert( data.channels() == 1 );
    if( flags & CV_PCA_DATA_AS_COL )
    {   // each column is one sample
        len = data.rows;              // number of features
        in_count = data.cols;         // number of samples
        covar_flags |= CV_COVAR_COLS; // column layout
        mean_sz = Size(1, len);
    }
    else
    {   // each row is one sample
        len = data.cols;              // number of features
        in_count = data.rows;         // number of samples
        covar_flags |= CV_COVAR_ROWS;
        mean_sz = Size(len, 1);
    }

    CV_Assert( retainedVariance > 0 && retainedVariance <= 1 );

    int count = std::min(len, in_count);  // count is the smaller of the feature count and the sample count

    // "scrambled" way to compute PCA (when cols(A)>rows(A)):
    // B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y
    if( len <= in_count )  // when the feature count does not exceed the sample count, use the ordinary covariance
        covar_flags |= CV_COVAR_NORMAL;

    int ctype = std::max(CV_32F, data.depth());
    mean.create( mean_sz, ctype );

    Mat covar( count, count, ctype );

    if( _mean.data )
    {
        CV_Assert( _mean.size() == mean_sz );
        _mean.convertTo(mean, ctype);
    }
    // compute the covariance matrix, filling covar and mean; the data is not
    // centered up front -- calcCovarMatrix centers it internally
    calcCovarMatrix( data, covar, mean, covar_flags, ctype );
    // compute the eigenvalues and eigenvectors of covar
    eigen( covar, eigenvalues, eigenvectors );
    // dimensionality >> sample count: map the small-matrix eigenvectors back
    if( !(covar_flags & CV_COVAR_NORMAL) )
    {
        // CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A
        // CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A'
        Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
        if( data.type() != ctype || tmp_mean.data == mean.data )
        {
            data.convertTo( tmp_data, ctype );
            // subtract the mean
            subtract( tmp_data, tmp_mean, tmp_data );
        }
        else
        {
            subtract( data, tmp_mean, tmp_mean );
            tmp_data = tmp_mean;
        }

        Mat evects1(count, len, ctype);
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1,
            (flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0);
        eigenvectors = evects1;

        // normalize all eigenvectors
        for( i = 0; i < eigenvectors.rows; i++ )
        {
            Mat vec = eigenvectors.row(i);
            // normalize the eigenvector to unit length
            normalize(vec, vec);
        }
    }

    // compute the cumulative energy content for each eigenvector
    // find the number of leading eigenvalues that account for the requested fraction of the total variance (default 95%)
    int L;
    if (ctype == CV_32F)
        L = computeCumulativeEnergy<float>(eigenvalues, retainedVariance);
    else
        L = computeCumulativeEnergy<double>(eigenvalues, retainedVariance);

    // use clone() to physically copy the data and thus deallocate the original matrices
    eigenvalues = eigenvalues.rowRange(0,L).clone();
    eigenvectors = eigenvectors.rowRange(0,L).clone();

    return *this;
}
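
A minimal, self-contained sketch of the two entry points (the random data is illustrative; both calls use the OpenCV 2.x API shown above):

#include <opencv2/core/core.hpp>
#include <iostream>

int main()
{
    // 20 samples of a 100-dimensional feature: features outnumber samples,
    // so the "scrambled" branch above is exercised
    cv::Mat samples(20, 100, CV_32F);
    cv::randu(samples, cv::Scalar::all(0), cv::Scalar::all(1));

    // (a) keep a fixed number of components
    cv::PCA pcaFixed(samples, cv::Mat(), CV_PCA_DATA_AS_ROW, 10);

    // (b) keep enough components to retain 95% of the variance
    cv::PCA pcaVar;
    pcaVar.computeVar(samples, cv::Mat(), CV_PCA_DATA_AS_ROW, 0.95);

    std::cout << "fixed: " << pcaFixed.eigenvectors.rows
              << " components; variance-based: " << pcaVar.eigenvectors.rows
              << " components" << std::endl;
    return 0;
}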

void PCA::project(InputArray _data, OutputArray result) const
{
    Mat data = _data.getMat();  // input samples
    CV_Assert( mean.data && eigenvectors.data &&
        ((mean.rows == 1 && mean.cols == data.cols) || (mean.cols == 1 && mean.rows == data.rows)));
    Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);  // tile the mean to the data size
    int ctype = mean.type();
    if( data.type() != ctype || tmp_mean.data == mean.data )
    {
        data.convertTo( tmp_data, ctype );
        subtract( tmp_data, tmp_mean, tmp_data );  // center the samples; tmp_data receives the result
    }
    else
    {
        subtract( data, tmp_mean, tmp_mean );  // center the samples; tmp_mean receives the result
        tmp_data = tmp_mean;
    }
    // project via gemm: row samples give result = tmp_data * eigenvectors^T, column samples give result = eigenvectors * tmp_data
    if( mean.rows == 1 )
        gemm( tmp_data, eigenvectors, 1, Mat(), 0, result, GEMM_2_T );
    else
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, result, 0 );
}

Mat PCA::project(InputArray data) const
{
    Mat result;
    project(data, result);
    return result;
}
// back-projection: reconstruct original-space data from the projected (compressed) coefficients
void PCA::backProject(InputArray _data, OutputArray result) const
{
    Mat data = _data.getMat();
    CV_Assert( mean.data && eigenvectors.data &&
        ((mean.rows == 1 && eigenvectors.rows == data.cols) ||
         (mean.cols == 1 && eigenvectors.rows == data.rows)));

    Mat tmp_data, tmp_mean;
    data.convertTo(tmp_data, mean.type());
    if( mean.rows == 1 )
    {
        tmp_mean = repeat(mean, data.rows, 1);
        // gemm computes result = tmp_data * eigenvectors + tmp_mean, i.e. the reconstruction from the projected coefficients
        gemm( tmp_data, eigenvectors, 1, tmp_mean, 1, result, 0 );
    }
    else
    {
        tmp_mean = repeat(mean, 1, data.cols);
        gemm( eigenvectors, tmp_data, 1, tmp_mean, 1, result, GEMM_1_T );
    }
}

Mat PCA::backProject(InputArray data) const
{
    Mat result;
    backProject(data, result);
    return result;
}
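
A small round-trip sketch (not from the original post): project row samples into the subspace, reconstruct them, and measure the reconstruction error:

#include <opencv2/core/core.hpp>
#include <iostream>

int main()
{
    cv::Mat samples(50, 30, CV_32F);
    cv::randu(samples, cv::Scalar::all(0), cv::Scalar::all(1));

    cv::PCA pca(samples, cv::Mat(), CV_PCA_DATA_AS_ROW, 5); // keep 5 components

    cv::Mat coeffs = pca.project(samples);    // 50 x 5 coefficients
    cv::Mat recon  = pca.backProject(coeffs); // 50 x 30 lossy reconstruction

    std::cout << "reconstruction error: "
              << cv::norm(samples, recon, cv::NORM_L2) << std::endl;
    return 0;
}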


void cv::PCACompute(InputArray data, InputOutputArray mean,
                    OutputArray eigenvectors, int maxComponents)
{
    PCA pca;
    pca(data, mean, 0, maxComponents);
    pca.mean.copyTo(mean);
    pca.eigenvectors.copyTo(eigenvectors);
}

void cv::PCAComputeVar(InputArray data, InputOutputArray mean,
                    OutputArray eigenvectors, double retainedVariance)
{
    PCA pca;
    pca.computeVar(data, mean, 0, retainedVariance);
    pca.mean.copyTo(mean);
    pca.eigenvectors.copyTo(eigenvectors);
}

void cv::PCAProject(InputArray data, InputArray mean,
                    InputArray eigenvectors, OutputArray result)
{
    PCA pca;
    pca.mean = mean.getMat();
    pca.eigenvectors = eigenvectors.getMat();
    pca.project(data, result);
}

void cv::PCABackProject(InputArray data, InputArray mean,
                    InputArray eigenvectors, OutputArray result)
{
    PCA pca;
    pca.mean = mean.getMat();
    pca.eigenvectors = eigenvectors.getMat();
    pca.backProject(data, result);
}
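
The free-function wrappers chain the same way; a minimal sketch:

#include <opencv2/core/core.hpp>

int main()
{
    cv::Mat samples(40, 16, CV_32F);
    cv::randu(samples, cv::Scalar::all(0), cv::Scalar::all(1));

    cv::Mat mean, eigenvectors;
    cv::PCACompute(samples, mean, eigenvectors, 4); // keep 4 components

    cv::Mat projected, reconstructed;
    cv::PCAProject(samples, mean, eigenvectors, projected);
    cv::PCABackProject(projected, mean, eigenvectors, reconstructed);
    return 0;
}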
Using PCA in OpenCV
1. Training
In OpenCV you can call the PCA interface directly on a sequence of input images to obtain the principal-component vectors.
/*
 * PcaTrain: train the PCA model
 * In: imageListName, the list file of training images
 *     modelName, the file name under which to save the model
 */
void PcaTrain(String imageListName, String modelName)
{
    vector<Mat> images;
    read_imgList(imageListName, images);

    // reshape each 2-D input image into one row vector, the layout PCA expects
    Mat pcaIn = asRowMatrix(images, CV_32FC1);
    PCA pca(pcaIn, Mat(), CV_PCA_DATA_AS_ROW, 0.95); // 3rd arg: samples stored as rows; 4th arg: fraction of variance to retain
    Mat pcaEigenvectors = pca.eigenvectors.clone();  // principal-component eigenvectors
    Mat pcaEigenvalues = pca.eigenvalues.clone();    // corresponding eigenvalues
    save(modelName, pcaEigenvalues, pcaEigenvectors); // persist the eigenvalues and eigenvectors
}
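
Note that read_imgList, asRowMatrix, save (and norm_0_255, load below) are helper functions from the author's project, not part of the cv:: API. A plausible sketch of asRowMatrix, modeled on OpenCV's face-recognition samples and assuming all images share the same size:

#include <opencv2/core/core.hpp>
#include <vector>

// Sketch only: flatten each image into one row of a big sample matrix.
static cv::Mat asRowMatrix(const std::vector<cv::Mat>& src, int rtype)
{
    if (src.empty())
        return cv::Mat();
    int n = (int)src.size();
    int d = (int)src[0].total(); // pixels per image; assumes uniform sizes
    cv::Mat data(n, d, rtype);
    for (int i = 0; i < n; i++)
        // reshape image i to a 1 x d row and convert it into row i of the output
        src[i].reshape(1, 1).convertTo(data.row(i), rtype);
    return data;
}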



2. Usage

Multiply the image to be evaluated by the trained principal-component eigenvectors to obtain the dimensionality-reduced vector.

/*
 * PcaCompute: compute a set of values of linearly uncorrelated variables by principal component analysis
 * In: srcImg, the image on which to compute the PCA projection
 *     modelName, the file name from which to load the model
 */
Mat PcaCompute(Mat srcImg, String modelName)
{
    Mat pcaEigenvalues;
    Mat pcaEigenvectors;
    load(modelName, &pcaEigenvalues, &pcaEigenvectors); // load the trained principal-component model

    Mat testIn, pcaIn;
    // convert the image to grayscale
    cvtColor(srcImg, srcImg, CV_RGB2GRAY);
    pcaIn = norm_0_255(srcImg); // normalize the image to the [0, 255] range
    pcaIn.reshape(1, 1).convertTo(testIn, CV_32FC1, 1/255.); // flatten the image into a single row
    Mat pcaRes;
    gemm(testIn, pcaEigenvectors.reshape(0, pcaEigenvectors.cols), 1, Mat(), 0, pcaRes); // multiply the row vector by the eigenvectors to get the reduced vector
    cout << "pcaRes rows=" << pcaRes.rows << " cols=" << pcaRes.cols << endl << pcaRes << endl;
    return pcaRes;
}
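
Putting the two together (file names are placeholders; assumes the helper functions referenced above are available):

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    PcaTrain("train_images.txt", "pca_model.xml"); // placeholder file names
    cv::Mat img = cv::imread("test.jpg");          // color image, as PcaCompute converts it to gray
    cv::Mat reduced = PcaCompute(img, "pca_model.xml");
    return 0;
}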
