zynq學習筆記——HLS FAST corner導出keypoints(一)

PC平臺:WINDOWS 10 64位

Xilinx設計開發套件:Xilinx_vivado_sdk_2015.4

開發板:Zed Board

之前參照XAPP1167文檔,使用HLS Video函數庫裏的FASTX跑了一下例子。當時的例子是直接把keypoint以mask方式畫在原始視頻圖像上,應用層並沒有獲取到keypoint的座標信息,所以無法開展下一步的圖像處理,比如獲取keypoint的特徵點信息進行圖像匹配等。其實HLS FASTX提供了兩個函數:一個返回keypoint的mask圖像,另一個返回keypoint數組。所以如果需要獲取keypoint的座標信息,必須使用第二個函數。



可以在Vivado HLS安裝目錄下找到FASTX的源代碼,有兩個地方,Xilinx\Vivado_HLS\2015.4\include\hls\hls_video_fast.h和Xilinx\Vivado_HLS\2015.4\common\technology\autopilot\hls\hls_video_fast.h

//generate array 
// FAST corner detector core that writes detected keypoints into a fixed-size
// array of Point_<T>.
//
// Template parameters:
//   PSize        number of entries in the `flag` table handed to fast_judge
//   KERNEL_SIZE  side length of the square pixel window
//   T, N         coordinate type and capacity of the keypoint array
//   SRC_T/ROWS/COLS  pixel type and maximum dimensions of the input Mat
//
// Parameters:
//   _src                input image, consumed one pixel per loop iteration
//   _keypoints          output array; at most N entries are written
//   _threshold          forwarded (cast to INPUT_T) to fast_judge
//   _nonmax_supression  when true, candidates go through fast_nonmax on a
//                       3x3 window of `core` values before being recorded
//   flag                table forwarded unchanged to fast_judge
//
// Note: the number of keypoints found (`index`) is a local and is not
// returned, and detections beyond N are silently dropped.
template<int PSize,int KERNEL_SIZE,typename T, int N, int SRC_T,int ROWS,int COLS>
void FAST_t_opr(
        Mat<ROWS,COLS,SRC_T>    &_src,
        Point_<T>                    (&_keypoints)[N],
        HLS_TNAME(SRC_T)                    _threshold,
        bool                    _nonmax_supression,
        int                     (&flag)[PSize][2]
        )
{
    typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;
    // k_buf keeps the previous KERNEL_SIZE-1 image rows; win is the sliding
    // KERNEL_SIZE x KERNEL_SIZE pixel window fed to fast_judge.
    LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T>    k_buf;
    // core_buf / core_win play the same role for the per-pixel `core` values
    // used by the non-maximum-suppression path (2 buffered rows, 3x3 window).
    LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> >         core_buf;
    Window<3,3,ap_int<16> >                            core_win;
    Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T>       win;
    Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)>             s;
    int rows= _src.rows;
    int cols= _src.cols;
    assert(rows <= ROWS);
    assert(cols <= COLS);
    // NOTE(review): kernel_half is not used anywhere in this function.
    int kernel_half=KERNEL_SIZE/2;
    // Scratch arrays handed to fast_judge on every call.
    ap_uint<2> flag_val[PSize+PSize/2+1];
    int  flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0
    // Next free slot in _keypoints.
    int index=0;
    // Subtracted from the loop counters to obtain the reported coordinates;
    // one larger when non-maximum suppression is enabled.
    int offset=KERNEL_SIZE/2;

    if(_nonmax_supression)
    {
        offset=offset+1;
    }
    // Both loops run `offset` iterations past the image bounds; pixels are
    // only read from _src while i<rows && j<cols.
 loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {
    loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {
#pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
            if(i<rows&&j<cols) {
                for(int r= 0;r<KERNEL_SIZE;r++) {
                    for(int c=0;c<KERNEL_SIZE-1;c++) {
                        win.val[r][c]=win.val[r][c+1];//column left shift
                    }
                }
                // Fill the new right-hand window column from the line buffer
                // while moving the buffered column j up by one row.
                win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
                for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
                    win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
                    k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
                }
                // Read the next pixel; only channel 0 is used. It becomes the
                // bottom-right window element and the newest buffered row.
                _src>>s;
                win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
                k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
            }
            // Same shift/fill scheme for the 3x3 window of `core` values.
            for(int r= 0;r<3;r++)
            {
                for(int c=0;c<3-1;c++)
                {
                    core_win.val[r][c]=core_win.val[r][c+1];//column left shift
                }
            }
            core_win.val[0][3-1]=core_buf.val[0][j];
            for(int buf_row= 1;buf_row< 3-1;buf_row++)
            {
                core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
                core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
            }
            int core=0;
            // The corner test only runs once the counters have reached
            // KERNEL_SIZE-1 in both directions.
            if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
            {
                // NOTE(review): fast_judge is defined elsewhere; presumably it
                // also stores a corner score into `core` - confirm.
                bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
                // Without suppression a corner is recorded immediately.
                if(iscorner&&!_nonmax_supression)
                {
                    if(index<N)
                    {
                    _keypoints[index].x=j-offset;
                    _keypoints[index].y=i-offset;
                    index++;
                    }
                }
            }
            // Iterations outside the image contribute a zero `core` value.
            if(i>=rows||j>=cols)
            {
                core=0;
            }
            if(_nonmax_supression)
            {
                // Insert the current value as the newest 3x3 window element
                // and into the buffered row.
                core_win.val[3-1][3-1]=core;
                core_buf.val[3-2][j]=core;
                // Only a non-zero window centre is tested by fast_nonmax.
                if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
                {
                    bool iscorner=fast_nonmax(core_win);
                    if(iscorner)
                    {
                    if(index<N)
                    {
                        _keypoints[index].x=j-offset;
                        _keypoints[index].y=i-offset;
                        index++;
                    }
                    }
                }
            }

        }
    }
}
// Convenience wrapper: runs FAST_t_opr with a 16-entry table and a 7x7
// window, writing detected keypoints into the caller's Point_<T> array.
//
//   _src               input image
//   _keypoints         output array of N points
//   _threshold         detection threshold forwarded to FAST_t_opr
//   _nomax_supression  forwarded as the non-maximum-suppression switch
template<typename T, int N, int SRC_T, int ROWS, int COLS>
void FASTX(
        Mat<ROWS,COLS,SRC_T>    &_src,
        Point_<T> (&_keypoints)[N],
        HLS_TNAME(SRC_T)    _threshold,
        bool   _nomax_supression
        )
{
#pragma HLS INLINE
    const int kTableSize = 16;
    const int kWindowSize = 7;

    // Sixteen coordinate pairs, each component in 0..6.
    // NOTE(review): presumably the sampling circle inside the 7x7 window;
    // the exact interpretation is up to fast_judge - confirm.
    int circle[kTableSize][2] = {
        {3,0}, {4,0}, {5,1}, {6,2},
        {6,3}, {6,4}, {5,5}, {4,6},
        {3,6}, {2,6}, {1,5}, {0,4},
        {0,3}, {0,2}, {1,1}, {2,0}
    };

    FAST_t_opr<kTableSize,kWindowSize>(_src, _keypoints, _threshold,
                                       _nomax_supression, circle);
}

爲了方便在PS端應用層獲取到keypoint信息,我對FASTX函數進行了小小的修改:取消了Point數據類型,直接使用int數組。數組的第1個元素記錄keypoint的總個數,後面就是每個keypoint的座標信息,int的高16位保存X座標,低16位保存Y座標。修改後的代碼如下,爲了不影響原來的函數,可以直接增加以下代碼

//generate array 
// FAST corner detector core that writes detected keypoints into a plain int
// array, so the result can be read by software without the Point_ type.
//
// Output layout of _keypoints:
//   _keypoints[0]      number of keypoints stored
//   _keypoints[1..n]   one keypoint each: X in the upper 16 bits,
//                      Y in the lower 16 bits
//
// Template parameters:
//   PSize        number of entries in the `flag` table handed to fast_judge
//   KERNEL_SIZE  side length of the square pixel window
//   SRC_T/ROWS/COLS  pixel type and maximum dimensions of the input Mat
//
// Parameters:
//   _src                input image, consumed one pixel per loop iteration
//   _keypoints          output buffer with room for _len ints
//   _len                capacity of _keypoints including the count slot;
//                       detections that do not fit are silently dropped and
//                       nothing at all is written when _len <= 0
//   _threshold          forwarded (cast to INPUT_T) to fast_judge
//   _nonmax_supression  when true, candidates go through fast_nonmax on a
//                       3x3 window of `core` values before being recorded
//   flag                table forwarded unchanged to fast_judge
template<int PSize,int KERNEL_SIZE, int SRC_T,int ROWS,int COLS>
void FAST_t_opr(
        Mat<ROWS,COLS,SRC_T>    &_src,
        int                    *_keypoints,
        int                    _len,
        HLS_TNAME(SRC_T)       _threshold,
        bool                    _nonmax_supression,
        int                     (&flag)[PSize][2]
        )
{
    typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;
    // k_buf keeps the previous KERNEL_SIZE-1 image rows; win is the sliding
    // KERNEL_SIZE x KERNEL_SIZE pixel window fed to fast_judge.
    LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T>    k_buf;
    // core_buf / core_win do the same for the per-pixel `core` values used by
    // the non-maximum-suppression path.
    LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> >         core_buf;
    Window<3,3,ap_int<16> >                            core_win;
    Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T>       win;
    Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)>             s;
    int rows= _src.rows;
    int cols= _src.cols;
    assert(rows <= ROWS);
    assert(cols <= COLS);
    // Scratch arrays handed to fast_judge on every call.
    ap_uint<2> flag_val[PSize+PSize/2+1];
    int  flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0

    // Slot 0 is reserved for the count, so keypoints start at slot 1.
    int index = 1;
    // Subtracted from the loop counters to obtain the reported coordinates;
    // one larger when non-maximum suppression is enabled.
    int offset=KERNEL_SIZE/2;

    if(_nonmax_supression)
    {
        offset=offset+1;
    }
    // Both loops run `offset` iterations past the image bounds; pixels are
    // only read from _src while i<rows && j<cols.
 loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {
    loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {
#pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
            if(i<rows&&j<cols) {
                for(int r= 0;r<KERNEL_SIZE;r++) {
                    for(int c=0;c<KERNEL_SIZE-1;c++) {
                        win.val[r][c]=win.val[r][c+1];//column left shift
                    }
                }
                // Fill the new right-hand window column from the line buffer
                // while moving the buffered column j up by one row.
                win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
                for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
                    win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
                    k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
                }
                // Read the next pixel; only channel 0 is used.
                _src>>s;
                win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
                k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
            }
            // Same shift/fill scheme for the 3x3 window of `core` values.
            for(int r= 0;r<3;r++)
            {
                for(int c=0;c<3-1;c++)
                {
                    core_win.val[r][c]=core_win.val[r][c+1];//column left shift
                }
            }
            core_win.val[0][3-1]=core_buf.val[0][j];
            for(int buf_row= 1;buf_row< 3-1;buf_row++)
            {
                core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
                core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
            }
            int core=0;
            // Set when this iteration has a keypoint to record. The two
            // detection paths below are mutually exclusive (one requires
            // suppression off, the other on), so a single write site suffices.
            bool emit=false;
            if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
            {
                // NOTE(review): fast_judge is defined elsewhere; presumably it
                // also stores a corner score into `core` - confirm.
                bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
                if(iscorner&&!_nonmax_supression)
                {
                    emit=true;
                }
            }
            // Iterations outside the image contribute a zero `core` value.
            if(i>=rows||j>=cols)
            {
                core=0;
            }
            if(_nonmax_supression)
            {
                core_win.val[3-1][3-1]=core;
                core_buf.val[3-2][j]=core;
                // Only a non-zero window centre is tested by fast_nonmax.
                if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
                {
                    if(fast_nonmax(core_win))
                    {
                        emit=true;
                    }
                }
            }
            if(emit&&index<_len)
            {
                int x=j-offset;
                int y=i-offset;
                // Mask Y so it can never spill into the X half of the word.
                _keypoints[index]=(x<<16)|(y&0xFFFF);
                index++;
            }

        }
    }

    // Publish the keypoint count; skipped when the buffer has no slot 0.
    if(_len>0)
    {
        _keypoints[0] = (index-1);
    }
}
// Convenience wrapper: runs the int-array FAST_t_opr with a 16-entry table
// and a 7x7 window.
//
//   _src               input image
//   _keypoints         output buffer (layout is defined by FAST_t_opr)
//   _len               capacity of _keypoints in ints
//   _threshold         detection threshold forwarded to FAST_t_opr
//   _nomax_supression  forwarded as the non-maximum-suppression switch
template<int SRC_T, int ROWS, int COLS>
void FASTX(
        Mat<ROWS,COLS,SRC_T>    &_src,
        int                     *_keypoints,
        int                     _len,
        HLS_TNAME(SRC_T)    _threshold,
        bool   _nomax_supression
        )
{
#pragma HLS INLINE
    const int kTableSize = 16;
    const int kWindowSize = 7;

    // Sixteen coordinate pairs, each component in 0..6.
    // NOTE(review): presumably the sampling circle inside the 7x7 window;
    // the exact interpretation is up to fast_judge - confirm.
    int circle[kTableSize][2] = {
        {3,0}, {4,0}, {5,1}, {6,2},
        {6,3}, {6,4}, {5,5}, {4,6},
        {3,6}, {2,6}, {1,5}, {0,4},
        {0,3}, {0,2}, {1,1}, {2,0}
    };

    FAST_t_opr<kTableSize,kWindowSize>(_src, _keypoints, _len, _threshold,
                                       _nomax_supression, circle);
}

修改完FAST函數後,把原來的例子進行相應的修改

// Top-level HLS function: passes the video stream through unchanged and
// reports FAST keypoints of the frame through the `keypoints` array.
//
//   INPUT_STREAM   incoming AXI4-Stream video
//   OUTPUT_STREAM  outgoing AXI4-Stream video (a duplicate of the input)
//   rows, cols     frame size used to construct the internal Mats
//   threhold       detection threshold handed to hls::FASTX
//   keypoints      result buffer of MAX_KEYPOINTS ints filled by hls::FASTX
//
// rows, cols, threhold, keypoints and the block-level control are all mapped
// onto the BUS_CTRL AXI4-Lite bundle. Non-maximum suppression is always
// enabled (the last FASTX argument is the literal `true`).
void hls_fast_corner(AXI_STREAM& INPUT_STREAM, AXI_STREAM& OUTPUT_STREAM, int rows, int cols, int threhold, int keypoints[MAX_KEYPOINTS])
{
#pragma HLS INTERFACE axis port=INPUT_STREAM
#pragma HLS INTERFACE axis port=OUTPUT_STREAM

#pragma HLS INTERFACE s_axilite port=rows 		bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=cols 		bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=threhold 	bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=keypoints  bundle=BUS_CTRL

#pragma HLS INTERFACE s_axilite port=return  	bundle=BUS_CTRL

	hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      _src(rows,cols);
#pragma HLS dataflow
	hls::AXIvideo2Mat(INPUT_STREAM, _src);
	hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      src0(rows,cols);
	hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      src1(rows,cols);
	hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC1>      gray(rows,cols);

	// One copy feeds the detector, the other goes straight to the output.
	hls::Duplicate(_src,src0,src1);

	hls::CvtColor<HLS_BGR2GRAY>(src0,gray);

	hls::FASTX(gray,keypoints, MAX_KEYPOINTS, threhold,true);

	hls::Mat2AXIvideo(src1, OUTPUT_STREAM);
}

沒有了paintmask,不能在圖像上直接看到keypoint了,該如何驗證呢?可以在testbench上使用cvCircle把keypoint畫上去,代碼如下:

int main (int argc, char** argv) {

    IplImage* src = cvLoadImage(INPUT_IMAGE);
    IplImage* dst = cvCreateImage(cvGetSize(src), src->depth, src->nChannels);
    
    AXI_STREAM  src_axi, dst_axi;
    IplImage2AXIvideo(src, src_axi);

    int threhold = 60;
    int keypoints[MAX_KEYPOINTS];
    hls_fast_corner(src_axi, dst_axi, src->height, src->width, threhold, keypoints);

    AXIvideo2IplImage(dst_axi, dst);

    int count = keypoints[0];
    printf("keypoints count:%d\n", count);
    for(int i=1;i<count;i++)
    {
    	int x = keypoints[i] >> 16;
    	int y = (keypoints[i] & 0xFFFF);

    	cvCircle(dst,cvPoint(x,y),2,CV_RGB(0,0,255),2);
    }

    cvSaveImage(OUTPUT_IMAGE, dst);
    
    return 0;
}


C 仿真的效果


導出IP後,打開driver目錄下的xhls_fast_corner.h,可以看到比之前多了下面這些函數,通過read keypoints函數就可以獲取到keypoint信息了

// Driver prototypes for the `keypoints` array argument, as listed in the
// generated xhls_fast_corner.h. Per the surrounding article, the Read
// functions are how software obtains the keypoint data.
// NOTE(review): the per-function remarks below are inferred from the names
// and signatures only - confirm against the generated xhls_fast_corner.c.

// Presumably report the location, size, element width and element count of
// the keypoints memory.
u32 XHls_fast_corner_Get_keypoints_BaseAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_HighAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_TotalBytes(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_BitWidth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_Depth(XHls_fast_corner *InstancePtr);
// Presumably transfer `length` ints starting at `offset`.
u32 XHls_fast_corner_Write_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Read_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
// Presumably transfer `length` chars starting at `offset`.
u32 XHls_fast_corner_Write_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
u32 XHls_fast_corner_Read_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);



發佈了87 篇原創文章 · 獲贊 59 · 訪問量 55萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章