PC平臺:WINDOWS 10 64位
Xilinx設計開發套件:Xilinx_vivado_sdk_2015.4
開發板:Zed Board
之前參照XAPP1167文檔,使用HLS Video函數庫裏的FASTX跑了一下例子。當時的例子是直接把keypoint以mask方式畫在原始視頻圖像上,應用層並沒有獲取到keypoint的座標信息,所以無法開展下一步的圖像處理,比如獲取keypoint的特徵點信息進行圖像匹配等。其實HLS FASTX提供了兩個函數:一個返回keypoint的mask圖像,另一個返回keypoint數組。所以如果需要獲取keypoint的座標信息,必須使用第二個函數。
可以在Vivado HLS安裝目錄下找到FASTX的源代碼,有兩個地方,Xilinx\Vivado_HLS\2015.4\include\hls\hls_video_fast.h和Xilinx\Vivado_HLS\2015.4\common\technology\autopilot\hls\hls_video_fast.h
//generate array
// FAST corner detector core that emits keypoints into a fixed-size Point_ array.
//
// The image is consumed pixel by pixel from _src and pushed through a
// KERNEL_SIZE x KERNEL_SIZE sliding window (win) backed by a line buffer of
// KERNEL_SIZE-1 rows (k_buf). Each full window is handed to fast_judge(); when
// non-maximum suppression is requested, the per-pixel `core` value is pushed
// through a second 3x3 window (core_win / core_buf) and filtered by
// fast_nonmax().
//
// Template parameters:
//   PSize       - number of rows in the `flag` table (FASTX passes 16).
//   KERNEL_SIZE - side length of the pixel window (FASTX passes 7).
//   T, N        - coordinate type and capacity of the _keypoints array.
//   SRC_T, ROWS, COLS - pixel type and maximum dimensions of the input Mat.
//
// Parameters:
//   _src               - input image stream; only channel 0 (s.val[0]) is used.
//   _keypoints         - output array; at most N entries are written, any further
//                        corners are silently dropped.
//   _threshold         - forwarded to fast_judge() after a cast to INPUT_T.
//   _nonmax_supression - selects the fast_nonmax() path instead of reporting
//                        every fast_judge() hit directly.
//   flag               - PSize x 2 table forwarded unchanged to fast_judge().
//                        NOTE(review): presumably the window coordinates of the
//                        circle samples - confirm in fast_judge.
//
// NOTE(review): the number of keypoints written (`index`) is a local and is not
// reported to the caller; entries past the last written one are left untouched.
template<int PSize,int KERNEL_SIZE,typename T, int N, int SRC_T,int ROWS,int COLS>
void FAST_t_opr(
Mat<ROWS,COLS,SRC_T> &_src,
Point_<T> (&_keypoints)[N],
HLS_TNAME(SRC_T) _threshold,
bool _nonmax_supression,
int (&flag)[PSize][2]
)
{
typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;
// k_buf keeps the previous KERNEL_SIZE-1 image rows; core_buf keeps two rows of
// `core` values and is only written on the non-max-suppression path.
LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T> k_buf;
LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf;
Window<3,3,ap_int<16> > core_win;
Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T> win;
Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)> s;
int rows= _src.rows;
int cols= _src.cols;
assert(rows <= ROWS);
assert(cols <= COLS);
// kernel_half is never referenced again in this function.
int kernel_half=KERNEL_SIZE/2;
// Scratch arrays handed to fast_judge(); fully partitioned by the pragmas below.
ap_uint<2> flag_val[PSize+PSize/2+1];
int flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0
// index: next free slot in _keypoints.
int index=0;
// offset is subtracted from the loop counters to obtain the reported keypoint
// coordinates, and also extends both loops past the image edge. It is one
// larger when non-max suppression is enabled.
// NOTE(review): presumably this compensates for the window (and NMS) delay so
// the coordinates refer to the window centre - confirm.
int offset=KERNEL_SIZE/2;
if(_nonmax_supression)
{
offset=offset+1;
}
loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {
loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {
#pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
// Only read a pixel and advance the pixel window while inside the image.
if(i<rows&&j<cols) {
for(int r= 0;r<KERNEL_SIZE;r++) {
for(int c=0;c<KERNEL_SIZE-1;c++) {
win.val[r][c]=win.val[r][c+1];// shift every window row one column to the left
}
}
// Fill the new right-most window column from the line buffer at column j and
// move the buffered rows up by one at the same time.
win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
}
// Read the next input pixel; it becomes the bottom-right window element and the
// newest line-buffer row.
_src>>s;
win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
}
// Advance the 3x3 `core` window in the same way (runs on every iteration,
// including the extra border iterations).
for(int r= 0;r<3;r++)
{
for(int c=0;c<3-1;c++)
{
core_win.val[r][c]=core_win.val[r][c+1];// shift every window row one column to the left
}
}
core_win.val[0][3-1]=core_buf.val[0][j];
for(int buf_row= 1;buf_row< 3-1;buf_row++)
{
core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
}
// NOTE(review): `core` is passed to fast_judge() and used afterwards, so it is
// presumably an output (corner score) - confirm in fast_judge.
int core=0;
// Evaluate the window once KERNEL_SIZE rows and columns have been seen.
if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
{
bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
// Without non-max suppression every hit is recorded immediately.
if(iscorner&&!_nonmax_supression)
{
if(index<N)
{
_keypoints[index].x=j-offset;
_keypoints[index].y=i-offset;
index++;
}
}
}
// Iterations outside the image contribute a zero `core` value.
if(i>=rows||j>=cols)
{
core=0;
}
if(_nonmax_supression)
{
// Insert the current `core` value into the 3x3 window and its line buffer.
core_win.val[3-1][3-1]=core;
core_buf.val[3-2][j]=core;
// Only test windows whose centre value is non-zero.
// NOTE(review): fast_nonmax() presumably returns true when the centre is the
// local maximum - confirm in fast_nonmax.
if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
{
bool iscorner=fast_nonmax(core_win);
if(iscorner)
{
if(index<N)
{
_keypoints[index].x=j-offset;
_keypoints[index].y=i-offset;
index++;
}
}
}
}
}
}
}
// FASTX entry point that reports keypoints as an array of Point_<T>.
//
// Builds the 16-entry sample table and forwards everything to
// FAST_t_opr<16,7>. The function is inlined into its caller (HLS INLINE).
//
//   _src              - input image stream.
//   _keypoints        - output array of N points.
//   _threshold        - detection threshold, forwarded unchanged.
//   _nomax_supression - forwarded as FAST_t_opr's non-max-suppression switch.
template<typename T, int N, int SRC_T, int ROWS, int COLS>
void FASTX(
        Mat<ROWS, COLS, SRC_T> &_src,
        Point_<T> (&_keypoints)[N],
        HLS_TNAME(SRC_T) _threshold,
        bool _nomax_supression)
{
#pragma HLS INLINE
    const int kSampleCount = 16;  // rows in the sample table (PSize)
    const int kWindowSize  = 7;   // KERNEL_SIZE handed to FAST_t_opr

    // All values lie in 0..6, i.e. inside a 7x7 window.
    // NOTE(review): presumably the positions of the 16 circle pixels; the
    // interpretation of each pair belongs to fast_judge - confirm there.
    int sample_table[kSampleCount][2] = {
        {3, 0}, {4, 0}, {5, 1}, {6, 2},
        {6, 3}, {6, 4}, {5, 5}, {4, 6},
        {3, 6}, {2, 6}, {1, 5}, {0, 4},
        {0, 3}, {0, 2}, {1, 1}, {2, 0}
    };

    FAST_t_opr<kSampleCount, kWindowSize>(
            _src, _keypoints, _threshold, _nomax_supression, sample_table);
}
爲了方便在PS端應用層獲取到keypoint信息,我對FASTX函數進行了小小的修改:取消了Point數據類型,直接使用int數組。數組的第1個元素記錄keypoint的總個數,後面依次是每個keypoint的座標信息,每個int的高16位保存X座標,低16位保存Y座標。修改後的代碼如下,爲了不影響原來的函數,可以直接增加以下代碼
//generate array
// FAST corner detector core that emits keypoints into a plain int array.
//
// Same pipeline as the Point_ overload: pixels from _src go through a
// KERNEL_SIZE x KERNEL_SIZE window (win / k_buf) into fast_judge(), with an
// optional 3x3 fast_nonmax() stage on the `core` values (core_win / core_buf).
//
// Output layout of _keypoints:
//   _keypoints[0]      - number of keypoints stored (written once, at the end).
//   _keypoints[1..]    - one packed int per keypoint:
//                        ((j-offset) << 16) | (i-offset), i.e. X in the upper
//                        16 bits and Y in the lower 16 bits.
//
// Parameters:
//   _src               - input image stream; only channel 0 (s.val[0]) is used.
//   _keypoints         - output buffer as described above.
//   _len               - capacity of _keypoints in ints, including slot 0, so at
//                        most _len-1 keypoints are stored; further corners are
//                        silently dropped.
//   _threshold         - forwarded to fast_judge() after a cast to INPUT_T.
//   _nonmax_supression - selects the fast_nonmax() path instead of reporting
//                        every fast_judge() hit directly.
//   flag               - PSize x 2 table forwarded unchanged to fast_judge().
//                        NOTE(review): presumably the window coordinates of the
//                        circle samples - confirm in fast_judge.
//
// NOTE(review): _keypoints[0] is written unconditionally, so the caller must
// provide at least one element even when _len is 0.
template<int PSize,int KERNEL_SIZE, int SRC_T,int ROWS,int COLS>
void FAST_t_opr(
Mat<ROWS,COLS,SRC_T> &_src,
int *_keypoints,
int _len,
HLS_TNAME(SRC_T) _threshold,
bool _nonmax_supression,
int (&flag)[PSize][2]
)
{
typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;
// k_buf keeps the previous KERNEL_SIZE-1 image rows; core_buf keeps two rows of
// `core` values and is only written on the non-max-suppression path.
LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T> k_buf;
LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf;
Window<3,3,ap_int<16> > core_win;
Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T> win;
Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)> s;
int rows= _src.rows;
int cols= _src.cols;
assert(rows <= ROWS);
assert(cols <= COLS);
// kernel_half is never referenced again in this function.
int kernel_half=KERNEL_SIZE/2;
// Scratch arrays handed to fast_judge(); fully partitioned by the pragmas below.
ap_uint<2> flag_val[PSize+PSize/2+1];
int flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0
// index starts at 1 because slot 0 is reserved for the keypoint count.
int index = 1;
// offset is subtracted from the loop counters to obtain the reported keypoint
// coordinates, and also extends both loops past the image edge. It is one
// larger when non-max suppression is enabled.
// NOTE(review): presumably this compensates for the window (and NMS) delay so
// the coordinates refer to the window centre - confirm.
int offset=KERNEL_SIZE/2;
// location: scratch variable for the packed (X << 16) | Y value.
int location =0;
if(_nonmax_supression)
{
offset=offset+1;
}
loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {
loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {
#pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
// Only read a pixel and advance the pixel window while inside the image.
if(i<rows&&j<cols) {
for(int r= 0;r<KERNEL_SIZE;r++) {
for(int c=0;c<KERNEL_SIZE-1;c++) {
win.val[r][c]=win.val[r][c+1];// shift every window row one column to the left
}
}
// Fill the new right-most window column from the line buffer at column j and
// move the buffered rows up by one at the same time.
win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
}
// Read the next input pixel; it becomes the bottom-right window element and the
// newest line-buffer row.
_src>>s;
win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
}
// Advance the 3x3 `core` window in the same way (runs on every iteration,
// including the extra border iterations).
for(int r= 0;r<3;r++)
{
for(int c=0;c<3-1;c++)
{
core_win.val[r][c]=core_win.val[r][c+1];// shift every window row one column to the left
}
}
core_win.val[0][3-1]=core_buf.val[0][j];
for(int buf_row= 1;buf_row< 3-1;buf_row++)
{
core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
}
// NOTE(review): `core` is passed to fast_judge() and used afterwards, so it is
// presumably an output (corner score) - confirm in fast_judge.
int core=0;
// Evaluate the window once KERNEL_SIZE rows and columns have been seen.
if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
{
bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
// Without non-max suppression every hit is recorded immediately.
if(iscorner&&!_nonmax_supression)
{
if(index<_len)
{
// Pack X into the upper 16 bits and Y into the lower 16 bits.
location = j-offset;
location <<= 16;
location |= i-offset;
_keypoints[index] = location;
index++;
}
}
}
// Iterations outside the image contribute a zero `core` value.
if(i>=rows||j>=cols)
{
core=0;
}
if(_nonmax_supression)
{
// Insert the current `core` value into the 3x3 window and its line buffer.
core_win.val[3-1][3-1]=core;
core_buf.val[3-2][j]=core;
// Only test windows whose centre value is non-zero.
// NOTE(review): fast_nonmax() presumably returns true when the centre is the
// local maximum - confirm in fast_nonmax.
if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
{
bool iscorner=fast_nonmax(core_win);
if(iscorner)
{
if(index<_len)
{
// Pack X into the upper 16 bits and Y into the lower 16 bits.
location = j-offset;
location <<= 16;
location |= i-offset;
_keypoints[index] = location;
index++;
}
}
}
}
}
}
// Slot 0 receives the number of keypoints stored in slots 1..index-1.
_keypoints[0] = (index-1); // keypoints total count
}
// FASTX entry point that reports keypoints through a plain int buffer.
//
// Builds the 16-entry sample table and forwards everything to the
// FAST_t_opr<16,7> overload taking (int*, int). The function is inlined into
// its caller (HLS INLINE).
//
//   _src              - input image stream.
//   _keypoints        - output buffer, filled by FAST_t_opr.
//   _len              - capacity of _keypoints in ints, forwarded unchanged.
//   _threshold        - detection threshold, forwarded unchanged.
//   _nomax_supression - forwarded as FAST_t_opr's non-max-suppression switch.
template<int SRC_T, int ROWS, int COLS>
void FASTX(
        Mat<ROWS, COLS, SRC_T> &_src,
        int *_keypoints,
        int _len,
        HLS_TNAME(SRC_T) _threshold,
        bool _nomax_supression)
{
#pragma HLS INLINE
    const int kSampleCount = 16;  // rows in the sample table (PSize)
    const int kWindowSize  = 7;   // KERNEL_SIZE handed to FAST_t_opr

    // All values lie in 0..6, i.e. inside a 7x7 window.
    // NOTE(review): presumably the positions of the 16 circle pixels; the
    // interpretation of each pair belongs to fast_judge - confirm there.
    int sample_table[kSampleCount][2] = {
        {3, 0}, {4, 0}, {5, 1}, {6, 2},
        {6, 3}, {6, 4}, {5, 5}, {4, 6},
        {3, 6}, {2, 6}, {1, 5}, {0, 4},
        {0, 3}, {0, 2}, {1, 1}, {2, 0}
    };

    FAST_t_opr<kSampleCount, kWindowSize>(
            _src, _keypoints, _len, _threshold, _nomax_supression, sample_table);
}
修改完FAST函數後,把原來的例子進行相應的修改
// Top-level HLS function: passes the video stream through unchanged and
// reports FAST keypoints of its grayscale version through `keypoints`.
//
//   INPUT_STREAM  - AXI4-Stream video input (3-channel, 8 bit per channel).
//   OUTPUT_STREAM - AXI4-Stream video output; receives an unmodified copy of
//                   the input frame.
//   rows, cols    - frame size; used to size every intermediate hls::Mat.
//   threhold      - FAST threshold (parameter name kept as-is because it is
//                   part of the generated register interface). It is converted
//                   to the 8-bit pixel type when passed to FASTX.
//   keypoints     - result buffer of MAX_KEYPOINTS ints: element 0 is the
//                   keypoint count, the following elements hold one keypoint
//                   each with X in the upper and Y in the lower 16 bits.
//
// All scalar arguments, the keypoint buffer and the block-level control are
// mapped to the AXI4-Lite bundle BUS_CTRL. Non-max suppression is always
// enabled (the last FASTX argument is `true`).
void hls_fast_corner(AXI_STREAM& INPUT_STREAM, AXI_STREAM& OUTPUT_STREAM, int rows, int cols, int threhold, int keypoints[MAX_KEYPOINTS])
{
#pragma HLS INTERFACE axis port=INPUT_STREAM
#pragma HLS INTERFACE axis port=OUTPUT_STREAM
#pragma HLS INTERFACE s_axilite port=rows bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=cols bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=threhold bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=keypoints bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=return bundle=BUS_CTRL
    // The former `_dst` image was never read or written once the mask-painting
    // stage was dropped, so it is no longer declared.
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> _src(rows,cols);
#pragma HLS dataflow
    hls::AXIvideo2Mat(INPUT_STREAM, _src);

    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src0(rows,cols);
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src1(rows,cols);
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC1> gray(rows,cols);

    // src0 feeds the detector, src1 is forwarded to the output untouched.
    hls::Duplicate(_src,src0,src1);
    hls::CvtColor<HLS_BGR2GRAY>(src0,gray);
    hls::FASTX(gray,keypoints, MAX_KEYPOINTS, threhold,true);
    hls::Mat2AXIvideo(src1, OUTPUT_STREAM);
}
沒有了paintmask,不能在圖像上直接看到keypoint了,該如何驗證呢?可以在testbench上使用cvCircle把keypoint畫上去,代碼如下:
int main (int argc, char** argv) {
IplImage* src = cvLoadImage(INPUT_IMAGE);
IplImage* dst = cvCreateImage(cvGetSize(src), src->depth, src->nChannels);
AXI_STREAM src_axi, dst_axi;
IplImage2AXIvideo(src, src_axi);
int threhold = 60;
int keypoints[MAX_KEYPOINTS];
hls_fast_corner(src_axi, dst_axi, src->height, src->width, threhold, keypoints);
AXIvideo2IplImage(dst_axi, dst);
int count = keypoints[0];
printf("keypoints count:%d\n", count);
for(int i=1;i<count;i++)
{
int x = keypoints[i] >> 16;
int y = (keypoints[i] & 0xFFFF);
cvCircle(dst,cvPoint(x,y),2,CV_RGB(0,0,255),2);
}
cvSaveImage(OUTPUT_IMAGE, dst);
return 0;
}
C 仿真的效果
導出IP後,打開driver目錄下的xhls_fast_corner.h,可以看到比之前多了下面這些函數,通過其中的Read_keypoints函數(例如XHls_fast_corner_Read_keypoints_Words)就可以獲取到keypoint信息了
// Accessors for the `keypoints` array, as declared in the exported driver
// header xhls_fast_corner.h. Only the declarations are shown here.
// NOTE(review): the descriptions below are inferred from the function names;
// confirm the units of `offset`/`length` and the meaning of the u32 return
// values against the generated xhls_fast_corner.c.
// Get_*   - presumably report the address range, size, element bit width and
//           depth of the keypoints memory.
// Write_* / Read_* - presumably copy `length` words/bytes between `data` and
//           the keypoints memory, starting at `offset`.
u32 XHls_fast_corner_Get_keypoints_BaseAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_HighAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_TotalBytes(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_BitWidth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_Depth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Write_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Read_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Write_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
u32 XHls_fast_corner_Read_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);