STAPLE目標跟蹤算法(C++ /Opencv),實測可用

簡介

結合梯度特徵HOG及顏色特徵的實時跟蹤算法,速度可以達到80FPS。

摘要

近些年來,基於相關濾波跟蹤器的算法得到了很大的改進,已經能夠達到很好的跟蹤效果,14年的KCF,15年的SRDCF、HCF等算法,都已經達到了很高的精度,像HCF已經在OTB-50上達到了0.89的效果,但是美中不足的就是使用了深度學習特徵之後速度相對來說比較慢一點,不能達到實時的要求。算法作者經過研究發現,以前算法的模型的學習還是很依賴於跟蹤目標的空間信息,對變形的目標跟蹤效果不是很好,但是使用顏色特徵對目標進行學習能夠很好的處理跟蹤目標發生形變和運動模糊的問題,但是當光照條件變化的時候,顏色特徵就表現的相對無力,這個時候使用HOG特徵能夠對光照變化的跟蹤目標進行很好的跟蹤。所以,作者就想到,使用一個融合互補因子在同一個迴歸框架當中能夠很好的對這些因素進行很好的處理同時能夠達到比較快的速度,並且在跟蹤效果上面也比大部分現有的跟蹤算法效果要好。

實現

這裏感謝貢獻C++代碼的大神 xuduo35

源碼地址:https://github.com/xuduo35/STAPLE

//main.cpp
#include "staple_tracker.hpp"
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <numeric>

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

cv::Rect_<float> getAxisAlignedBB(std::vector<cv::Point2f> polygon);
std::vector<cv::Rect_<float>> getgroundtruth(std::string txt_file);

int main(int argc, char * argv[])
{
    std::string sequence = "/sequence";

    if (argc >= 2) {
        sequence = std::string("/vot2015/") + argv[1];
    }

    std::string video_base_path = "..";
    std::string pattern_jpg = video_base_path + sequence + "/*.jpg";
    std::string txt_base_path = video_base_path + sequence + "/groundtruth.txt";

    std::vector<cv::String> image_files;
    cv::glob(pattern_jpg, image_files);
    if (image_files.size() == 0)
        return -1;

    std::vector<cv::Rect_<float>> groundtruth_rect;
    groundtruth_rect = getgroundtruth(txt_base_path);
    //for (size_t i = 0; i < groundtruth_rect.size(); ++i)
    //  std::cout << i+1 << '\t' <<groundtruth_rect[i] << std::endl;

    STAPLE_TRACKER staple;

    cv::Rect_<float> location = groundtruth_rect[0];
    cv::Mat image;
    std::vector<cv::Rect_<float>> result_rects;
    int64 tic, toc;
    double time = 0;
    bool show_visualization = true;

    for (unsigned int frame = 0; frame < image_files.size(); ++frame) {
        image = cv::imread(image_files[frame]);
        tic = cv::getTickCount();
        if (frame == 0){
            staple.tracker_staple_initialize(image, location);
            staple.tracker_staple_train(image, true);
        } else {
            location = staple.tracker_staple_update(image);
            staple.tracker_staple_train(image, false);
        }

        toc = cv::getTickCount() - tic;
        time += toc;
        result_rects.push_back(location);

        if (show_visualization) {
            cv::putText(image, std::to_string(frame + 1), cv::Point(20, 40), 6, 1,
                cv::Scalar(0, 255, 255), 2);
            cv::rectangle(image, groundtruth_rect[frame], cv::Scalar(0, 255, 0), 2);
            cv::rectangle(image, location, cv::Scalar(0, 128, 255), 2);
            cv::imshow("STAPLE", image);

            char key = cv::waitKey(10);
            if (key == 27 || key == 'q' || key == 'Q')
                break;
        }
    }
    time = time / double(cv::getTickFrequency());
    double fps = double(result_rects.size()) / time;
    std::cout << "fps:" << fps << std::endl;
    cv::destroyAllWindows();

    return 0;
}

cv::Rect_<float> getAxisAlignedBB(std::vector<cv::Point2f> polygon)
{
    // Convert a rotated 4-point region (VOT polygon annotation) into an
    // axis-aligned rectangle whose area is scaled to match the polygon's.
    // Sums are performed in float first to preserve the original rounding.
    const float sum_x = polygon[0].x + polygon[1].x + polygon[2].x + polygon[3].x;
    const float sum_y = polygon[0].y + polygon[1].y + polygon[2].y + polygon[3].y;
    const double cx = double(sum_x) / 4.;
    const double cy = double(sum_y) / 4.;

    // Tight axis-aligned bounds of the four corners.
    const double x1 = std::min({polygon[0].x, polygon[1].x, polygon[2].x, polygon[3].x});
    const double x2 = std::max({polygon[0].x, polygon[1].x, polygon[2].x, polygon[3].x});
    const double y1 = std::min({polygon[0].y, polygon[1].y, polygon[2].y, polygon[3].y});
    const double y2 = std::max({polygon[0].y, polygon[1].y, polygon[2].y, polygon[3].y});

    // A1: polygon area approximated from two adjacent edge lengths;
    // A2: area of the axis-aligned bounds. The ratio shrinks the box so its
    // area matches the (typically smaller) rotated region.
    const double A1 = norm(polygon[1] - polygon[2]) * norm(polygon[2] - polygon[3]);
    const double A2 = (x2 - x1) * (y2 - y1);
    const double s  = sqrt(A1 / A2);

    const double w = s * (x2 - x1) + 1;
    const double h = s * (y2 - y1) + 1;

    // Box centered on the polygon centroid (with the original -1 offset kept
    // for compatibility with the reference implementation).
    return cv::Rect_<float>(cx - 1 - w / 2.0, cy - 1 - h / 2.0, w, h);
}

std::vector<cv::Rect_<float>> getgroundtruth(std::string txt_file)
{
    // Parse a VOT-style ground-truth file: one polygon per line, written as
    // eight comma-separated floats (x1,y1,...,x4,y4). Each polygon is
    // converted to an axis-aligned rectangle via getAxisAlignedBB.
    std::vector<cv::Rect_<float>> rects;
    std::ifstream gt(txt_file.c_str());
    if (!gt.is_open()) {
        std::cout << "Ground truth file " << txt_file
        << " can not be read" << std::endl;
        // Previously execution fell through and read from the closed stream;
        // return the empty vector so the caller can detect the failure.
        return rects;
    }
    std::string line;
    while (getline(gt, line)) {
        // Turn the comma-separated line into whitespace-separated tokens.
        std::replace(line.begin(), line.end(), ',', ' ');
        std::stringstream ss(line);
        float x1, y1, x2, y2, x3, y3, x4, y4;
        // Skip blank or malformed lines: the original pushed a rectangle
        // built from uninitialized floats when extraction failed.
        if (!(ss >> x1 >> y1 >> x2 >> y2 >> x3 >> y3 >> x4 >> y4))
            continue;
        std::vector<cv::Point2f> polygon;
        polygon.push_back(cv::Point2f(x1, y1));
        polygon.push_back(cv::Point2f(x2, y2));
        polygon.push_back(cv::Point2f(x3, y3));
        polygon.push_back(cv::Point2f(x4, y4));
        rects.push_back(getAxisAlignedBB(polygon)); //0-index
    }
    // std::ifstream closes itself on destruction (RAII); no explicit close().
    return rects;
}


//staple_tracker.hpp
#ifndef STAPLE_TRACKER_HPP
#define STAPLE_TRACKER_HPP

#include <iostream>
#include <vector>
#include <string>
#include <cmath>

#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/features2d/features2d.hpp>

///
/// \brief The staple_cfg struct
///
struct staple_cfg
{
    bool grayscale_sequence = false;    // assume the sequence is colour unless set
    int hog_cell_size = 4;              // HOG cell size (pixels) for the translation filter
    int fixed_area = 150*150;           // standard area to which we resize the target
    int n_bins = 2*2*2*2*2;             // number of bins for the color histograms (bg and fg models)
    double learning_rate_pwp = 0.04;    // bg and fg color models learning rate
    const char * feature_type = "fhog"; // "fhog" or "gray"
    double inner_padding = 0.2;         // defines inner area used to sample colors from the foreground
    double output_sigma_factor = 1/16.0; // standard deviation for the desired translation filter output
    double lambda = 1e-3;               // regularization weight
    double learning_rate_cf = 0.01;     // HOG model learning rate
    double merge_factor = 0.3;          // fixed interpolation factor - how to linearly combine the two responses
    const char * merge_method = "const_factor";
    bool den_per_channel = false;       // keep per-channel filter denominators instead of summing

    // scale related (DSST-style multi-scale estimation)
    bool scale_adaptation = true;        // enable the scale filter
    int hog_scale_cell_size = 4;         // Default DSST=4
    double learning_rate_scale = 0.025;  // scale filter learning rate
    double scale_sigma_factor = 1/4.0;   // std-dev factor for the desired scale filter output
    int num_scales = 33;                 // number of scale steps evaluated per frame
    double scale_model_factor = 1.0;     // resize factor applied to the scale model patch
    double scale_step = 1.02;            // multiplicative step between adjacent scales
    double scale_model_max_area = 32*16; // cap on the scale model patch area

    // debugging stuff
    int visualization = 0;              // show output bbox on frame
    int visualization_dbg = 0;          // show also per-pixel scores, desired response and filter output

    cv::Point_<float> init_pos;         // initial target center position
    cv::Size target_sz;                 // initial target size (width x height)
};

///
/// \brief The STAPLE_TRACKER class
///
class STAPLE_TRACKER
{
public:
    STAPLE_TRACKER()
    {
        // Load default configuration and start the frame counter at zero.
        cfg = default_parameters_staple(cfg);
        frameno = 0;
    }
    ~STAPLE_TRACKER(){}

    // MATLAB-mexResize-compatible resize helper (implementation not visible here).
    void mexResize(const cv::Mat &im, cv::Mat &output, cv::Size newsz, const char *method);
    // Update the tracker models from frame `im`; `first` == true on the initial frame.
    void tracker_staple_train(const cv::Mat &im, bool first);
    // Initialize the tracker from the first frame and its bounding box `region`.
    void tracker_staple_initialize(const cv::Mat &im, cv::Rect_<float> region);
    // Estimate and return the target location in a new frame.
    cv::Rect tracker_staple_update(const cv::Mat &im);

protected:
    // Fill a staple_cfg with the default parameter set.
    staple_cfg default_parameters_staple(staple_cfg cfg);
    // Compute bg/fg/search area sizes from the initial target (see members below).
    void initializeAllAreas(const cv::Mat &im);

    // Patch extraction around a center, resized to model_sz (rounding vs. floor variants).
    void getSubwindow(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output);
    void getSubwindowFloor(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output);
    // Build/update the fg and bg color histogram models from `patch`.
    void updateHistModel(bool new_model, cv::Mat &patch, double learning_rate_pwp=0.0);
    // Hann (cosine) window of size `sz` used to taper patch borders.
    void CalculateHann(cv::Size sz, cv::Mat &output);
    // Desired (Gaussian-shaped) correlation filter response.
    void gaussianResponse(cv::Size rect_size, double sigma, cv::Mat &output);
    // Extract the feature map ("fhog" or "gray", per cfg.feature_type) from a patch.
    void getFeatureMap(cv::Mat &im_patch, const char *feature_type, cv::MatND &output);
    // Crop the correlation filter response to `response_size`.
    void cropFilterResponse(const cv::Mat &response_cf, cv::Size response_size, cv::Mat& output);
    // Per-pixel fg/bg likelihood map from the color histograms.
    void getColourMap(const cv::Mat &patch, cv::Mat& output);
    // Integrate per-pixel likelihood over windows of size m (box-filter style).
    void getCenterLikelihood(const cv::Mat &object_likelihood, cv::Size m, cv::Mat& center_likelihood);
    // Linearly merge filter and color responses (per cfg.merge_method/merge_factor).
    void mergeResponses(const cv::Mat &response_cf, const cv::Mat &response_pwp, cv::Mat &response);
    // Multi-scale sample extraction for the DSST-style scale filter.
    void getScaleSubwindow(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Mat &output);

private:
    staple_cfg cfg;                    // tracker parameters

    cv::Point_<float> pos;             // current target center
    cv::Size target_sz;                // current target size

    cv::Size bg_area;                  // background (search) area around the target
    cv::Size fg_area;                  // foreground sampling area (inner region)
    double area_resize_factor;         // scale between image coords and normalized coords
    cv::Size cf_response_size;         // size of the correlation filter response map

    cv::Size norm_bg_area;             // bg area after normalization to fixed_area
    cv::Size norm_target_sz;           // target size in the normalized frame
    cv::Size norm_delta_area;          // displacement search range (normalized)
    cv::Size norm_pwp_search_area;     // color-model search area (normalized)

    cv::Mat im_patch_pwp;              // cached patch used for the per-pixel color model

    cv::MatND bg_hist;                 // background color histogram
    cv::MatND fg_hist;                 // foreground color histogram

    cv::Mat hann_window;               // cosine window applied to features
    cv::Mat yf;                        // DFT of the desired translation response

    std::vector<cv::Mat> hf_den;       // correlation filter denominator (per channel)
    std::vector<cv::Mat> hf_num;       // correlation filter numerator (per channel)

    cv::Rect rect_position;            // last reported bounding box

    float scale_factor;                // current scale relative to the first frame
    cv::Mat scale_window;              // window applied to scale samples
    cv::Mat scale_factors;             // candidate scale multipliers
    cv::Size scale_model_sz;           // patch size used by the scale model
    float min_scale_factor;            // lower clamp for scale_factor
    float max_scale_factor;            // upper clamp for scale_factor
    cv::Size base_target_sz;           // target size at scale_factor == 1

    cv::Mat ysf;                       // DFT of the desired scale response
    cv::Mat sf_den;                    // scale filter denominator
    cv::Mat sf_num;                    // scale filter numerator

    int frameno = 0;                   // number of frames processed so far
};

#endif

代碼運行結果:

實測該代碼可用,效果還不錯。文章只粘貼了部分代碼,有需要的可以去Github上下載;或者在我的博客資源裏下載
 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章