。 - 實踐一個視覺里程計前端
- 理解slam軟件框架
- 調試程序
怎麼管理地圖點,如何處理誤匹配,如何選擇關鍵幀。從簡單到複雜實現一個框架。
- /bin : 二進制
- /include/myshlam:當把引用自己的頭文件時需要寫 include “myslam/xxx.h”,和別的庫不容易混淆
- /src:.cpp文件
- /test: 測試用文件
- /lib: 存放編譯好的庫文件,libmyslam.so
- /config: 配置文件
- /cmak_modules:第三方庫cmake文件
基本數據結構
設計好數據單元,程序的處理流程。
- 幀 :把我們認爲重要的幀保存下來,認爲相機的軌跡可以用這些關鍵幀來描述。
- 路標: 就是相機中的特徵點,把所有land_mark放在地圖中,幀的位姿和路標位置估計相當於局部的slam問題。
- 配置文件:每次運行的參數在這裏讀取就行
%YAML:1.0
# data
# the tum dataset directory, change it to yours!
dataset_dir: /home/ciang/dataset/rgbd_dataset_freiburg1_xyz
# camera intrinsics
# fr1
camera.fx: 517.3
camera.fy: 516.5
camera.cx: 325.1
camera.cy: 249.7
camera.depth_scale: 5000
- 座標變換: 座標系之間的變換,定義一個類,把所有的變換操作都放在這裏:世界->相機;相機->歸一化的相機座標(去深度後);歸一化後相機座標->像素座標。
具體實現:
#include "myslam/camera.h"
namespace myslam
{
Camera::Camera()
{
}
//傳入:世界系的座標點,是一個Vector3d 向量代表空間點(xyz)
// SE3 變換矩陣,注意T_c_w 的寫法,從世界轉換到相機,是吧c寫在前面,和公式推到時相同
Vector3d Camera::world2camera ( const Vector3d& p_w, const SE3& T_c_w )
{
return T_c_w*p_w;
}
//反過來之後是SE3的inverse逆矩陣
Vector3d Camera::camera2world ( const Vector3d& p_c, const SE3& T_c_w )
{
return T_c_w.inverse() *p_c;
}
//相機座標到像素點座標,fx_ fy_cx_cy_是相機內參,根據公式得p_c ( 0,0 )
Vector2d Camera::camera2pixel ( const Vector3d& p_c )
{
return Vector2d (
fx_ * p_c ( 0,0 ) / p_c ( 2,0 ) + cx_,
fy_ * p_c ( 1,0 ) / p_c ( 2,0 ) + cy_
);
}
//回顧下公式吧
Vector3d Camera::pixel2camera ( const Vector2d& p_p, double depth )
{
return Vector3d (
( p_p ( 0,0 )-cx_ ) *depth/fx_,
( p_p ( 1,0 )-cy_ ) *depth/fy_,
depth
);
}
//連續變換世界到相機再到像素
Vector2d Camera::world2pixel ( const Vector3d& p_w, const SE3& T_c_w )
{
return camera2pixel ( world2camera ( p_w, T_c_w ) );
}
Vector3d Camera::pixel2world ( const Vector2d& p_p, const SE3& T_c_w, double depth )
{
return camera2world ( pixel2camera ( p_p, depth ), T_c_w );
}
}
爲了關注算法的正確實現,c++使用的比較簡單,用C的方式寫c++
開始寫
Config ,Frame,Camera,MapPoint,Map
類的關係
Camera
內參外參,相機座標系,像素座標系,世界座標系之間tf樹。外參值得就是輸入,就是我們的數據。
//宏定義。。。。保證紙杯定義一次
#ifndef CAMERA_H
#define CAMERA_H
#include "myslam/common_include.h"
//命名空間
namespace myslam
{
// Pinhole RGBD camera model
class Camera
{
public:
typedef std::shared_ptr<Camera> Ptr;
float fx_, fy_, cx_, cy_, depth_scale_; // Camera intrinsics
Camera();
Camera ( float fx, float fy, float cx, float cy, float depth_scale=0 ) :
fx_ ( fx ), fy_ ( fy ), cx_ ( cx ), cy_ ( cy ), depth_scale_ ( depth_scale )
{}
// coordinate transform: world, camera, pixel
Vector3d world2camera( const Vector3d& p_w, const SE3& T_c_w );
Vector3d camera2world( const Vector3d& p_c, const SE3& T_c_w );
Vector2d camera2pixel( const Vector3d& p_c );
Vector3d pixel2camera( const Vector2d& p_p, double depth=1 );
Vector3d pixel2world ( const Vector2d& p_p, const SE3& T_c_w, double depth=1 );
Vector2d world2pixel ( const Vector3d& p_w, const SE3& T_c_w );
};
}
#endif // CAMERA_H
Camera實現
#include "myslam/camera.h"
namespace myslam
{
Camera::Camera()
{
}
Vector3d Camera::world2camera ( const Vector3d& p_w, const SE3& T_c_w )
{
return T_c_w*p_w;
}
Vector3d Camera::camera2world ( const Vector3d& p_c, const SE3& T_c_w )
{
return T_c_w.inverse() *p_c;
}
Vector2d Camera::camera2pixel ( const Vector3d& p_c )
{
return Vector2d (
fx_ * p_c ( 0,0 ) / p_c ( 2,0 ) + cx_,
fy_ * p_c ( 1,0 ) / p_c ( 2,0 ) + cy_
);
}
Vector3d Camera::pixel2camera ( const Vector2d& p_p, double depth )
{
return Vector3d (
( p_p ( 0,0 )-cx_ ) *depth/fx_,
( p_p ( 1,0 )-cy_ ) *depth/fy_,
depth
);
}
Vector2d Camera::world2pixel ( const Vector3d& p_w, const SE3& T_c_w )
{
return camera2pixel ( world2camera ( p_w, T_c_w ) );
}
Vector3d Camera::pixel2world ( const Vector2d& p_p, const SE3& T_c_w, double depth )
{
return camera2world ( pixel2camera ( p_p, depth ), T_c_w );
}
}
Fram定義
#ifndef FRAME_H
#define FRAME_H
#include "myslam/common_include.h"
#include "myslam/camera.h"
namespace myslam
{
// forward declare
class MapPoint;
class Frame
{
//頭指針
//幀數
//時間戳
//變換矩陣
//相機內參
//圖像點雲數據
public:
typedef std::shared_ptr<Frame> Ptr; //
unsigned long id_; // id of this frame
double time_stamp_; // when it is recorded
SE3 T_c_w_; // transform from world to camera
Camera::Ptr camera_; // Pinhole RGBD Camera model
Mat color_, depth_; // color and depth image
public: // data members
Frame();
Frame( long id, double time_stamp=0, SE3 T_c_w=SE3(), Camera::Ptr camera=nullptr, Mat color=Mat(), Mat depth=Mat() );
~Frame();
//產生一個靜態的frame 並把頭指針返回
//尋找深度
//根據公式判斷是否在合理的旋轉變換範圍之內
// factory function
static Frame::Ptr createFrame();
// find the depth in depth map
double findDepth( const cv::KeyPoint& kp );
// Get Camera Center
Vector3d getCamCenter() const;
// check if a point is in this frame
bool isInFrame( const Vector3d& pt_world );
};
}
#endif // FRAME_H
實現:
#ifndef FRAME_H
#define FRAME_H
#include "myslam/common_include.h"
#include "myslam/camera.h"
namespace myslam
{
// forward declare 前向聲明,還不確定的類
class MapPoint;
class Frame
{
public:
typedef std::shared_ptr<Frame> Ptr;
unsigned long id_; // id of this frame
double time_stamp_; // when it is recorded時間戳
SE3 T_c_w_; // transform from world to camera這一幀從world到camera的se3
Camera::Ptr camera_; // Pinhole RGBD Camera model 相機內參
Mat color_, depth_; // color and depth image 這幀裏存儲的圖片點雲信息
public: // data members
Frame();
//SE3 T_c_w=SE3() 不理解爲什麼這麼寫
Frame( long id, double time_stamp=0, SE3 T_c_w=SE3(), Camera::Ptr camera=nullptr, Mat color=Mat(), Mat depth=Mat() );
~Frame();
// factory function
static Frame::Ptr createFrame();
// find the depth in depth map
double findDepth( const cv::KeyPoint& kp );
// Get Camera Center
Vector3d getCamCenter() const;
// check if a point is in this frame
bool isInFrame( const Vector3d& pt_world );
};
}
#endif // FRAME_H
MapPoint
表示路標點,估計其世界座標,拿到當前幀提取到的特徵點與地圖中的路標點匹配估計相機運動,所以呢描述子樹不可少的,此外記錄一個點的被觀測數和被匹配的次數,作爲評價好壞程度指標。
0.1/include/myslam/frame.h
#ifndef MAPPOINT_H
#define MAPPOINT_H
namespace myslam
{
class Frame;
class MapPoint
{
public:
typedef shared_ptr<MapPoint> Ptr;
unsigned long id_; // ID
Vector3d pos_; // Position in world
Vector3d norm_; // Normal of viewing direction
Mat descriptor_; // Descriptor for matching
int observed_times_; // being observed by feature matching algo.
int correct_times_; // being an inliner in pose estimation
MapPoint();
MapPoint( long id, Vector3d position, Vector3d norm );
// factory function
static MapPoint::Ptr createMapPoint();
};
}
#endif // MAPPOINT_H
實現:
#include "myslam/common_include.h"
#include "myslam/mappoint.h"
namespace myslam
{
MapPoint::MapPoint()
: id_(-1), pos_(Vector3d(0,0,0)), norm_(Vector3d(0,0,0)), observed_times_(0), correct_times_(0)
{
}
//帶參數構造
MapPoint::MapPoint ( long id, Vector3d position, Vector3d norm )
: id_(id), pos_(position), norm_(norm), observed_times_(0), correct_times_(0)
{
}
MapPoint::Ptr MapPoint::createMapPoint()
{
static long factory_id = 0;
return MapPoint::Ptr(
new MapPoint( factory_id++, Vector3d(0,0,0), Vector3d(0,0,0) )
);
}
}
Map類:
Map 類管理着所有的路標點,並負責添加新路標、刪除不好的路標等工作。 VO 的匹配過程只需要和 Map 打交道即可
Map 類中實際存儲了各個關鍵幀和路標點,既需要隨機訪問,又需要隨時插入和刪除,因此我們使用散列(Hash)來存儲它們。
Map類的實現:
#include "myslam/map.h"
namespace myslam
{
//聲明處:unordered_map<unsigned long, Frame::Ptr > keyframes_; // all key-frames
void Map::insertKeyFrame ( Frame::Ptr frame )
{
cout<<"Key frame size = "<<keyframes_.size()<<endl;
if ( keyframes_.find(frame->id_) == keyframes_.end() )
{ //[參見STL make_pair的用法](https://www.cnblogs.com/ranjiewen/p/5901296.html)
keyframes_.insert( make_pair(frame->id_, frame) );
//存入對應的鍵&鍵值,由make_pair維護
}
else
{
keyframes_[ frame->id_ ] = frame;
}
}
//管理點雲,數據結構是硬傷。。。2019/05
void Map::insertMapPoint ( MapPoint::Ptr map_point )
{
if ( map_points_.find(map_point->id_) == map_points_.end() )
{
map_points_.insert( make_pair(map_point->id_, map_point) );
}
else
{
map_points_[map_point->id_] = map_point;
}
}
}
Config:
我們把構造函數聲明爲私有,防止這個類的對象在別處建立,它只能在 setParameterFile 時構造。實際構造的對象是 Config 的智能指針: static shared_ptrconfig_。
#ifndef CONFIG_H
#define CONFIG_H
#include "myslam/common_include.h"
namespace myslam
{
class Config
{
private:
static std::shared_ptr<Config> config_;
// static std::shared_ptr<Config> config_;
cv::FileStorage file_; //cv中管理XML增刪改查的類
Config () {} // private constructor makes a singleton
public:
~Config(); // close the file when deconstructing
// set a new config file
static void setParameterFile( const std::string& filename );
// access the parameter values
template< typename T >
static T get( const std::string& key )
{
return T( Config::config_->file_[key] );//file是具體的管理xml
}
};
}
#endif // CONFIG_H
Config實現:
namespace myslam
{
void Config::setParameterFile( const std::string& filename )
{
if ( config_ == nullptr )
config_ = shared_ptr<Config>(new Config);
if(config_== nullptr)
config_== shared_ptr<Config>(new Config);
//config_中的 管理器file
///主要功能的一行
config_->file_ = cv::FileStorage( filename.c_str(), cv::FileStorage::READ );
if ( config_->file_.isOpened() == false )
{
std::cerr<<"parameter file "<<filename<<" does not exist."<<std::endl;
config_->file_.release();
return;
}
}
至此,我們定義了 SLAM 程序的基本數據結構,書寫了若干個基本類。這好比是造房 子的磚頭和水泥。你可以調用 cmake 編譯這個 0.1版,儘管它還沒有實質性的功能。接下 來我們來考慮把前面講過的 VO 算法加到工程中,並做一些測試來調整各算法的性能。注 意,我會刻意地暴露某些設計的問題,所以你看到的實現不見得就是最好的(或者足夠好 的)。
第一次改進
假設參考幀相對世界座標的變換矩陣爲 ,當前幀與世界座標系間爲 Tcw,則待估計的運動與這兩個幀的變換矩陣構成左乘關係:
VisualOdometry 類給出了上述算法的實現
#ifndef VISUALODOMETRY_H
#define VISUALODOMETRY_H
#include "myslam/common_include.h"
#include "myslam/map.h"
#include <opencv2/features2d/features2d.hpp>
namespace myslam
{
class VisualOdometry
{
public:
typedef shared_ptr<VisualOdometry> Ptr;
//枚舉典型用法:只能多選一時候
enum VOState {
INITIALIZING=-1,
OK=0,
LOST
};
VOState state_; // current VO status
Map::Ptr map_; // map with all frames and map points 實現MAP那些
Frame::Ptr ref_; // reference frame 參考幀
Frame::Ptr curr_; // current frame 當前幀 這個類的原材料
//Ptr is similar to boost::shared_ptr that is part of the Boost library
cv::Ptr<cv::ORB> orb_; // orb detector and computer cv::ORB 類型的cv下的Ptr智能指針
//
vector<cv::Point3f> pts_3d_ref_; // 3d points in reference frame
//
vector<cv::KeyPoint> keypoints_curr_; // keypoints in current frame
//
Mat descriptors_curr_; // descriptor in current frame
Mat descriptors_ref_; // descriptor in reference frame
//
vector<cv::DMatch> feature_matches_;
//根據上面一堆得到的結果
SE3 T_c_r_estimated_; // the estimated pose of current frame
//判斷好壞
int num_inliers_; // number of inlier features in icp
int num_lost_; // number of lost times
// parameters
int num_of_features_; // number of features
double scale_factor_; // scale in image pyramid
int level_pyramid_; // number of pyramid levels
float match_ratio_; // ratio for selecting good matches
int max_num_lost_; // max number of continuous lost times
int min_inliers_; // minimum inliers
double key_frame_min_rot; // minimal rotation of two key-frames
double key_frame_min_trans; // minimal translation of two key-frames
public: // functions
VisualOdometry();
~VisualOdometry();
bool addFrame( Frame::Ptr frame ); // add a new frame
protected:
// inner operation
void extractKeyPoints();
void computeDescriptors();
void featureMatching();
void poseEstimationPnP();
void setRef3DPoints();
void addKeyFrame();
bool checkEstimatedPose();
bool checkKeyFrame();
};
}
#endif // VISUALODOMETRY_H