c++stl vector源碼簡析
vector是我們經常用的容易之一,我們現在來簡單看一下vector的部分源碼理解vector的行爲以便我們在使用過程中更得心應手
首先看一下vector的alloc基類:
template <class _Tp, class _Allocator, bool _IsStatic>
class _Vector_alloc_base {
public:
typedef typename _Alloc_traits<_Tp, _Allocator>::allocator_type
allocator_type;
allocator_type get_allocator() const { return _M_data_allocator; }
_Vector_alloc_base(const allocator_type& __a)
: _M_data_allocator(__a), _M_start(0), _M_finish(0), _M_end_of_storage(0)
{}
protected:
allocator_type _M_data_allocator;
_Tp* _M_start;
_Tp* _M_finish;
_Tp* _M_end_of_storage;
_Tp* _M_allocate(size_t __n)
{ return _M_data_allocator.allocate(__n); }
void _M_deallocate(_Tp* __p, size_t __n)
{ if (__p) _M_data_allocator.deallocate(__p, __n); }
};
可以看到成員變量部分有start,finish ,end
首先說明一點vector是一個長度可變的容器,爲了保證它在末尾插入的效率 它實際的存儲空間是大於它的size的
size大小爲start到finish,實際容量是start到end;
成員變量還有一個分配器,源碼中有這樣的註釋
// Base class for ordinary allocators.
這是一個普通的分配器
源碼中還定義了另一個類
// Specialization for allocators that have the property that we don't
// actually have to store an allocator object.
在這個類裏面是通過調用函數返回一個分配器對象 目前他們兩個的區別和作用我還不瞭解 等以後瞭解過後再來補充
接下來是vector的基類
template <class _Tp, class _Alloc>
class _Vector_base {
public:
typedef _Alloc allocator_type;
allocator_type get_allocator() const { return allocator_type(); }
_Vector_base(const _Alloc&)
: _M_start(0), _M_finish(0), _M_end_of_storage(0) {}
_Vector_base(size_t __n, const _Alloc&)
: _M_start(0), _M_finish(0), _M_end_of_storage(0)
{
_M_start = _M_allocate(__n);
_M_finish = _M_start;
_M_end_of_storage = _M_start + __n;
}
~_Vector_base() { _M_deallocate(_M_start, _M_end_of_storage - _M_start); }
protected:
_Tp* _M_start;
_Tp* _M_finish;
_Tp* _M_end_of_storage;
typedef simple_alloc<_Tp, _Alloc> _M_data_allocator;
_Tp* _M_allocate(size_t __n)
{ return _M_data_allocator::allocate(__n); }
void _M_deallocate(_Tp* __p, size_t __n)
{ _M_data_allocator::deallocate(__p, __n); }
};
可以看見有兩種構造函數一種傳入了分配空間的大小,一種沒有傳入大小
傳入大小的構造函數和析構函數分別調用了下面的分配內存的函數和清空內存的函數
接下來就是vector類的,因爲vecotor類代碼較長我就拆解下來看
public:
typedef _Tp value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type* iterator;
typedef const value_type* const_iterator;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef typename _Base::allocator_type allocator_type;
allocator_type get_allocator() const { return _Base::get_allocator(); }
#ifdef __STL_CLASS_PARTIAL_SPECIALIZATION
typedef reverse_iterator<const_iterator> const_reverse_iterator;
typedef reverse_iterator<iterator> reverse_iterator;
#else /* __STL_CLASS_PARTIAL_SPECIALIZATION */
typedef reverse_iterator<const_iterator, value_type, const_reference,
difference_type> const_reverse_iterator;
typedef reverse_iterator<iterator, value_type, reference, difference_type>
reverse_iterator;
#endif /* __STL_CLASS_PARTIAL_SPECIALIZATION */
看到typedef有我們熟悉的iterator和size_t等等 並且還有反向迭代器的的聲明
接下來是我們常用的vector內置函數:
iterator begin() { return _M_start; }
const_iterator begin() const { return _M_start; }
iterator end() { return _M_finish; }
const_iterator end() const { return _M_finish; }
reverse_iterator rbegin()
{ return reverse_iterator(end()); }
const_reverse_iterator rbegin() const
{ return const_reverse_iterator(end()); }
reverse_iterator rend()
{ return reverse_iterator(begin()); }
const_reverse_iterator rend() const
{ return const_reverse_iterator(begin()); }
size_type size() const
{ return size_type(end() - begin()); }
size_type max_size() const
{ return size_type(-1) / sizeof(_Tp); }
size_type capacity() const
{ return size_type(_M_end_of_storage - begin()); }
bool empty() const
{ return begin() == end(); }
reference operator[](size_type __n) { return *(begin() + __n); }
const_reference operator[](size_type __n) const { return *(begin() + __n); }
這部分應該比較簡單明瞭不需要過多地解釋,我們繼續往下看
void _M_range_check(size_type __n) const {
if (__n >= this->size())
__stl_throw_range_error("vector");
}
reference at(size_type __n)
{ _M_range_check(__n); return (*this)[__n]; }
const_reference at(size_type __n) const
{ _M_range_check(__n); return (*this)[__n]; }
這裏是範圍檢查函數如果範圍超出size就拋出異常at函數就是取回下標爲n的元素。不過這裏我有個疑問,爲什麼返回(*this)[n]而不是 (*start)[n]並且this指針指向實例對象的位置,難道是在實例對象內部先存放的vector元素的數組? 這個問題求證後再來說明
下面是vector的構造函數:
explicit vector(const allocator_type& __a = allocator_type())
: _Base(__a) {}
vector(size_type __n, const _Tp& __value,
const allocator_type& __a = allocator_type())
: _Base(__n, __a)
{ _M_finish = uninitialized_fill_n(_M_start, __n, __value); }
explicit vector(size_type __n)
: _Base(__n, allocator_type())
{ _M_finish = uninitialized_fill_n(_M_start, __n, _Tp()); }
vector(const vector<_Tp, _Alloc>& __x)
: _Base(__x.size(), __x.get_allocator())
{ _M_finish = uninitialized_copy(__x.begin(), __x.end(), _M_start); }
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last,
const allocator_type& __a = allocator_type()) : _Base(__a) {
typedef typename _Is_integer<_InputIterator>::_Integral _Integral;
_M_initialize_aux(__first, __last, _Integral());
}
vector(const _Tp* __first, const _Tp* __last,
const allocator_type& __a = allocator_type())
: _Base(__last - __first, __a)
{ _M_finish = uninitialized_copy(__first, __last, _M_start); }
其中explicit是聲明爲顯示的意思
不懂可以參考一下:http://www.cnblogs.com/ymy124/p/3632634.html
注意第四個構造函數 用一個vector去初始化另一個vector實際上是把原始vector的size個大小的數據copy到新的vector裏面並且新的vector實際容量爲size而不是原始vectror的容量
最後一個構造函數同理
我們繼續看reserve函數的行爲:
void reserve(size_type __n) {
if (capacity() < __n) {
const size_type __old_size = size();
iterator __tmp = _M_allocate_and_copy(__n, _M_start, _M_finish);
destroy(_M_start, _M_finish);
_M_deallocate(_M_start, _M_end_of_storage - _M_start);
_M_start = __tmp;
_M_finish = __tmp + __old_size;
_M_end_of_storage = _M_start + __n;
}
}
可以看出當容量小於n時reserve函數是重新分配一個大小爲n的內存並且把原來的數據複製過去然後把原來的內存(注意:這裏大小是容量不是size)銷燬
// assign(), a generalized assignment member function. Two
// versions: one that takes a count, and one that takes a range.
// The range version is a member template, so we dispatch on whether
// or not the type is an integer.
//assign有兩個版本一個輸入數字一個輸入範圍 範圍的是一個模板所以不管參數是不是整型都可以調用
void vector<_Tp, _Alloc>::_M_fill_assign(size_t __n, const value_type& __val)
{
if (__n > capacity()) {
vector<_Tp, _Alloc> __tmp(__n, __val, get_allocator());
__tmp.swap(*this);
}
else if (__n > size()) {
fill(begin(), end(), __val);
_M_finish = uninitialized_fill_n(_M_finish, __n - size(), __val);
}
else
erase(fill_n(begin(), __n, __val), end());
}
template <class _Tp, class _Alloc> template <class _InputIter>
void vector<_Tp, _Alloc>::_M_assign_aux(_InputIter __first, _InputIter __last,
input_iterator_tag) {
iterator __cur = begin();
for ( ; __first != __last && __cur != end(); ++__cur, ++__first)
*__cur = *__first;
if (__first == __last)
erase(__cur, end());
else
insert(end(), __first, __last);
}
template <class _InputIterator>
void assign(_InputIterator __first, _InputIterator __last) {
typedef typename _Is_integer<_InputIterator>::_Integral _Integral;
_M_assign_dispatch(__first, __last, _Integral());
}
關於assign還有很多函數這裏就不一一列出了,繼續分析我們常用的部分
接下來是front ,back ,push_back
reference front() { return *begin(); }
const_reference front() const { return *begin(); }
reference back() { return *(end() - 1); }
const_reference back() const { return *(end() - 1); }
void push_back(const _Tp& __x) {
if (_M_finish != _M_end_of_storage) {
construct(_M_finish, __x);
++_M_finish;
}
else
_M_insert_aux(end(), __x);
}
void push_back() {
if (_M_finish != _M_end_of_storage) {
construct(_M_finish);
++_M_finish;
}
else
_M_insert_aux(end());
}
前面都很簡單,不過沒有參數的那個push_back是個什麼鬼,我居然之前不知道有這個東西。那就來看一下吧,嗯,區別就是兩個調用的construct參數不一樣,可能會給一個默認值吧。可是找了半天居然這個文件的源碼裏面沒找到construct,不管了,繼續。
void swap(vector<_Tp, _Alloc>& __x) {
__STD::swap(_M_start, __x._M_start);
__STD::swap(_M_finish, __x._M_finish);
__STD::swap(_M_end_of_storage, __x._M_end_of_storage);
}
swap就很簡單了,只是把他們指針的值交換了一下
下面是insert
iterator insert(iterator __position, const _Tp& __x) {
size_type __n = __position - begin();
if (_M_finish != _M_end_of_storage && __position == end()) {
construct(_M_finish, __x);
++_M_finish;
}
else
_M_insert_aux(__position, __x);
return begin() + __n;
}
iterator insert(iterator __position) {
size_type __n = __position - begin();
if (_M_finish != _M_end_of_storage && __position == end()) {
construct(_M_finish);
++_M_finish;
}
else
_M_insert_aux(__position);
return begin() + __n;
}
插入同push_back判斷一下插入位置是不是end是end就執行push_back一樣的操作,否則就正兒八經地插入進去
下面是pop_back,erase
void pop_back() {
--_M_finish;
destroy(_M_finish);
}
iterator erase(iterator __position) {
if (__position + 1 != end())
copy(__position + 1, _M_finish, __position);
--_M_finish;
destroy(_M_finish);
return __position;
}
iterator erase(iterator __first, iterator __last) {
iterator __i = copy(__last, _M_finish, __first);
destroy(__i, _M_finish);
_M_finish = _M_finish - (__last - __first);
return __first;
}
pop_back簡單吧,erase刪除一個節點的時候是把position+1到finish的內存複製到position到finish-1
而erase刪除範圍的時候是把last+1到finish的內存複製到first到finish-last+first,說起來有點繞,你們畫個圖就明白了
繼續resize和clear
void resize(size_type __new_size, const _Tp& __x) {
if (__new_size < size())
erase(begin() + __new_size, end());
else
insert(end(), __new_size - size(), __x);
}
void resize(size_type __new_size) { resize(__new_size, _Tp()); }
void clear() { erase(begin(), end()); }
如果新的size小於當前的size就會裁剪爲新的size並且把其餘部分的數據清空撒刪除
否則就會用x把未賦值的部分填充使得size等於new size
template <class _InputIterator>
void _M_range_initialize(_InputIterator __first,
_InputIterator __last, input_iterator_tag)
{
for ( ; __first != __last; ++__first)
push_back(*__first);
}
範圍初始化就是按序把元素push_back(注意這裏的inputiterator只能迭代一次所以要按序插入)再看下面
template <class _ForwardIterator>
void _M_range_initialize(_ForwardIterator __first,
_ForwardIterator __last, forward_iterator_tag)
{
size_type __n = 0;
distance(__first, __last, __n);
_M_start = _M_allocate(__n);
_M_end_of_storage = _M_start + __n;
_M_finish = uninitialized_copy(__first, __last, _M_start);
}
這裏就是forwarditerator可以反覆使用 我們通過distance++遍歷就可以預先知道長度然後用內存拷貝的方式初始化
下面是運算符重載
==
template <class _Tp, class _Alloc>
inline bool
operator==(const vector<_Tp, _Alloc>& __x, const vector<_Tp, _Alloc>& __y)
{
return __x.size() == __y.size() &&
equal(__x.begin(), __x.end(), __y.begin());
}
<
template <class _Tp, class _Alloc>
inline bool
operator<(const vector<_Tp, _Alloc>& __x, const vector<_Tp, _Alloc>& __y)
{
return lexicographical_compare(__x.begin(), __x.end(),
__y.begin(), __y.end());
}
其餘的比較運算符重載函數都是調用這兩個來運算的
如x>y等價於y
vector<_Tp,_Alloc>::operator=(const vector<_Tp, _Alloc>& __x)
{
if (&__x != this) {
const size_type __xlen = __x.size();
if (__xlen > capacity()) {
iterator __tmp = _M_allocate_and_copy(__xlen, __x.begin(), __x.end());
destroy(_M_start, _M_finish);
_M_deallocate(_M_start, _M_end_of_storage - _M_start);
_M_start = __tmp;
_M_end_of_storage = _M_start + __xlen;
}
else if (size() >= __xlen) {
iterator __i = copy(__x.begin(), __x.end(), begin());
destroy(__i, _M_finish);
}
else {
copy(__x.begin(), __x.begin() + size(), _M_start);
uninitialized_copy(__x.begin() + size(), __x.end(), _M_finish);
}
_M_finish = _M_start + __xlen;
}
return *this;
}
先判斷防止自我賦值
接下來比較大小如果容量小於x的size則重新分配內存
如果容量大於x的size直接複製並把多餘的內存清空
如果容量大於x的size並且自己的size小於x的size則先把自己已初始化的內存複製在把未初始化的部分初始化
下面是一個指定位置插入元素的函數
template <class _Tp, class _Alloc>
void
vector<_Tp, _Alloc>::_M_insert_aux(iterator __position, const _Tp& __x)
{
if (_M_finish != _M_end_of_storage) {
construct(_M_finish, *(_M_finish - 1));
++_M_finish;
_Tp __x_copy = __x;
copy_backward(__position, _M_finish - 2, _M_finish - 1);
*__position = __x_copy;
}
else {
const size_type __old_size = size();
//這裏可以看出vector增長是指數增長的
const size_type __len = __old_size != 0 ? 2 * __old_size : 1;
iterator __new_start = _M_allocate(__len);
iterator __new_finish = __new_start;
__STL_TRY {
__new_finish = uninitialized_copy(_M_start, __position, __new_start);
construct(__new_finish, __x);
++__new_finish;
__new_finish = uninitialized_copy(__position, _M_finish, __new_finish);
}
__STL_UNWIND((destroy(__new_start,__new_finish),
_M_deallocate(__new_start,__len)));
destroy(begin(), end());
_M_deallocate(_M_start, _M_end_of_storage - _M_start);
_M_start = __new_start;
_M_finish = __new_finish;
_M_end_of_storage = __new_start + __len;
}
}
這裏我們看出在插入元素的時候如果內存不足則vector的增長模式爲翻倍,和我之前在網上看到的不同
網上的說法是增長當前容量的一半也就是new storage=1.5 old storage這裏可能是c++11對內存增長策略做出了調整 保留兩種說法
再接下來就是jinsert函數的集中重載,代碼較多但是原理都和其他函數差不多,這裏就不再講解了
到這裏就把vector的源碼簡單過了一遍,其中掠過了很多細節,但瞭解大體對我們的提升也很大,以後有時間和精力再回過頭來摳細節,相信對我們提升回更大。