代碼如下,詳細請參見注釋。
// A snapshot-isolation transaction layered on top of single-row Bigtable
// transactions. Writes are buffered by Set() and applied by Commit() in two
// phases: every cell is first locked by Prewrite(), then the primary cell is
// committed, followed by the secondary cells.
//
// NOTE(review): this is pseudocode. Timestamp ranges are written as
// [lo, hi] and ∞, which are not valid C++ expressions; they stand for
// "any version whose timestamp lies in that range".
class Transaction {
  // One buffered cell write.
  struct Write {
    Row row;       // row key
    Column col;    // column name
    string value;  // value to store in the cell
  };

  vector<Write> writes_;  // writes buffered by Set() until Commit() runs
  int start_ts_;          // start timestamp, obtained from the oracle at construction

  // Obtains the transaction's start timestamp from the timestamp oracle.
  Transaction() : start_ts_(oracle.GetTimestamp()) {}

  // Buffers a write; nothing is sent to Bigtable until Commit().
  //
  // w: the cell write to buffer.
  void Set(Write w) {
    writes_.push_back(w);
  }

  // Reads cell (row, c) as of the transaction's start timestamp.
  //
  // row:   row to read.
  // c:     column to read.
  // value: out-parameter that receives the cell's value.
  // Returns true when a value was found, false when there is no write record
  // in [0, start_ts_].
  bool Get(Row row, Column c, string* value) {
    while (true) {
      // Single-row transaction provided by Bigtable.
      bigtable::Txn T = bigtable::StartRowTransaction(row);

      // Check for locks that signal concurrent writes.
      // Under snapshot isolation reads normally do not block on writes, but
      // here the read must wait: a lock in [0, start_ts_] means a write is
      // still in flight, and its commit timestamp may turn out to be below
      // start_ts_. That is only known once the write has finished.
      if (T.Read(row, c+"lock", [0, start_ts_])) {
        // There is a pending lock; try to clean it and wait.
        BackoffAndMaybeCleanupLock(row, c);
        continue;
      }

      // Find the latest write below our start timestamp.
      auto latest_write = T.Read(row, c+"write", [0, start_ts_]);
      if (!latest_write.found())
        return false;  // no data

      // The write record stores the start timestamp of the transaction that
      // wrote the data; the data cell is stored under that timestamp.
      int data_ts = latest_write.start_timestamp();
      *value = T.Read(row, c+"data", [data_ts, data_ts]);
      return true;
    }
  }

  // Prewrite tries to lock cell w, returning false in case of conflict.
  //
  // w:       the cell write to stage and lock.
  // primary: the transaction's primary write; its location is stored in the
  //          lock written for w.
  // Returns true when the data and lock were written and the row transaction
  // committed, false on a conflict or when the row transaction fails.
  bool Prewrite(Write w, Write primary) {
    Column c = w.col;
    bigtable::Txn T = bigtable::StartRowTransaction(w.row);

    // Abort on writes after our start timestamp ...
    // (a write record in [start_ts_, ∞] is a write-write conflict)
    if (T.Read(w.row, c+"write", [start_ts_, ∞]))
      return false;

    // ... or locks at any timestamp.
    // (a lock means another transaction's write to this cell is unfinished)
    if (T.Read(w.row, c+"lock", [0, ∞]))
      return false;

    // Checks passed: stage the value under start_ts_ ...
    T.Write(w.row, c+"data", start_ts_, w.value);
    // ... and lock the cell under start_ts_; the lock's value is
    // the primary's location.
    T.Write(w.row, c+"lock", start_ts_,
            {primary.row, primary.col});
    return T.Commit();
  }

  // Commits all buffered writes.
  //
  // Returns true when the primary cell was committed (or when there was
  // nothing to commit), false when any prewrite fails, when the primary's
  // lock is no longer present, or when the primary's row transaction fails.
  bool Commit() {
    // Nothing buffered: there is nothing to lock or write, and writes_[0]
    // below would be out of bounds. Treated as a successful no-op.
    if (writes_.empty())
      return true;

    // The first buffered write is the primary; the rest are secondaries.
    const Write& primary = writes_[0];
    vector<Write> secondaries(writes_.begin()+1, writes_.end());

    // Lock the primary first, then every secondary; one failure fails the
    // whole commit.
    if (!Prewrite(primary, primary))
      return false;
    for (const Write& w : secondaries)
      if (!Prewrite(w, primary))
        return false;

    int commit_ts = oracle.GetTimestamp();

    // Commit primary first.
    bigtable::Txn T = bigtable::StartRowTransaction(primary.row);
    // Our own lock (identified by start_ts_) must still be in place.
    if (!T.Read(primary.row, primary.col+"lock", [start_ts_, start_ts_]))
      return false;  // aborted while working
    // Write record at commit_ts: pointer to data written at start_ts_.
    T.Write(primary.row, primary.col+"write", commit_ts, start_ts_);
    // Remove the primary's lock.
    T.Erase(primary.row, primary.col+"lock", commit_ts);
    if (!T.Commit())
      return false;  // commit point

    // Second phase: write out write records for secondary cells and remove
    // their locks.
    for (const Write& w : secondaries) {
      bigtable::Write(w.row, w.col+"write", commit_ts, start_ts_);
      bigtable::Erase(w.row, w.col+"lock", commit_ts);
    }
    return true;
  }
};  // class Transaction