Google Percolator 快照隔離(Snapshot Isolation,SI)實現

代碼如下(爲C++風格的僞代碼,其中的區間寫法如 [0, start_ts_] 並非可直接編譯的語法),詳細說明請參見代碼中的註釋。


class Transaction { //class Transaction
  //結構體Write
  struct Write {
    Row row;        //行
    Column col;     //列
    string value;   //列值
  };//Write Struct 
  vector<Write> writes_;//數據緩存Write
  int start_ts_;//事務開始時間

  Transaction() : start_ts_(oracle.GetTimestamp()) {} //構造函數,初始化變量start_ts_
  
  /*
    輸入:Write結構體
    輸出:無
    實現:簡單的把Write對象(列值)push到Vector中
  */
  void Set(Write w) {//Set函數
    writes_.push_back(w);   
  }
  
  /*
    輸入:Row-行標識,Column-列標識
    輸出:value-列值,成功返回true,失敗(如沒有獲取值)返回false
  */
  bool Get(Row row, Column c, string* value) {
    while (true) {
      //Bigtable提供的行級事務
      bigtable::Txn T = bigtable::StartRowTransaction(row);
      // Check for locks that signal concurrent writes.
      // 檢查是否存在併發事務在寫數據
      // 注:SI的特點是寫不阻塞讀,讀不阻塞寫,但在這裏卻需要等待?
      //     原因是SI保證讀到的是事務開始(start_ts)之前已提交的數據,
      //     存在鎖意味着寫操作未完成且該操作的commit_ts可能在事務開始之前,
      //     但需要在寫入之後才能知道是否在start_ts之前,因此需要等待
      if (T.Read(row, c+"lock", [0, start_ts_])) { //判斷[0, start_ts_]內是否存在lock?
        // There is a pending lock; try to clean it and wait
        // 仍存在lock,等待
        BackoffAndMaybeCleanupLock(row, c);
        continue;
      }
      // Find the latest write below our start timestamp.
      //讀取最近已提交的數據版本
      latest write = T.Read(row, c+"write", [0, start_ts_]);
      if (!latest_write.found())
        //沒有數據,返回false
        return false; // no data
      //從Column+write中獲取start_ts
      int data_ts = latest_write.start_timestamp();
      //獲取真正的數據:Row+Column(column+"data")+start_ts
      *value = T.Read(row, c+"data", [data_ts, data_ts]);
      return true;
    }
  }
  
  // Prewrite tries to lock cell w, returning false in case of conflict.
  // 預寫入(理論基礎:通過意向表緩存數據,執行延遲更新)
  /*    
    輸入:Write結構體,Write主節點
    輸出:成功返回true,失敗返回false
  */
  bool Prewrite(Write w, Write primary) {
    //獲取列
    Column c = w.col;
    //啓動Bigtable行事務
    bigtable::Txn T = bigtable::StartRowTransaction(w.row);
    // Abort on writes after our start timestamp ...
    // 存在比事務啓動時間start_ts更大的值,存在ww衝突,按照FUW原則,本事務回滾
    if (T.Read(w.row, c+"write", [start_ts_, ∞]))
      return false;
    // ... or locks at any timestamp.
    // 存在鎖,說明未完成的寫,即存在ww衝突,且其他事務比本事務更"早"獲得鎖,本事務回滾
    if (T.Read(w.row, c+"lock", [0, ∞]))
      return false;

    //校驗完畢,可以寫數據
    //寫入數據:key=Row+Column(data)+start_ts,value=需寫入的值
    T.Write(w.row, c+"data", start_ts_, w.value);
    //上鎖,key=Row+Column(lock)+start_ts,value=主節點的行&列
    T.Write(w.row, c+"lock", start_ts_,
      {primary.row, primary.col}); // The primary’s location.
    //執行提交操作
    return T.Commit();
  }

  //提交操作
  /*
    輸入:無
    輸出:成功返回true,失敗返回false
  */
  bool Commit() {
    // The primary’s location.
    // 數組writes_的第一個元素爲主節點
    Write primary = writes_[0];
    // 除第一個元素外,其他爲從節點
    vector<Write> secondaries(writes_.begin()+1, writes_.end());
    //主節點預寫入失敗
    if (!Prewrite(primary, primary))
      return false;
    //遍歷從節點,執行預寫入,一個節點不成功則全部失敗
    for (Write w : secondaries)
      if (!Prewrite(w, primary))
        return false;

    //獲取事務提交時間戳
    int commit_ts = oracle_.GetTimestamp();
    // Commit primary first.
    // 主節點首先提交
    Write p = primary;
    //啓動Bigtable事務
    bigtable::Txn T = bigtable::StartRowTransaction(p.row);
    //謹慎起見,判斷是否存在鎖(本事務,start_ts唯一),避免重複寫入
    if (!T.Read(p.row, p.col+"lock", [start_ts_, start_ts_]))
      return false; // aborted while working
    //寫入:key=Row+Column(write)+commit_ts,value=start_ts,實際的值在key=Row+Column(data)+start_ts中
    T.Write(p.row, p.col+"write", commit_ts, start_ts_); // Pointer to data written at start_ts_.
    //刪除鎖
    T.Erase(p.row, p.col+"lock", commit_ts);
    //Bigtable事務提交
    if (!T.Commit())
      return false; // commit point
    // Second phase: write out write records for secondary cells.
    //遍歷從節點,寫key=Row+Column(write)+commit_ts,value=start_ts,同時刪除鎖
    for (Write w : secondaries) {
      bigtable::Write(w.row, w.col+"write", commit_ts, start_ts_);
      bigtable::Erase(w.row, w.col+"lock", commit_ts);
    }
    return true;
  }
} // class Transaction
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章