本節介紹了PostgreSQL啓動事務的邏輯,主要內容是函數StartTransaction的實現邏輯。
一、數據結構
靜態變量
當前事務狀態CurrentTransactionState
/*
 * TopTransactionStateData is the statically allocated state block used for
 * the top-level transaction.
 *
 * CurrentTransactionState always points to the current transaction state
 * block.  It will point to TopTransactionStateData when not in a
 * transaction at all, or when in a top-level transaction.
 *
 * Only the low-level and block-level state are initialized explicitly, both
 * to their idle ("DEFAULT") value; as a static object, the remaining fields
 * start out zeroed.
 */
static TransactionStateData TopTransactionStateData = {
.state = TRANS_DEFAULT,
.blockState = TBLOCK_DEFAULT,
};
/*
 * unreportedXids holds XIDs of all subtransactions that have not yet been
 * reported in an XLOG_XACT_ASSIGNMENT record.
 *
 * nUnreportedXids is presumably the number of valid entries in the array;
 * StartTransaction() resets it to 0.  The array is sized by
 * PGPROC_MAX_CACHED_SUBXIDS.
 */
static int nUnreportedXids;
static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS];
/*
 * Pointer to the current transaction state block.  It initially refers to
 * the top-level block, TopTransactionStateData.
 */
static TransactionState CurrentTransactionState = &TopTransactionStateData;
/*
 * The subtransaction ID and command ID assignment counters are global
 * to a whole transaction, so we do not keep them in the state stack.
 *
 * StartTransaction() resets them to TopSubTransactionId, FirstCommandId and
 * false respectively.
 */
static SubTransactionId currentSubTransactionId;
static CommandId currentCommandId;
static bool currentCommandIdUsed;
TransactionState
事務狀態結構體
/*
 * transaction states - transaction state from server perspective
 *
 * This is the "low-level" state stored in TransactionStateData.state.
 */
typedef enum TransState
{
TRANS_DEFAULT, /* idle */
TRANS_START, /* transaction starting */
TRANS_INPROGRESS, /* inside a valid transaction */
TRANS_COMMIT, /* commit in progress */
TRANS_ABORT, /* abort in progress */
TRANS_PREPARE /* prepare in progress */
} TransState;
/*
 * transaction block states - transaction state of client queries
 *
 * This is the "high-level" state stored in TransactionStateData.blockState.
 *
 * Note: the subtransaction states are used only for non-topmost
 * transactions; the others appear only in the topmost transaction.
 */
typedef enum TBlockState
{
/* not-in-transaction-block states */
TBLOCK_DEFAULT, /* idle */
TBLOCK_STARTED, /* running single-query transaction */
/* transaction block states */
TBLOCK_BEGIN, /* starting transaction block */
TBLOCK_INPROGRESS, /* live transaction */
TBLOCK_IMPLICIT_INPROGRESS, /* live transaction after implicit BEGIN */
TBLOCK_PARALLEL_INPROGRESS, /* live transaction inside parallel worker */
TBLOCK_END, /* COMMIT received */
TBLOCK_ABORT, /* failed xact, awaiting ROLLBACK */
TBLOCK_ABORT_END, /* failed xact, ROLLBACK received */
TBLOCK_ABORT_PENDING, /* live xact, ROLLBACK received */
TBLOCK_PREPARE, /* live xact, PREPARE received */
/* subtransaction states */
TBLOCK_SUBBEGIN, /* starting a subtransaction */
TBLOCK_SUBINPROGRESS, /* live subtransaction */
TBLOCK_SUBRELEASE, /* RELEASE received */
TBLOCK_SUBCOMMIT, /* COMMIT received while TBLOCK_SUBINPROGRESS */
TBLOCK_SUBABORT, /* failed subxact, awaiting ROLLBACK */
TBLOCK_SUBABORT_END, /* failed subxact, ROLLBACK received */
TBLOCK_SUBABORT_PENDING, /* live subxact, ROLLBACK received */
TBLOCK_SUBRESTART, /* live subxact, ROLLBACK TO received */
TBLOCK_SUBABORT_RESTART /* failed subxact, ROLLBACK TO received */
} TBlockState;
/*
 * transaction state structure
 *
 * Describes one transaction or subtransaction.  Nested levels are chained
 * through "parent"; the top-level block (TopTransactionStateData) is the one
 * StartTransaction() initializes, with nestingLevel and gucNestLevel set to 1.
 */
typedef struct TransactionStateData
{
/* transaction ID; InvalidTransactionId until one is assigned */
TransactionId transactionId; /* my XID, or Invalid if none */
/* subtransaction ID; TopSubTransactionId for the top-level transaction */
SubTransactionId subTransactionId; /* my subxact ID */
/* savepoint name */
char *name; /* savepoint name, if any */
/* savepoint level */
int savepointLevel; /* savepoint level */
/* low-level transaction state (see TransState) */
TransState state; /* low-level state */
/* high-level transaction block state (see TBlockState) */
TBlockState blockState; /* high-level state */
/* transaction nesting depth */
int nestingLevel; /* transaction nesting depth */
/* GUC context nesting depth */
int gucNestLevel; /* GUC context nesting depth */
/* memory context living as long as this transaction */
MemoryContext curTransactionContext; /* my xact-lifetime context */
/* resource owner for this transaction's query resources */
ResourceOwner curTransactionOwner; /* my query resources */
/* subcommitted child XIDs, kept in XID order */
TransactionId *childXids; /* subcommitted child XIDs, in XID order */
/* number of entries used in childXids[] */
int nChildXids; /* # of subcommitted child XIDs */
/* number of entries allocated for childXids[] */
int maxChildXids; /* allocated size of childXids[] */
/* CurrentUserId in effect before this transaction */
Oid prevUser; /* previous CurrentUserId setting */
/* SecurityRestrictionContext in effect before this transaction */
int prevSecContext; /* previous SecurityRestrictionContext */
/* read-only state at entry; not used at the outermost level */
bool prevXactReadOnly; /* entry-time xact r/o state */
/* true if recovery was in progress when the transaction started */
bool startedInRecovery; /* did we start in recovery? */
/* whether the XID has already been included in a WAL record */
bool didLogXid; /* has xid been included in WAL record? */
/* Enter/ExitParallelMode counter */
int parallelModeLevel; /* Enter/ExitParallelMode counter */
/* enclosing (parent) transaction state */
struct TransactionStateData *parent; /* back link to parent */
} TransactionStateData;
/* Pointer to a transaction state block. */
typedef TransactionStateData *TransactionState;
VirtualTransactionId
VirtualTransactionIDs由執行事務的後臺進程BackendId和本地分配的LocalTransactionId組成.
/*
 * Top-level transactions are identified by VirtualTransactionIDs comprising
 * the BackendId of the backend running the xact, plus a locally-assigned
 * LocalTransactionId.  These are guaranteed unique over the short term,
 * but will be reused after a database restart; hence they should never
 * be stored on disk.
 *
 * Note that struct VirtualTransactionId can not be assumed to be atomically
 * assignable as a whole.  However, type LocalTransactionId is assumed to
 * be atomically assignable, and the backend ID doesn't change often enough
 * to be a problem, so we can fetch or assign the two fields separately.
 * We deliberately refrain from using the struct within PGPROC, to prevent
 * coding errors from trying to use struct assignment with it; instead use
 * GET_VXID_FROM_PGPROC().
 */
typedef struct
{
BackendId backendId; /* determined at backend startup */
LocalTransactionId localTransactionId; /* backend-local transaction id */
} VirtualTransactionId;
二、源碼解讀
StartTransaction函數用於啓動頂層事務:先將事務狀態置爲TRANS_START,完成各項初始化後再將其設置爲TRANS_INPROGRESS,即CurrentTransactionState->state = TRANS_INPROGRESS.
/*
 * StartTransaction
 *		Start a top-level transaction.
 *
 * Points CurrentTransactionState at TopTransactionStateData and moves its
 * low-level state TRANS_DEFAULT -> TRANS_START -> TRANS_INPROGRESS.  In
 * between, it re-initializes the per-transaction fields and counters,
 * assigns and advertises a virtual transaction id, sets the transaction
 * start timestamp, and calls the start-of-transaction hooks of the other
 * subsystems.
 *
 * Takes no arguments and returns nothing.  blockState is not modified here.
 * The order of the steps below is significant; see the individual comments.
 */
static void
StartTransaction(void)
{
TransactionState s; /* the top-level transaction state block */
VirtualTransactionId vxid; /* virtual transaction id being assigned */
/*
 * Let's just make sure the state stack is empty
 */
s = &TopTransactionStateData;
CurrentTransactionState = s;
Assert(XactTopTransactionId == InvalidTransactionId);
/* check the current transaction state */
Assert(s->state == TRANS_DEFAULT);
/*
 * Set the current transaction state information appropriately during
 * start processing.  Note that once the transaction status is switched
 * this process cannot fail until the user ID and the security context
 * flags are fetched below.
 */
s->state = TRANS_START;
s->transactionId = InvalidTransactionId; /* until assigned */
/*
 * initialize current transaction state fields
 *
 * note: prevXactReadOnly is not used at the outermost level
 */
s->nestingLevel = 1;
s->gucNestLevel = 1;
s->childXids = NULL;
s->nChildXids = 0;
s->maxChildXids = 0;
/*
 * Once the current user ID and the security context flags are fetched,
 * both will be properly reset even if transaction startup fails.
 */
GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
/* SecurityRestrictionContext should never be set outside a transaction */
Assert(s->prevSecContext == 0);
/*
 * Make sure we've reset xact state variables
 *
 * If recovery is still in progress, mark this transaction as read-only.
 * We have lower level defences in XLogInsert and elsewhere to stop us
 * from modifying data during recovery, but this gives the normal
 * indication to the user that the transaction is read-only.
 */
if (RecoveryInProgress())
{
/* in recovery: force the transaction read-only */
s->startedInRecovery = true;
XactReadOnly = true;
}
else
{
s->startedInRecovery = false;
XactReadOnly = DefaultXactReadOnly;
}
XactDeferrable = DefaultXactDeferrable;
XactIsoLevel = DefaultXactIsoLevel;
forceSyncCommit = false;
MyXactFlags = 0;
/*
 * reinitialize within-transaction counters
 */
s->subTransactionId = TopSubTransactionId;
currentSubTransactionId = TopSubTransactionId;
currentCommandId = FirstCommandId;
currentCommandIdUsed = false;
/*
 * initialize reported xid accounting
 */
nUnreportedXids = 0;
s->didLogXid = false;
/*
 * must initialize resource-management stuff first
 */
AtStart_Memory();
AtStart_ResourceOwner();
/*
 * Assign a new LocalTransactionId, and combine it with the backendId to
 * form a virtual transaction id.
 */
vxid.backendId = MyBackendId;
vxid.localTransactionId = GetNextLocalTransactionId();
/*
 * Lock the virtual transaction id before we announce it in the proc array
 */
VirtualXactLockTableInsert(vxid);
/*
 * Advertise it in the proc array.  We assume assignment of
 * LocalTransactionID is atomic, and the backendId should be set already.
 */
Assert(MyProc->backendId == vxid.backendId);
MyProc->lxid = vxid.localTransactionId;
TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
/*
 * set transaction_timestamp() (a/k/a now()).  Normally, we want this to
 * be the same as the first command's statement_timestamp(), so don't do a
 * fresh GetCurrentTimestamp() call (which'd be expensive anyway).  But
 * for transactions started inside procedures (i.e., nonatomic SPI
 * contexts), we do need to advance the timestamp.  Also, in a parallel
 * worker, the timestamp should already have been provided by a call to
 * SetParallelStartTimestamps().
 */
if (!IsParallelWorker())
{
if (!SPI_inside_nonatomic_context())
xactStartTimestamp = stmtStartTimestamp;
else
xactStartTimestamp = GetCurrentTimestamp();
}
else
Assert(xactStartTimestamp != 0);
pgstat_report_xact_timestamp(xactStartTimestamp);
/* Mark xactStopTimestamp as unset. */
xactStopTimestamp = 0;
/*
 * initialize other subsystems for new transaction
 */
AtStart_GUC();
AtStart_Cache();
AfterTriggerBeginXact();
/*
 * done with start processing, set current transaction state to "in
 * progress"
 */
s->state = TRANS_INPROGRESS;
ShowTransactionState("StartTransaction");
}
三、跟蹤分析
執行begin,觸發該函數調用(注意:以下跟蹤所用的源碼版本與上文列出的代碼略有差異,例如檢查事務狀態處是if判斷而非Assert,nestingLevel等字段的初始化位於設置時間戳之後,閱讀時請以gdb顯示的行爲準)
11:10:36 (xdb@[local]:5432)testdb=# begin;
啓動gdb,設置斷點
(gdb) b StartTransaction
Breakpoint 4 at 0x54800f: file xact.c, line 1825.
(gdb) c
Continuing.
Breakpoint 4, StartTransaction () at xact.c:1825
1825 s = &TopTransactionStateData;
(gdb)
查看調用棧
(gdb) bt
#0 StartTransaction () at xact.c:1825
#1 0x0000000000548f50 in StartTransactionCommand () at xact.c:2718
#2 0x00000000008c8e7d in start_xact_command () at postgres.c:2500
#3 0x00000000008c6771 in exec_simple_query (query_string=0x24a6ec8 "begin;") at postgres.c:948
#4 0x00000000008cae70 in PostgresMain (argc=1, argv=0x24d2dc8, dbname=0x24d2c30 "testdb", username=0x24a3ba8 "xdb")
at postgres.c:4182
#5 0x000000000082642b in BackendRun (port=0x24c8c00) at postmaster.c:4361
#6 0x0000000000825b8f in BackendStartup (port=0x24c8c00) at postmaster.c:4033
#7 0x0000000000821f1c in ServerLoop () at postmaster.c:1706
#8 0x00000000008217b4 in PostmasterMain (argc=1, argv=0x24a1b60) at postmaster.c:1379
#9 0x00000000007488ef in main (argc=1, argv=0x24a1b60) at main.c:228
(gdb)
查看TopTransactionStateData靜態變量(本次StartTransaction尚未對其重新初始化,其中prevUser、didLogXid等爲此前遺留的值)
(gdb) p TopTransactionStateData
$7 = {transactionId = 0, subTransactionId = 0, name = 0x0, savepointLevel = 0, state = TRANS_DEFAULT,
blockState = TBLOCK_DEFAULT, nestingLevel = 0, gucNestLevel = 0, curTransactionContext = 0x0, curTransactionOwner = 0x0,
childXids = 0x0, nChildXids = 0, maxChildXids = 0, prevUser = 10, prevSecContext = 0, prevXactReadOnly = false,
startedInRecovery = false, didLogXid = true, parallelModeLevel = 0, parent = 0x0}
設置靜態變量CurrentTransactionState = &TopTransactionStateData;
(gdb) n
1826 CurrentTransactionState = s;
(gdb)
1828 Assert(XactTopTransactionId == InvalidTransactionId);
(gdb)
初始化事務狀態
(gdb) n
1833 if (s->state != TRANS_DEFAULT)
(gdb)
1841 s->state = TRANS_START;
(gdb)
1842 s->transactionId = InvalidTransactionId; /* until assigned */
(gdb)
1852 if (RecoveryInProgress())
(gdb)
1859 s->startedInRecovery = false;
(gdb)
1860 XactReadOnly = DefaultXactReadOnly;
(gdb)
1862 XactDeferrable = DefaultXactDeferrable;
(gdb)
1863 XactIsoLevel = DefaultXactIsoLevel;
(gdb)
1864 forceSyncCommit = false;
(gdb)
1865 MyXactFlags = 0;
(gdb)
1870 s->subTransactionId = TopSubTransactionId;
(gdb)
1871 currentSubTransactionId = TopSubTransactionId;
(gdb)
1872 currentCommandId = FirstCommandId;
(gdb)
1873 currentCommandIdUsed = false;
(gdb)
1878 nUnreportedXids = 0;
(gdb)
1879 s->didLogXid = false;
(gdb)
1884 AtStart_Memory();
(gdb)
初始化資源管理相關子系統(內存上下文/ResourceOwner;GUC/Cache等在後面初始化)
(gdb)
1884 AtStart_Memory();
(gdb) n
1885 AtStart_ResourceOwner();
(gdb)
設置虛擬事務ID
1891 vxid.backendId = MyBackendId;
(gdb)
1892 vxid.localTransactionId = GetNextLocalTransactionId();
(gdb)
1897 VirtualXactLockTableInsert(vxid);
(gdb)
1903 Assert(MyProc->backendId == vxid.backendId);
(gdb) p vxid
$8 = {backendId = 3, localTransactionId = 6}
(gdb)
(gdb) n
1904 MyProc->lxid = vxid.localTransactionId;
(gdb)
設置時間戳
1906 TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
(gdb)
1917 if (!IsParallelWorker())
(gdb)
1919 if (!SPI_inside_nonatomic_context())
(gdb)
1920 xactStartTimestamp = stmtStartTimestamp;
(gdb)
1926 pgstat_report_xact_timestamp(xactStartTimestamp);
(gdb)
1928 xactStopTimestamp = 0;
(gdb)
(gdb) p xactStartTimestamp
$9 = 601009839154257
初始化其他字段
(gdb) n
1935 s->nestingLevel = 1;
(gdb) n
1936 s->gucNestLevel = 1;
(gdb)
1937 s->childXids = NULL;
(gdb)
1938 s->nChildXids = 0;
(gdb)
1939 s->maxChildXids = 0;
(gdb)
1940 GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
(gdb)
1942 Assert(s->prevSecContext == 0);
(gdb)
1947 AtStart_GUC();
(gdb)
1948 AtStart_Cache();
(gdb)
1949 AfterTriggerBeginXact();
(gdb)
1955 s->state = TRANS_INPROGRESS;
(gdb)
1957 ShowTransactionState("StartTransaction");
(gdb)
1958 }
(gdb)
初始化後的事務狀態
(gdb) p *s
$10 = {transactionId = 0, subTransactionId = 1, name = 0x0, savepointLevel = 0, state = TRANS_INPROGRESS,
blockState = TBLOCK_DEFAULT, nestingLevel = 1, gucNestLevel = 1, curTransactionContext = 0x2523850,
curTransactionOwner = 0x24d4868, childXids = 0x0, nChildXids = 0, maxChildXids = 0, prevUser = 10, prevSecContext = 0,
prevXactReadOnly = false, startedInRecovery = false, didLogXid = false, parallelModeLevel = 0, parent = 0x0}
(gdb)
完成調用
(gdb) n
StartTransactionCommand () at xact.c:2719
2719 s->blockState = TBLOCK_STARTED;
(gdb)
2720 break;
(gdb)
DONE!