英文鏈接:https://cstack.github.io/db_tutorial/parts/part6.html
爲了實現B-tree,這一節先對當前的實現進行一點重構。
我們增加一個概念:Cursor(遊標),代表了對象在數據庫中的位置。那麼關於cursor有幾件事需要完成:
1、在表的開頭(第一行)創建cursor
2、在表的末尾(最後一行之後)創建cursor
3、訪問cursor所指向的行
4、訪問cursor的下一行
完成這些之後,我們還會繼續實現:
1、使用cursor刪除行
2、使用cursor修改行
3、使用ID查詢表,並創建指向該行的cursor
閒話少說(原文 "Without further ado" 是英文習語,並非指 ADO 技術),Cursor類型暫時按如下定義:
批註:以下關於 ADO 連接對象(ADO Connection Object)的說明僅供參考,與本節的 Cursor 實現無關。
ADO 連接對象用來創建到某個數據源的開放連接。通過此連接,您可以對此數據庫進行訪問和操作。
查看此連接對象的所有方法和屬性
/* A position inside a table, expressed as a row index. */
typedef struct Cursor_t {
    Table *table;      /* table this cursor refers to */
    uint32_t row_num;  /* index of the row the cursor is positioned on */
    bool end_of_table; /* Indicates a position one past the last element */
} Cursor;
table_start() 和 table_end() 用於創建新的cursor:
/*
 * Allocates a cursor positioned on row 0 of `table`.
 * end_of_table is set immediately when the table has no rows.
 * The caller owns the returned cursor and must free() it.
 * Terminates the process if the allocation fails.
 */
Cursor* table_start(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        /* malloc may return NULL; do not dereference it. */
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    cursor->table = table;
    cursor->row_num = 0;
    cursor->end_of_table = (table->num_rows == 0);
    return cursor;
}
/*
 * Allocates a cursor positioned one past the last row of `table`
 * (row_num == table->num_rows, end_of_table == true).
 * The caller owns the returned cursor and must free() it.
 * Terminates the process if the allocation fails.
 */
Cursor* table_end(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        /* malloc may return NULL; do not dereference it. */
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    cursor->table = table;
    cursor->row_num = table->num_rows;
    cursor->end_of_table = true;
    return cursor;
}
row_slot() 修改爲 cursor_value(),該函數的作用:返回一個指針,指向cursor所指向的位置。
定義1個函數cursor_advance,實現對cursor的row_num加1。
/*
 * Moves the cursor forward by one row and sets end_of_table once the
 * new position reaches table->num_rows.
 */
void cursor_advance(Cursor* cursor) {
    /* Must increment: assigning 1 would pin the cursor on row 1 forever. */
    cursor->row_num += 1;
    if (cursor->row_num >= cursor->table->num_rows) {
        cursor->end_of_table = true;
    }
}
最後,我們修改“virtual machine”,改用抽象的對象:Cursor。當插入一行時,我們打開一個Cursor,指向表尾;在cursor所指向的位置寫入該行之後,釋放Cursor。
Row* row_to_insert = &(statement->row_to_insert);
+ Cursor* cursor = table_end(table);
- serialize_row(row_to_insert, row_slot(table, table->num_rows));
+ serialize_row(row_to_insert, cursor_value(cursor));
table->num_rows += 1;
+ free(cursor);
+
return EXECUTE_SUCCESS;
}
同理,修改execute_select的實現,使用cursor替換row_slot
ExecuteResult execute_select(Statement* statement, Table* table) {
+ Cursor* cursor = table_start(table);
+
Row row;
- for (uint32_t i = 0; i < table->num_rows; i++) {
- deserialize_row(row_slot(table, i), &row);
+ while (!(cursor->end_of_table)) {
+ deserialize_row(cursor_value(cursor), &row);
print_row(&row);
+ cursor_advance(cursor);
}
+
+ free(cursor);
+
return EXECUTE_SUCCESS;
}
至此,execute_select和execute_insert就不需要再做任何關於表存儲的假設,就可以通過cursor和table進行交互了。
還是使用上節的用例測試下:
db > insert 1 cstack [email protected]
Executed.
db > insert 2 hello [email protected]
Executed.
db > select
(1, cstack, [email protected])
(2, hello, [email protected])
(1, cstack, [email protected])
(2, hello, [email protected])
Executed.
db >
至此最新代碼:
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
/* Outcome of handling a meta command (input that starts with '.'). */
typedef enum MetaCommandResult_t {
    META_COMMAND_SUCCESS,
    META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;
/* Outcome of executing a prepared statement. */
typedef enum ExecuteResult_t {
    EXECUTE_SUCCESS,
    EXECUTE_TABLE_FULL
} ExecuteResult;
/* Outcome of parsing one line of SQL-like input. */
typedef enum PrepareResult_t {
    PREPARE_SUCCESS,
    PREPARE_NEGATIVE_ID,
    PREPARE_STRING_TOO_LONG,
    PREPARE_SYNTAX_ERROR,
    PREPARE_UNRECOGNIZED_STATEMENT
} PrepareResult;
/*
 * Row definition (application-specific schema): maximum number of
 * characters in each text column, excluding the terminating NUL.
 *
 * These are macros rather than `const uint32_t` objects because they are
 * used as array bounds in a file-scope struct, which requires an integer
 * constant expression in C.
 */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
/* One table row; each text column reserves one extra byte for the NUL. */
typedef struct Row_t {
    uint32_t id;
    char username[COLUMN_USERNAME_SIZE + 1];
    char email[COLUMN_EMAIL_SIZE + 1];
} Row;
/* Kinds of SQL statement the parser recognises. */
typedef enum StatementType_t {
    STATEMENT_INSERT,
    STATEMENT_SELECT
} StatementType;
/* A parsed statement, ready to be executed. */
typedef struct Statement_t {
    StatementType type;
    Row row_to_insert; /* only used by insert statement */
} Statement;
/* Size in bytes of member `member` of struct type `type`; no instance needed. */
#define size_of_attribute(type, member) sizeof(((type *)0)->member)
/*
 * Compact on-disk/in-page layout of a serialized Row: the three fields are
 * stored back to back, starting with the id at offset 0.
 *
 * These are enum constants rather than `const uint32_t` objects: in ISO C a
 * const-qualified object is not a constant expression, so it may not be used
 * in another file-scope initializer or as an array bound.
 */
enum {
    ID_SIZE = size_of_attribute(Row, id),
    USERNAME_SIZE = size_of_attribute(Row, username),
    EMAIL_SIZE = size_of_attribute(Row, email),
    ID_OFFSET = 0,
    USERNAME_OFFSET = ID_OFFSET + ID_SIZE,
    EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE,
    ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE
};
/*
 * Paging limits. Most architectures use a 4 KiB page size, so the same size
 * is used here and no conversion is needed. Rows never straddle a page
 * boundary: each page holds ROWS_PER_PAGE whole rows.
 */
enum {
    PAGE_SIZE = 4096,
    TABLE_MAX_PAGES = 100,
    ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE,
    TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES
};
/*
 * Pager: owns the database file descriptor and a cache of pages, indexed
 * by page number. A NULL slot means the page has not been loaded.
 */
typedef struct Pager_t {
    int file_descriptor;
    uint32_t file_length;
    void *pages[TABLE_MAX_PAGES];
} Pager;
/* Table: a pager plus the number of rows currently stored. */
typedef struct Table_t {
    Pager *pager;
    uint32_t num_rows;
} Table;
/*
 * Serialize: copies the fields of `source` into the compact layout at
 * `destination`, which must have room for ROW_SIZE bytes.
 */
void serialize_row(Row* source, void* destination) {
    /* Use a char pointer: arithmetic on void* is a GNU extension, not ISO C. */
    char *out = destination;
    memcpy(out + ID_OFFSET, &source->id, ID_SIZE);
    memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
    memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}
/*
 * Deserialize: copies the compact ROW_SIZE-byte representation at `source`
 * back into the fields of `destination`.
 */
void deserialize_row(void* source, Row* destination) {
    /* Use a char pointer: arithmetic on void* is a GNU extension, not ISO C. */
    const char *in = source;
    memcpy(&destination->id, in + ID_OFFSET, ID_SIZE);
    memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
    memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}
/**
 * Returns the cached page with number page_num, loading it on first access.
 *
 * On a cache miss a zero-filled page is allocated; if the page lies within
 * the current file contents it is then read from the file.
 * Terminates the process on an out-of-range page number, allocation
 * failure, or seek/read error.
 *
 * @param pager    pager that owns the page cache and file descriptor
 * @param page_num zero-based page number, must be < TABLE_MAX_PAGES
 * @return pointer to PAGE_SIZE bytes owned by the pager
 */
void *get_page(Pager *pager, uint32_t page_num)
{
    /* Valid indices are 0 .. TABLE_MAX_PAGES-1, so equality is out of bounds too. */
    if (page_num >= TABLE_MAX_PAGES) {
        printf("Tried to fetch page number out of bounds. %u >= %u\n",
               (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
        exit(EXIT_FAILURE);
    }
    if (pager->pages[page_num] == NULL) {
        // Cache miss. Allocate zeroed memory (so bytes never written by a row
        // are not garbage when the page is flushed) and load from file.
        void *page = calloc(1, PAGE_SIZE);
        if (page == NULL) {
            printf("Out of memory\n");
            exit(EXIT_FAILURE);
        }
        uint32_t num_pages = pager->file_length / PAGE_SIZE;
        // We might save a partial page at the end of the file
        if (pager->file_length % PAGE_SIZE) {
            num_pages += 1;
        }
        // Only pages 0 .. num_pages-1 have any bytes in the file.
        if (page_num < num_pages) {
            off_t offset = lseek(pager->file_descriptor,
                                 (off_t)page_num * PAGE_SIZE, SEEK_SET);
            if (offset == -1) {
                printf("Error seeking: %d\n", errno);
                exit(EXIT_FAILURE);
            }
            ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
            if (bytes_read == -1) {
                printf("Error reading file: %d\n", errno);
                exit(EXIT_FAILURE);
            }
        }
        pager->pages[page_num] = page;
    }
    return pager->pages[page_num];
}
/**
 * Opens (creating if necessary) the database file and builds a pager for it.
 *
 * The file length is recorded and every page-cache slot starts out NULL.
 * Terminates the process if the file cannot be opened, its length cannot be
 * determined, or memory cannot be allocated.
 *
 * @param filename path of the database file
 * @return newly allocated pager
 */
Pager *pager_open(const char *filename)
{
    int fd = open(filename,
                  O_RDWR |      // Read/Write mode
                      O_CREAT,  // Create file if it does not exist
                  S_IWUSR |     // User write permission
                      S_IRUSR   // User read permission
    );
    if (fd == -1) {
        printf("Unable to open file\n");
        exit(EXIT_FAILURE);
    }
    /* Seeking to the end yields the file length; -1 signals failure. */
    off_t file_length = lseek(fd, 0, SEEK_END);
    if (file_length == -1) {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
    }
    Pager *pager = malloc(sizeof *pager);
    if (pager == NULL) {
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    pager->file_descriptor = fd;
    pager->file_length = (uint32_t)file_length;
    for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
        pager->pages[i] = NULL;
    }
    return pager;
}
/**
 * Cursor: a position inside a table, expressed as a row index.
 */
typedef struct Cursor_t {
    Table *table;      /* table this cursor refers to */
    uint32_t row_num;  /* index of the row the cursor is positioned on */
    bool end_of_table; /* Indicates a position one past the last element */
} Cursor;
/**
 * Cursor API: allocates a cursor positioned on row 0 of the table.
 * end_of_table is set immediately when the table has no rows.
 * The caller owns the returned cursor and must free() it.
 * Terminates the process if the allocation fails.
 *
 * @param table table to iterate over
 * @return newly allocated cursor
 */
Cursor* table_start(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        /* malloc may return NULL; do not dereference it. */
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    cursor->table = table;
    cursor->row_num = 0;
    cursor->end_of_table = (table->num_rows == 0);
    return cursor;
}
/**
 * Allocates a cursor positioned one past the last row of the table
 * (row_num == table->num_rows, end_of_table == true).
 * The caller owns the returned cursor and must free() it.
 * Terminates the process if the allocation fails.
 *
 * @param table table to position in
 * @return newly allocated cursor
 */
Cursor* table_end(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        /* malloc may return NULL; do not dereference it. */
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    cursor->table = table;
    cursor->row_num = table->num_rows;
    cursor->end_of_table = true;
    return cursor;
}
/**
 * Returns a pointer to the slot of the row the cursor is positioned on.
 *
 * The page holding the row is fetched through get_page(), and the slot is
 * the row's index within that page multiplied by ROW_SIZE.
 *
 * @param cursor cursor identifying the table and row
 * @return pointer to ROW_SIZE bytes inside the cached page
 */
void* cursor_value(Cursor* cursor)
{
    uint32_t row_num = cursor->row_num;
    uint32_t page_num = row_num / ROWS_PER_PAGE;
    /* Use a char pointer: arithmetic on void* is a GNU extension, not ISO C. */
    char *page = get_page(cursor->table->pager, page_num);
    uint32_t row_offset = row_num % ROWS_PER_PAGE;
    uint32_t byte_offset = row_offset * ROW_SIZE;
    return page + byte_offset;
}
/**
 * Moves the cursor forward by one row; end_of_table is raised once the new
 * position is no longer below the table's row count.
 * @param cursor cursor to advance
 */
void cursor_advance(Cursor* cursor)
{
    uint32_t next_row = cursor->row_num + 1;

    cursor->row_num = next_row;
    if (!(next_row < cursor->table->num_rows)) {
        cursor->end_of_table = true;
    }
}
/* Prints one row to stdout as "(id, username, email)" followed by a newline. */
void print_row(Row* row) {
    /* id is a uint32_t, so it needs an unsigned 32-bit conversion, not %d. */
    printf("(%" PRIu32 ", %s, %s)\n", row->id, row->username, row->email);
}
/**
 * Opens the database file and builds the table object on top of it.
 *
 * The initial row count is derived from the file length divided by ROW_SIZE.
 * Terminates the process if the table cannot be allocated.
 *
 * @param filename path of the database file
 * @return newly allocated table
 */
Table *db_open(const char *filename)
{
    Pager *pager = pager_open(filename);
    Table *table = malloc(sizeof *table);
    if (table == NULL) {
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    table->pager = pager;
    table->num_rows = pager->file_length / ROW_SIZE;
    return table;
}
/* Holds one line of user input together with getline()'s bookkeeping. */
typedef struct InputBuffer_t {
    char *buffer;          /* line storage, managed by getline() */
    size_t buffer_length;  /* capacity of `buffer` */
    ssize_t input_length;  /* length of the line without its newline */
} InputBuffer;
/*
 * Allocates an empty input buffer (NULL storage, zero lengths) so that the
 * first getline() call allocates the line storage itself.
 * Terminates the process if the allocation fails.
 */
InputBuffer* new_input_buffer(void)
{
    InputBuffer* input_buffer = malloc(sizeof *input_buffer);
    if (input_buffer == NULL) {
        /* malloc may return NULL; do not dereference it. */
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }
    input_buffer->buffer = NULL;
    input_buffer->buffer_length = 0;
    input_buffer->input_length = 0;
    return input_buffer;
}
/* Writes the interactive prompt to stdout (no trailing newline). */
void print_prompt()
{
    fputs("db > ", stdout);
}
/*
 * Reads one line from standard input into the buffer and drops its last
 * character (the trailing newline). Exits if nothing could be read.
 */
void read_input(InputBuffer* input_buffer)
{
    ssize_t line_len = getline(&input_buffer->buffer,
                               &input_buffer->buffer_length, stdin);
    if (line_len <= 0) {
        printf("Error reading input\n");
        exit(EXIT_FAILURE);
    }

    // Ignore trailing newline
    ssize_t last = line_len - 1;
    input_buffer->buffer[last] = 0;
    input_buffer->input_length = last;
}
/**
 * Writes the first `size` bytes of a cached page to its position in the file.
 * Exits if the page is not cached or if seeking/writing fails.
 * @param pager    pager owning the page and the file descriptor
 * @param page_num number of the page to flush
 * @param size     number of bytes to write from the start of the page
 */
void pager_flush(Pager *pager, uint32_t page_num, uint32_t size)
{
    void *cached = pager->pages[page_num];
    if (!cached) {
        printf("Tried to flush null page\n");
        exit(EXIT_FAILURE);
    }

    if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
    }

    if (write(pager->file_descriptor, cached, size) == -1) {
        printf("Error writing: %d\n", errno);
        exit(EXIT_FAILURE);
    }
}
/**
 * Closes the database: flushes cached pages to the file, closes the file
 * descriptor and releases the page cache, the pager and the table.
 *
 * Full pages are written as PAGE_SIZE bytes; a trailing partial page is
 * written as (rows in it) * ROW_SIZE bytes. `table` must not be used after
 * this call.
 */
void db_close(Table *table) {
    Pager *pager = table->pager;
    uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
    for (uint32_t i = 0; i < num_full_pages; i++) {
        if (pager->pages[i] == NULL) {
            continue; /* never loaded, nothing to write */
        }
        pager_flush(pager, i, PAGE_SIZE);
        free(pager->pages[i]);
        pager->pages[i] = NULL;
    }
    // There may be a partial page to write to the end of the file
    // This should not be needed after we switch to a B-tree
    uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE;
    if (num_additional_rows > 0) {
        uint32_t page_num = num_full_pages;
        if (pager->pages[page_num] != NULL) {
            pager_flush(pager, page_num, num_additional_rows * ROW_SIZE);
            free(pager->pages[page_num]);
            pager->pages[page_num] = NULL;
        }
    }
    int result = close(pager->file_descriptor);
    if (result == -1) {
        printf("Error closing db file.\n");
        exit(EXIT_FAILURE);
    }
    /* Release any pages that were cached but not covered by the flushes above. */
    for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
        free(pager->pages[i]);
        pager->pages[i] = NULL;
    }
    free(pager);
    /* The table was allocated by db_open(); release it as well. */
    free(table);
}
/*
 * Handles a meta command. ".exit" closes the database and ends the process;
 * anything else is reported as unrecognized.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table)
{
    if (strcmp(input_buffer->buffer, ".exit") != 0) {
        return META_COMMAND_UNRECOGNIZED_COMMAND;
    }

    db_close(table);
    exit(EXIT_SUCCESS);
}
/*
 * Parses and validates "insert <id> <username> <email>".
 *
 * The input buffer is tokenised in place on spaces. Returns
 * PREPARE_SYNTAX_ERROR when a field is missing or the id is not a decimal
 * integer in range, PREPARE_NEGATIVE_ID for a negative id,
 * PREPARE_STRING_TOO_LONG when a text column exceeds its limit, and
 * PREPARE_SUCCESS with statement->row_to_insert filled in otherwise.
 */
PrepareResult prepare_insert(InputBuffer *input_buffer, Statement *statement) {
    statement->type = STATEMENT_INSERT;

    /* The first token is the keyword itself; consume and discard it. */
    (void)strtok(input_buffer->buffer, " ");
    char *id_string = strtok(NULL, " ");
    char *username = strtok(NULL, " ");
    char *email = strtok(NULL, " ");
    if (id_string == NULL || username == NULL || email == NULL) {
        return PREPARE_SYNTAX_ERROR;
    }

    /* strtol reports non-numeric input and overflow, which atoi cannot. */
    errno = 0;
    char *end = NULL;
    long id = strtol(id_string, &end, 10);
    if (end == id_string || *end != '\0') {
        return PREPARE_SYNTAX_ERROR;
    }
    if (id < 0) {
        return PREPARE_NEGATIVE_ID;
    }
    if (errno == ERANGE || id > INT32_MAX) {
        return PREPARE_SYNTAX_ERROR;
    }

    if (strlen(username) > COLUMN_USERNAME_SIZE) {
        return PREPARE_STRING_TOO_LONG;
    }
    if (strlen(email) > COLUMN_EMAIL_SIZE) {
        return PREPARE_STRING_TOO_LONG;
    }

    statement->row_to_insert.id = (uint32_t)id;
    /* Lengths were checked above, so both copies fit including the NUL. */
    strcpy(statement->row_to_insert.username, username);
    strcpy(statement->row_to_insert.email, email);
    return PREPARE_SUCCESS;
}
/*
 * Classifies the input line by its first six characters (case-insensitive)
 * and prepares the matching statement.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer,Statement* statement)
{
    const char *text = input_buffer->buffer;

    if (strncasecmp(text, "insert", 6) == 0) {
        return prepare_insert(input_buffer, statement);
    }
    if (strncasecmp(text, "select", 6) != 0) {
        return PREPARE_UNRECOGNIZED_STATEMENT;
    }

    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
}
/*
 * Executes an insert: serializes the statement's row into the slot at the
 * end of the table and bumps the row count. Reports EXECUTE_TABLE_FULL when
 * the table already holds TABLE_MAX_ROWS rows.
 */
ExecuteResult execute_insert(Statement *statement, Table *table)
{
    if (!(table->num_rows < TABLE_MAX_ROWS)) {
        return EXECUTE_TABLE_FULL;
    }

    Cursor *tail = table_end(table);
    void *slot = cursor_value(tail);
    serialize_row(&(statement->row_to_insert), slot);
    free(tail);

    table->num_rows += 1;
    return EXECUTE_SUCCESS;
}
/*
 * Executes a select: walks a cursor from the start of the table to its end,
 * printing every row along the way.
 */
ExecuteResult execute_select(Statement *statement, Table *table)
{
    Row current;
    Cursor *it;

    for (it = table_start(table); !it->end_of_table; cursor_advance(it)) {
        deserialize_row(cursor_value(it), &current);
        print_row(&current);
    }
    free(it);

    return EXECUTE_SUCCESS;
}
/*
 * Dispatches a prepared statement to the executor for its type and returns
 * that executor's result. Terminates the process if the statement carries a
 * type value that is not one of the StatementType enumerators.
 */
ExecuteResult execute_statement(Statement* statement , Table* table)
{
    switch (statement->type)
    {
        case (STATEMENT_INSERT):
            return execute_insert(statement, table);
        case (STATEMENT_SELECT):
            return execute_select(statement, table);
    }
    /* Never fall off the end of a non-void function. */
    printf("Unknown statement type.\n");
    exit(EXIT_FAILURE);
}
/* 主函數*/
int main(int argc, char* argv[])
{
if (argc < 2) {
printf("Must supply a database filename.\n");
exit(EXIT_FAILURE);
}
char *filename = argv[1];
Table *table = db_open(filename);
InputBuffer *input_buffer = new_input_buffer();
while (true)
{
print_prompt();
read_input(input_buffer);
if (input_buffer->buffer[0] == '.')
{
switch (do_meta_command(input_buffer,table))
{
case (META_COMMAND_SUCCESS):
continue;
case (META_COMMAND_UNRECOGNIZED_COMMAND):
printf("Unrecognized command '%s'\n", input_buffer->buffer);
continue;
}
}
Statement statement;
switch (prepare_statement(input_buffer, &statement))
{
case (PREPARE_SUCCESS):
break;
case (PREPARE_NEGATIVE_ID):
printf("ID must be positive.\n");
continue;
case (PREPARE_STRING_TOO_LONG):
printf("String is too long.\n");
continue;
case (PREPARE_SYNTAX_ERROR):
printf("Syntax error. Could not parse statement.\n");
continue;
case (PREPARE_UNRECOGNIZED_STATEMENT):
printf("Unrecognized keyword at start of '%s'.\n",
input_buffer->buffer);
continue;
}
switch (execute_statement(&statement, table))
{
case (EXECUTE_SUCCESS):
printf("Executed.\n");
break;
case (EXECUTE_TABLE_FULL):
printf("Error: Table full.\n");
break;
}
}
}