MySQL的.frm文件用於保存表結構信息。最近從源碼瞭解了一下.frm的組成;網上也有一些大神對.frm的組成做過分享,但自己再去了解一下總是沒壞處的。.frm文件由create_frm函數創建,但這個函數只記錄了header部分的信息,具體的表結構信息是在哪個函數中生成的暫未找到;不過open_binary_frm函數負責讀取.frm文件,從中可以瞭解.frm的詳細結構組成。注意:下面的函數都是從5.7.19版本中提取的,5.7之前的版本可能有所不同(比如虛擬列)。
首先看看create_frm創建header的組成部分
/*
  Create the .frm file and fill in its 64-byte header buffer.
  (Excerpt from MySQL 5.7.19; lines of dots mark code omitted by the article.)

  thd          passed to ha_checktype() when resolving the engine type
  name         path handed to mysql_file_create()
  db, table    not referenced in the visible excerpt
  reclength    stored at header offset 16 and used in the total length
  fileinfo     header buffer; the first 64 bytes are zeroed, then filled in
  create_info  source of the table options written into the header
  keys         number of entries in key_info
  key_info     key definitions; only flags and comment length are read here

  NOTE(review): the return statement is elided; presumably the File handle
  from mysql_file_create() is returned -- confirm against the full source.
*/
File create_frm(THD *thd, const char *name, const char *db,
const char *table, uint reclength, uchar *fileinfo,
HA_CREATE_INFO *create_info, uint keys, KEY *key_info)
{
........................
if ((file= mysql_file_create(key_file_frm,
name, CREATE_MODE, create_flags, MYF(0))) >= 0)
{
size_t key_length, tmp_key_length;
uint tmp, csid;
memset(fileinfo, 0, 64);
/* header */
/* Header assembly starts here; per the article author this part is described in the official documentation */
fileinfo[0]=(uchar) 254;
fileinfo[1]= 1;
/* FRM_VER is the .frm file format version */
fileinfo[2]= FRM_VER+3+ MY_TEST(create_info->varchar);
/* Storage engine type code (per the article author, the legacy type enum lists every engine MySQL currently supports) */
fileinfo[3]= (uchar) ha_legacy_type(
ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0));
fileinfo[4]=1;
/* Stores IO_SIZE (4096 per the article author); the author notes that .frm writes are sized in 4K units */
int2store(fileinfo+6,IO_SIZE);/* Next block starts here */
/*
Keep in sync with pack_keys() in unireg.cc
For each key:
8 bytes for the key header
9 bytes for each key-part (MAX_REF_PARTS)
NAME_LEN bytes for the name
1 byte for the NAMES_SEP_CHAR (before the name)
For all keys:
6 bytes for the header
1 byte for the NAMES_SEP_CHAR (after the last name)
9 extra bytes (padding for safety? alignment?)
*/
/*
The loop below totals the bytes needed for index comments (2 bytes plus the
comment length for every key that has one). Per the comment above, the key
section has a fixed 6-byte header, 8 bytes of base info per key and 9 bytes
per key part (a composite key has one 9-byte entry per column); the parsing
in open_binary_frm confirms this layout.
*/
for (i= 0; i < keys; i++)
{
DBUG_ASSERT(MY_TEST(key_info[i].flags & HA_USES_COMMENT) ==
(key_info[i].comment.length > 0));
if (key_info[i].flags & HA_USES_COMMENT)
key_comment_total_bytes += 2 + key_info[i].comment.length;
}
/* Everything below stores values into the header buffer */
key_length= keys * (8 + MAX_REF_PARTS * 9 + NAME_LEN + 1) + 16
+ key_comment_total_bytes;
length= next_io_size((ulong) (IO_SIZE+key_length+reclength+
create_info->extra_size));
int4store(fileinfo+10,length);
/* The 2-byte slot at offset 14 is clamped to 0xffff; the full key_length is stored as 4 bytes at offset 47 below */
tmp_key_length= (key_length < 0xffff) ? key_length : 0xffff;
int2store(fileinfo+14, static_cast<uint16>(tmp_key_length));
int2store(fileinfo+16,reclength);
int4store(fileinfo+18, static_cast<uint32>(create_info->max_rows));
int4store(fileinfo+22, static_cast<uint32>(create_info->min_rows));
/* fileinfo[26] is set in mysql_create_frm() */
fileinfo[27]=2;// Use long pack-fields
/* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */
create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers
int2store(fileinfo+30, static_cast<uint16>(create_info->table_options));
fileinfo[32]=0;// No filename anymore
fileinfo[33]=5; // Mark for 5.0 frm file
int4store(fileinfo+34,create_info->avg_row_length);
/* Charset number, 0 when no default charset is set; low byte goes to offset 38, high byte to offset 41 */
csid= (create_info->default_table_charset ?
create_info->default_table_charset->number : 0);
fileinfo[38]= (uchar) csid;
/*
In future versions, we will store in fileinfo[39] the values of the
TRANSACTIONAL and PAGE_CHECKSUM clauses of CREATE TABLE.
*/
fileinfo[39]= 0;
fileinfo[40]= (uchar) create_info->row_type;
/* Bytes 41-46 were for RAID support; now reused for other purposes */
fileinfo[41]= (uchar) (csid >> 8);
int2store(fileinfo+42, create_info->stats_sample_pages & 0xffff);
fileinfo[44]= (uchar) create_info->stats_auto_recalc;
fileinfo[45]= 0;
fileinfo[46]= 0;
int4store(fileinfo+47, static_cast<uint32>(key_length));
tmp= MYSQL_VERSION_ID; // Store to avoid warning from int4store
int4store(fileinfo+51, tmp);
int4store(fileinfo+55, create_info->extra_size);
/*
59-60 is reserved for extra_rec_buf_length,
61 for default_part_db_type
*/
int2store(fileinfo+62, static_cast<uint16>(create_info->key_block_size));
.........................................................................
} /* create_frm */
從函數代碼得出64bytes的組成如下:
position,bytes(64字節的header)
0,1: 固定值254
1,1: 固定值1
2,1: FRM_VER+3+MY_TEST(create_info->varchar) frm_ver表示frm文件版本號
3,1: (uchar) ha_legacy_type(ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0)) 引擎類型
4,1: 固定值1
5,1: 0
6,2: IO_size=4096
8,2: 固定值
10,4: Length, based on key_length + rec_length + create_info->extra_size
14,2: tmp_key_length
16,2: reclength
18,4: max_rows
22,4: min_rows
26,1: 在create_frm中爲0,實際值在mysql_create_frm()中設置
27,1: 固定值2
28,2: key_info_length
30,2: create_info->table_options(寫入前已置上HA_OPTION_LONG_BLOB_PTR標誌,表示使用可移植的blob指針)
32,1: 固定值0
33,1: 固定值5 代表frm版本爲5.0
34,4: create_info->avg_row_length 行平均長度
38,1: 表默認字符集編號的低8位(高8位存放在偏移41處)
39,1: 固定值0
40,1: create_info->row_type
41,6: 這6個字節原先用於RAID支持,現已另作他用:41爲字符集編號的高8位(csid >> 8),42-43爲stats_sample_pages,44爲stats_auto_recalc,45-46爲0
47,4: key_length 用於偏移讀取默認值信息,默認值起始位置在io_size+(key_length or tmp_key_length)
51,4: mysql版本號
55,4: create_info->extra_size
59,2: extra_rec_buf_length
61,1: default_part_db_type 分區表類型
62,2: create_info->key_block_size
下面再看open_binary_frm的組成
/*
  Read and decode a binary .frm file into a TABLE_SHARE.
  (Excerpt from MySQL 5.7.19; lines of dots mark code omitted by the article.)

  thd    passed through to make_field_from_frm()
  share  table share that receives the keys, fields, table comment, etc.
  head   the file header bytes (assumed to be already read by the caller --
         the read itself is not shown here)
  file   open .frm file handle

  The visible code sets `error` before each stage and jumps to `err` on
  failure. NOTE(review): the err label and the return value are elided.
*/
static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
File file)
{
...............................................................................................................................
field_pack_length= new_frm_ver < 2 ? 11 : 17; // bytes used by one field's packed base info
disk_buff= 0;
error= 3;
/* Position of the form in the form file. */
if (!(pos= get_form_pos(file, head)))// pos is where the field (form) info starts; per the article author it is found via header[4:6] + (header[8:10] * 4) and stored in the first 4 bytes there -- NOTE(review): get_form_pos is not shown, confirm
goto err; /* purecov: inspected */
mysql_file_seek(file,pos,MY_SEEK_SET,MYF(0));
if (mysql_file_read(file, forminfo,288,MYF(MY_NABP))) // read the 288 base bytes of form info
goto err;
/* This part reads the .frm header and mirrors create_frm, so the article omits it */
.......................................................................................................................................................
/* Read keyinformation */
key_info_length= (uint) uint2korr(head+28); // total bytes of key info, taken from the 2 bytes at header offset 28
mysql_file_seek(file, (ulong) uint2korr(head+6), MY_SEEK_SET, MYF(0)); // seek to the offset stored at header+6 (IO_SIZE, 4096 per the article); the 64-byte header lies inside that first block
if (read_string(file, &disk_buff,key_info_length)) // read the key info into disk_buff
goto err; /* purecov: inspected */
/* High bit of byte 0 set: key count spans bytes 0-1 and the key-part count is 2 bytes at offset 2 */
if (disk_buff[0] & 0x80)
{
share->keys= keys= (disk_buff[1] << 7) | (disk_buff[0] & 0x7f);
share->key_parts= key_parts= uint2korr(disk_buff+2);
}
else
{
share->keys= keys= disk_buff[0]; // number of keys
share->key_parts= key_parts= disk_buff[1]; // total key parts across all keys
}
share->keys_for_keyread.init(0);
share->keys_in_use.init(keys);
strpos=disk_buff+6; // skip the 6-byte key-section header
use_extended_sk=
ha_check_storage_engine_flag(share->db_type(),
HTON_SUPPORTS_EXTENDED_KEYS);
uint total_key_parts;
if (use_extended_sk)
{
uint primary_key_parts= keys ?
(new_frm_ver >= 3) ? (uint) strpos[4] : (uint) strpos[3] : 0;
total_key_parts= key_parts + primary_key_parts * (keys - 1);
}
else
total_key_parts= key_parts;
n_length= keys * sizeof(KEY) + total_key_parts * sizeof(KEY_PART_INFO); // size of the in-memory KEY and KEY_PART_INFO arrays; NOTE(review): the article says "key is 8, key part is 9", which matches the on-disk strides used below, not these sizeof() values
..........................................................................................................................................................
// loop over the keys
for (i=0 ; i < keys ; i++, keyinfo++)
{
keyinfo->table= 0; // Updated in open_frm
if (new_frm_ver >= 3)
{
keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME;
keyinfo->key_length= (uint) uint2korr(strpos+2); // total key length
keyinfo->user_defined_key_parts= (uint) strpos[4]; // number of key parts in this key
keyinfo->algorithm= (enum ha_key_alg) strpos[5]; // index algorithm (per the article: btree, hash, fulltext)
keyinfo->block_size= uint2korr(strpos+6);
strpos+=8; // each key record is 8 bytes; advance past it
}
...........................................................................................................................................................
// loop over this key's parts
for (j=keyinfo->user_defined_key_parts ; j-- ; key_part++)
{
*rec_per_key++ = 0;
*rec_per_key_float++ = REC_PER_KEY_UNKNOWN;
key_part->fieldnr=(uint16) (uint2korr(strpos) & FIELD_NR_MASK);
key_part->offset= (uint) uint2korr(strpos+2)-1; // byte offset within the record (stored value minus 1)
key_part->key_type=(uint) uint2korr(strpos+5); // key part type
// key_part->field=(Field*) 0;// Will be fixed later
if (new_frm_ver >= 1)
{
key_part->key_part_flag= *(strpos+4);
key_part->length=(uint) uint2korr(strpos+7); // key part length
strpos+=9;// each key-part record is 9 bytes; advance to the next one
}
.........................................................................................................................................................
}
//**** end of the key-part loop
..............................................................................................................................................................
}
//**** end of the key loop
keynames=(char*) key_part;
strpos+= (my_stpcpy(keynames, (char *) strpos) - keynames)+1;
//reading index comments
................................................................................................................................................................
record_offset= (ulong) (uint2korr(head+6)+
((uint2korr(head+14) == 0xffff ?
uint4korr(head+47) : uint2korr(head+14)))); // file offset of the default record values
................................................................................................................................................................
/* NOTE(review): the block comment that follows is closed early by its first nested purecov comment, so the statements after that point are live code and the final comment closer is stray. */
/* Reading the default values (apparently meant to be commented out by the article author)
error=4;
extra_rec_buf_length= uint2korr(head+59);
rec_buff_length= ALIGN_SIZE(share->reclength + 1 + extra_rec_buf_length);
share->rec_buff_length= rec_buff_length;
if (!(record= (uchar *) alloc_root(&share->mem_root,
rec_buff_length)))
goto err; /* purecov: inspected */
share->default_values= record;
if (mysql_file_pread(file, record, (size_t) share->reclength,
record_offset, MYF(MY_NABP)))
goto err; /* purecov: inspected */
*/
/* Read the field info: pos was computed above and the 288 base bytes are already in forminfo */
mysql_file_seek(file, pos+288, MY_SEEK_SET, MYF(0));
share->fields= uint2korr(forminfo+258);// number of fields in the table
pos= uint2korr(forminfo+260); /* Length of all screens */ // byte length of the base (screen) info
n_length= uint2korr(forminfo+268); // length of the field-name section
interval_count= uint2korr(forminfo+270);// number of enum/set intervals
interval_parts= uint2korr(forminfo+272);
int_length= uint2korr(forminfo+274); // bytes used by the enum/set value lists
share->null_fields= uint2korr(forminfo+282); // number of nullable fields
com_length= uint2korr(forminfo+284);// bytes used by field comments
gcol_screen_length= uint2korr(forminfo+286); // bytes used by generated-column info
share->vfields= 0;
share->stored_fields= share->fields;
/* forminfo[46] is the table comment length; 255 means no comment is stored at forminfo+47 */
if (forminfo[46] != (uchar)255)
{
share->comment.length= (int) (forminfo[46]);
share->comment.str= strmake_root(&share->mem_root, (char*) forminfo+47,
share->comment.length);
}
...........................................................................................................................................................................
share->field= field_ptr;
read_length=(uint) (share->fields * field_pack_length +
pos+ (uint) (n_length+int_length+com_length+
gcol_screen_length));
if (read_string(file, &disk_buff,read_length))
goto err; /* purecov: inspected */
strpos= disk_buff+pos;// skip the first pos bytes
share->intervals= (TYPELIB*) (field_ptr+share->fields+1);
interval_array= (const char **) (share->intervals+interval_count);
names= (char*) (interval_array+share->fields+interval_parts+keys+3);
if (!interval_count)
share->intervals= 0;// For better debugging
memcpy(names, strpos+(share->fields*field_pack_length),// copy the field-name bytes (plus the enum/set bytes)
(uint) (n_length+int_length));
orig_comment_pos= comment_pos= names+(n_length+int_length);// comments are placed right after the names
memcpy(comment_pos, disk_buff+read_length-com_length-gcol_screen_length,
com_length);
orig_gcol_screen_pos= gcol_screen_pos= names+(n_length+int_length+com_length);
memcpy(gcol_screen_pos, disk_buff+read_length-gcol_screen_length,
gcol_screen_length);// copy the generated-column info
..............................................................................................................................................................................
/* Loop over the packed field records */
for (i=0 ; i < share->fields; i++, strpos+=field_pack_length)
{
// generated (virtual) column check
if (new_frm_ver >= 3 &&
(strpos[10] & Field::GENERATED_FIELD) && // Field::unireg_check: is it a generated column
! (bool) (uint) (gcol_screen_pos[3])) // Field::stored_in_db: is it stored
{
comment_pos+= uint2korr(strpos+15);
gcol_screen_pos+= uint2korr(gcol_screen_pos + 1) + FRM_GCOL_HEADER_SIZE;
has_vgc= true;
}
else
{
if ((error= make_field_from_frm(thd, share,
new_frm_ver, use_hash,
i, strpos,
format_section_fields,
&comment_pos,
&gcol_screen_pos,
&null_pos,
&null_bit_pos,
&errarg)))
goto err;
}
}
..............................................................................................................................................................
// resolve key information
/* Fix key->name and key_part->field */
if (key_parts)
{
const int pk_off= find_type(primary_key_name, &share->keynames,
FIND_TYPE_NO_PREFIX);
uint primary_key= (pk_off > 0 ? pk_off-1 : MAX_KEY);
longlong ha_option= handler_file->ha_table_flags();
keyinfo= share->key_info;
key_part= keyinfo->key_part;
for (uint key=0 ; key < share->keys ; key++,keyinfo++)
{
.........................................................................................................................................................
for (i=0 ; i < keyinfo->user_defined_key_parts ; key_part++,i++)
{
Field *field;
if (new_field_pack_flag <= 1)
key_part->fieldnr= (uint16) find_field(share->field,
share->default_values,
(uint) key_part->offset,
(uint) key_part->length);
if (!key_part->fieldnr)
{
error= 4; // Wrong file
goto err;
}
field= key_part->field= share->field[key_part->fieldnr-1];
key_part->type= field->key_type();
if (field->real_maybe_null())
{
key_part->null_offset=field->null_offset(share->default_values);
key_part->null_bit= field->null_bit;
key_part->store_length+=HA_KEY_NULL_LENGTH;
keyinfo->flags|=HA_NULL_PART_KEY;
keyinfo->key_length+= HA_KEY_NULL_LENGTH;
}
if (field->type() == MYSQL_TYPE_BLOB ||
field->real_type() == MYSQL_TYPE_VARCHAR ||
field->type() == MYSQL_TYPE_GEOMETRY)
{
key_part->store_length+=HA_KEY_BLOB_LENGTH;
if (i + 1 <= keyinfo->user_defined_key_parts)
keyinfo->key_length+= HA_KEY_BLOB_LENGTH;
}
key_part->init_flags();
....................................................................................................................................................................
// auto-increment check
if (share->found_next_number_field)
{
Field *reg_field= *share->found_next_number_field;
if ((int) (share->next_number_index= (uint)
find_ref_key(share->key_info, share->keys,
share->default_values, reg_field,
&share->next_number_key_offset,
&share->next_number_keypart)) < 0)
{
/* Wrong field definition */
error= 4;
goto err;
}
else
reg_field->flags |= AUTO_INCREMENT_FLAG;
}//** end of the auto-increment check
.......................................................................................................................................................................
} /* open_binary_frm */
代碼量太大,很多地方都做了省略,下面來做個文字解釋:
索引部分:
6bytes: 記錄索引基本信息
0,1:1bytes 索引數
1,1: 1bytes 用於索引的字段數
4,2: 2bytes 索引信息結束後存儲索引名的字節長度
8bytes: 記錄單個索引信息
0,2: flags,最低位表示唯一約束(讀取時會與HA_NOSAME做異或):異或後爲1代表有唯一約束,普通索引爲0
2,2: 索引總長度
4,1: 索引字段數,有多少字段,後面就有多少個9bytes
5,1: 索引類型
6,2: block_size
9bytes: 記錄單個索引字段的信息
0,2: 字段在表中的編號(讀取時與FIELD_NR_MASK做按位與)
2,2: record字節偏移量(文件中存儲的值比實際偏移量大1)
4,1: key_part_flag
5,2: 字段類型
7,2: 字段長度
後面緊跟索引名:
ff+索引名
表字段部分:
288bytes基本信息
46,1: 表註釋所佔長度,後面緊跟註釋內容
258,2: 兩個字節記錄字段數
260,2: 字段基本信息長度
268,2: 所有字段名長度,每個字段名以ff隔開
284,2: 字段註釋所佔長度
288字節後緊跟字段信息,格式排列順序:
content_pos: 基礎信息
fields*17 : 字段信息
n_length: 字段名總長度
int_length : 緊跟字段之後,爲enum、set類型的值
com_length: 字段註釋長度
gcol_screen: 虛擬列信息
0,1: 固定值1
1,2: 虛擬列信息長度
3,1: 是否存儲數據
剩餘的爲信息
如有多個虛擬列,循環上面的字節排序
17bytes記錄字段信息(注意:此處採用"起始偏移,結束偏移"的寫法,不含結束偏移,與前文的"位置,字節數"不同):
3,5: 字段長度
5,8: 記錄偏移量
8,10: flags
10,11: 是否爲虛擬列
12,13: enum、set類型的順序號
13,14: 字段類型
14,15: 字段字符集編號
15,17: 字段註釋長度
下面圖片內容是我用python寫的解析腳本打印出的表結構。如果有興趣的話,可以按照上面的介紹,用自己熟悉的語言寫一個腳本玩玩,萬一哪天真的需要用上呢——雖然對一個專業DBA來說,這種機率應該幾乎爲零
ps: mysql技術交流QQ羣479472450,個人微信公衆號會發送一些整理學習結果,多多關注