/*
 * Summary block: an array of summary entries, followed by a journal
 * counter, a journal area and a footer. The structure is packed, so the
 * members are laid out back to back without padding.
 */
struct f2fs_summary_block {
	/* ENTRIES_IN_SUM summary entries */
	struct f2fs_summary entries[ENTRIES_IN_SUM];

	/*
	 * n_nats and n_sits overlay the same 16-bit little-endian field.
	 * NOTE(review): presumably the number of journal entries in use for
	 * whichever journal type this block carries; confirm against
	 * nats_in_cursum()/sits_in_cursum().
	 */
	union {
		__le16 n_nats;
		__le16 n_sits;
	};

	/*
	 * Spare area is used by NAT or SIT journals. Both journals overlay
	 * the same storage, so a block holds one kind of journal at a time.
	 */
	union {
		struct nat_journal nat_j;
		struct sit_journal sit_j;
	};

	struct summary_footer footer;
} __packed;
/*
 * NAT journal area: NAT_JOURNAL_ENTRIES journal entries followed by
 * NAT_JOURNAL_RESERVED reserved bytes. Packed, so no padding is inserted.
 */
struct nat_journal {
	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
	__u8 reserved[NAT_JOURNAL_RESERVED];	/* reserved bytes */
} __packed;
/*
 * SIT journal area: SIT_JOURNAL_ENTRIES journal entries followed by
 * SIT_JOURNAL_RESERVED reserved bytes. Packed, so no padding is inserted.
 */
struct sit_journal {
	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
	__u8 reserved[SIT_JOURNAL_RESERVED];	/* reserved bytes */
} __packed;
每個當前的segment在內存裏面都有一個f2fs_summary_block的實例。f2fs使用CURSEG_COLD_DATA和CURSEG_HOT_DATA的summary block來存放journal:前者存儲的是SIT的journal,後者存儲的是NAT的journal。不過有個疑問:f2fs支持只使用2個log的模式,當只有兩個log的時候,segment只分爲data和node兩種類型,這時CURSEG_COLD_DATA和CURSEG_HOT_DATA就是同一個segment了,這樣這兩個journal不就衝突了嗎?
journal的插入和查找:先搜索journal區域是否已經存在相關的entry,如果存在,則返回這個entry的下標;否則,當alloc非零且journal還有空位時,通過update_nats_in_cursum/update_sits_in_cursum申請一個新的entry並返回;其餘情況返回-1。這樣可以保證,頻繁的NAT、SIT項更改只會寫到內存中。
/*
 * Look up the journal slot in @sum whose key equals @val.
 *
 * @sum:   summary block holding the journal
 * @type:  NAT_JOURNAL to match journal nids, SIT_JOURNAL to match journal
 *         segment numbers; any other value finds nothing
 * @val:   key to search for, in CPU byte order
 * @alloc: when non-zero and no slot matches, claim a new slot if the
 *         journal still has room
 *
 * Returns the index of the matching slot. If nothing matches and a new
 * slot is claimed, returns the result of update_nats_in_cursum() or
 * update_sits_in_cursum() (presumably the index of the new slot; confirm
 * against those helpers). Returns -1 otherwise.
 */
int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
					unsigned int val, int alloc)
{
	int slot = 0;

	if (type == NAT_JOURNAL) {
		/* linear scan over the NAT journal slots currently in use */
		while (slot < nats_in_cursum(sum)) {
			if (le32_to_cpu(nid_in_journal(sum, slot)) == val)
				return slot;
			slot++;
		}

		/* not found: give up unless asked to allocate and room is left */
		if (!alloc || nats_in_cursum(sum) >= NAT_JOURNAL_ENTRIES)
			return -1;

		return update_nats_in_cursum(sum, 1);
	}

	if (type == SIT_JOURNAL) {
		/* linear scan over the SIT journal slots currently in use */
		while (slot < sits_in_cursum(sum)) {
			if (le32_to_cpu(segno_in_journal(sum, slot)) == val)
				return slot;
			slot++;
		}

		/* not found: give up unless asked to allocate and room is left */
		if (!alloc || sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES)
			return -1;

		return update_sits_in_cursum(sum, 1);
	}

	/* unknown journal type */
	return -1;
}
寫checkpoint的時候會觸發對SIT、NAT的flush操作,從而把journal區域的NAT、SIT項回寫到SSD上:
/*
 * Flush every segment entry whose bit is set in the dirty-sentries bitmap.
 *
 * Each dirty entry is stored in one of two places:
 *  - a slot of the SIT journal kept in the CURSEG_COLD_DATA summary block,
 *    when the journal was not flushed and a slot can be found or claimed;
 *  - otherwise the SIT block page returned by get_next_sit_page().
 *
 * The current-segment mutex and the sentry lock are both held while the
 * bitmap is walked. After they are dropped the last SIT page is released
 * and set_prefree_as_free_segments() is called.
 */
void flush_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	unsigned long nsegs = TOTAL_SEGS(sbi);
	struct page *page = NULL;
	struct f2fs_sit_block *raw_sit = NULL;
	unsigned int start = 0;
	unsigned int end = 0;
	unsigned int segno;
	bool flushed;

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/*
	 * "flushed" indicates whether sit entries in journal are flushed
	 * to the SIT area or not. When it is set, the journal is bypassed
	 * and every dirty entry below goes to a SIT block page.
	 */
	flushed = flush_sits_in_journal(sbi);

	/* visit each set bit, i.e. each dirty segment entry, in ascending order */
	for (segno = find_next_bit(bitmap, nsegs, 0); segno < nsegs;
	     segno = find_next_bit(bitmap, nsegs, segno + 1)) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		int sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
		int slot = -1;

		/* try the journal first, claiming a new slot if necessary */
		if (!flushed)
			slot = lookup_journal_in_cursum(sum, SIT_JOURNAL,
							segno, 1);

		if (slot >= 0) {
			/* record the entry in the in-memory SIT journal */
			segno_in_journal(sum, slot) = cpu_to_le32(segno);
			seg_info_to_raw_sit(sentry, &sit_in_journal(sum, slot));
		} else {
			/*
			 * No journal slot is available: write into the SIT
			 * block. A new page is fetched only when segno falls
			 * outside the [start, end] range of the cached one.
			 */
			if (!page || (start > segno) || (segno > end)) {
				if (page) {
					f2fs_put_page(page, 1);
					page = NULL;
				}

				start = START_SEGNO(sit_i, segno);
				end = start + SIT_ENTRY_PER_BLOCK - 1;

				/* read sit block that will be updated */
				page = get_next_sit_page(sbi, start);
				raw_sit = page_address(page);
			}

			/* update entry in SIT block */
			seg_info_to_raw_sit(sentry,
					    &raw_sit->entries[sit_offset]);
		}

		/* the entry is no longer dirty */
		__clear_bit(segno, bitmap);
		sit_i->dirty_sentries--;
	}

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);

	/*
	 * writeout last modified SIT block
	 * NOTE(review): page is still NULL here if no SIT block was touched;
	 * this relies on f2fs_put_page() tolerating NULL. Confirm.
	 */
	f2fs_put_page(page, 1);

	set_prefree_as_free_segments(sbi);
}
- flush_sits_in_journal會嘗試把journal中各項所對應的entry標記爲髒(dirty),從而在頁換出的時候觸發對數據的回寫。
- f2fs有兩個SIT(NAT)區域:一個存放當前最新的數據,一個存放當前checkpoint有效的數據。get_next_sit_page把更新的數據寫到最新的那個區域,並返回這個page。checkpoint完成後,兩個區域的角色互換:最新區域成了checkpoint有效區域,而原來的checkpoint有效區域成了更新的數據寫入的區域。