/*
 * Summary block: an array of ENTRIES_IN_SUM summary entries, followed by a
 * journal entry counter, a journal area and a footer.
 *
 * Both the counter and the journal area are anonymous unions, so the NAT and
 * SIT variants share the same storage: one block holds either NAT journal
 * entries or SIT journal entries in that space, not both.
 */
struct f2fs_summary_block {
struct f2fs_summary entries[ENTRIES_IN_SUM];
/*
 * Little-endian 16-bit counter; n_nats and n_sits alias each other.
 * NOTE(review): presumably the number of journal entries in use, read
 * through nats_in_cursum()/sits_in_cursum() - confirm against those macros.
 */
union {
__le16 n_nats;
__le16 n_sits;
};
/* spare area is used by NAT or SIT journals */
union {
struct nat_journal nat_j;
struct sit_journal sit_j;
};
struct summary_footer footer;
} __packed;
/*
 * NAT journal area: NAT_JOURNAL_ENTRIES journal entries followed by
 * NAT_JOURNAL_RESERVED reserved bytes.
 * NOTE(review): the reserved bytes presumably pad this struct to the size of
 * the journal area it shares with struct sit_journal - confirm against the
 * macro definitions.
 */
struct nat_journal {
struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
__u8 reserved[NAT_JOURNAL_RESERVED];
} __packed;
/*
 * SIT journal area: SIT_JOURNAL_ENTRIES journal entries followed by
 * SIT_JOURNAL_RESERVED reserved bytes.
 * NOTE(review): the reserved bytes presumably pad this struct to the size of
 * the journal area it shares with struct nat_journal - confirm against the
 * macro definitions.
 */
struct sit_journal {
struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
__u8 reserved[SIT_JOURNAL_RESERVED];
} __packed;
每个当前的segment(current segment)在内存中都有一个f2fs_summary_block的实例。f2fs使用CURSEG_COLD_DATA和CURSEG_HOT_DATA这两个current segment的summary block来存放journal:前者的block存储的是SIT的journal,后者的block存储的是NAT的journal。不过这里有个疑问:f2fs支持只使用2个log的配置,当只有两个log时,segment只分为data和node两种类型,这时CURSEG_COLD_DATA和CURSEG_HOT_DATA就是同一个segment了,这样这两个journal不就冲突了吗?
journal的插入和查找由下面的lookup_journal_in_cursum完成:先在journal区域中搜索是否已经存在相关的entry,如果存在,则返回这个entry的下标;否则,当alloc非0且journal区域还有空位时,新分配一个entry并返回其下标,其余情况返回-1。这样可以保证,频繁的NAT、SIT项更改只会写到内存中的journal里。
/*
 * Find the journal slot of the summary block @sum that records @val.
 *
 * @sum:   summary block whose journal area is searched
 * @type:  NAT_JOURNAL to compare @val with the journaled nids,
 *         SIT_JOURNAL to compare it with the journaled segment numbers
 * @val:   nid or segment number to look for, in CPU byte order
 * @alloc: when non-zero and nothing matches, take a new slot provided the
 *         journal is not full yet
 *
 * Returns the index of the matching slot.  On a miss with @alloc set and
 * room left, returns the result of growing the journal by one entry via
 * update_nats_in_cursum()/update_sits_in_cursum().  In every other case,
 * including an unknown @type, returns -1.
 */
int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
					unsigned int val, int alloc)
{
	int slot;

	if (type == NAT_JOURNAL) {
		slot = 0;
		while (slot < nats_in_cursum(sum)) {
			if (val == le32_to_cpu(nid_in_journal(sum, slot)))
				return slot;
			slot++;
		}

		/* miss: grow the journal only on request and only if it has room */
		if (!alloc || nats_in_cursum(sum) >= NAT_JOURNAL_ENTRIES)
			return -1;
		return update_nats_in_cursum(sum, 1);
	}

	if (type == SIT_JOURNAL) {
		slot = 0;
		while (slot < sits_in_cursum(sum)) {
			if (val == le32_to_cpu(segno_in_journal(sum, slot)))
				return slot;
			slot++;
		}

		/* miss: grow the journal only on request and only if it has room */
		if (!alloc || sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES)
			return -1;
		return update_sits_in_cursum(sum, 1);
	}

	return -1;
}
写checkpoint的时候会触发对SIT、NAT的flush操作,从而把journal区域的NAT、SIT项回写到SSD上:
void flush_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
unsigned long nsegs = TOTAL_SEGS(sbi);
struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start = 0, end = 0;
unsigned int segno = -1;
bool flushed;
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
/*
* "flushed" indicates whether sit entries in journal are flushed
* to the SIT area or not.
*/
flushed = flush_sits_in_journal(sbi);
while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
struct seg_entry *se = get_seg_entry(sbi, segno);
int sit_offset, offset;
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
if (flushed)
goto to_sit_page;
offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
if (offset >= 0) {
segno_in_journal(sum, offset) = cpu_to_le32(segno);
seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
goto flush_done;
}
to_sit_page:
if (!page || (start > segno) || (segno > end)) {
if (page) {
f2fs_put_page(page, 1);
page = NULL;
}
start = START_SEGNO(sit_i, segno);
end = start + SIT_ENTRY_PER_BLOCK - 1;
/* read sit block that will be updated */
page = get_next_sit_page(sbi, start);
raw_sit = page_address(page);
}
/* udpate entry in SIT block */
seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
flush_done:
__clear_bit(segno, bitmap);
sit_i->dirty_sentries--;
}
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
/* writeout last modified SIT block */
f2fs_put_page(page, 1);
set_prefree_as_free_segments(sbi);
}
- flush_sits_in_journal会尝试把journal中各项所对应的entry标记为脏,从而在页换出的时候触发对这些数据的回写。
- f2fs有两个SIT(NAT)区域:一个存放当前最新的数据,另一个存放当前checkpoint有效的数据。get_next_sit_page把更新的数据写到最新的那个区域,并返回对应的page。checkpoint完成后,两个区域的角色互换:原来的最新区域成为checkpoint有效区域,而原来的checkpoint有效区域成为后续更新数据写入的区域。