The journal mechanism in f2fs

Because of f2fs's log-structured nature, every write of a data block requires corresponding updates to the direct node, the NAT and the SIT. For the NAT and SIT areas in particular, changing only a few bytes of a single entry would force a rewrite of the whole page, which would seriously hurt both file system performance and SSD lifetime. f2fs therefore uses a journal mechanism to reduce the number of NAT and SIT writes: updates to NAT and SIT entries are first recorded in the f2fs_summary_block, and the dirty SIT and NAT areas are only written back when a checkpoint is written.
struct f2fs_summary_block {
	/* one summary entry per block in this segment */
	struct f2fs_summary entries[ENTRIES_IN_SUM];
	/* number of NAT/SIT journal entries currently in use */
	union {
		__le16 n_nats;
		__le16 n_sits;
	};
	/* spare area is used by NAT or SIT journals */
	union {
		struct nat_journal nat_j;
		struct sit_journal sit_j;
	};
	struct summary_footer footer;
} __packed;

struct nat_journal {
	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
	__u8 reserved[NAT_JOURNAL_RESERVED];
} __packed;

struct sit_journal {
	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
	__u8 reserved[SIT_JOURNAL_RESERVED];
} __packed;
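
For reference, each journal entry simply pairs a key with the updated raw entry: an nid plus an f2fs_nat_entry for the NAT journal, a segment number plus an f2fs_sit_entry for the SIT journal. In the f2fs headers of this era the definitions look roughly like this:

struct nat_journal_entry {
	__le32 nid;			/* node id being journaled */
	struct f2fs_nat_entry ne;	/* its updated raw NAT entry */
} __packed;

struct sit_journal_entry {
	__le32 segno;			/* segment number being journaled */
	struct f2fs_sit_entry se;	/* its updated raw SIT entry */
} __packed;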

Each current segment (curseg) keeps an in-memory instance of f2fs_summary_block. f2fs uses the summary blocks of CURSEG_COLD_DATA and CURSEG_HOT_DATA for journaling: the former holds the SIT journal and the latter holds the NAT journal. One open question: f2fs also supports a 2-log configuration, where there is only one data log and one node log; in that case CURSEG_COLD_DATA and CURSEG_HOT_DATA would refer to the same segment, so wouldn't the two journals conflict?
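
In code, picking a journal therefore just means picking the summary block of the corresponding current segment. The two helpers below are hypothetical wrappers for illustration only; the kernel simply open-codes CURSEG_I(sbi, ...)->sum_blk at each use site, as flush_sit_entries does further down:

/* SIT journal: summary block of the cold-data current segment */
static struct f2fs_summary_block *sit_journal_sum(struct f2fs_sb_info *sbi)
{
	return CURSEG_I(sbi, CURSEG_COLD_DATA)->sum_blk;
}

/* NAT journal: summary block of the hot-data current segment */
static struct f2fs_summary_block *nat_journal_sum(struct f2fs_sb_info *sbi)
{
	return CURSEG_I(sbi, CURSEG_HOT_DATA)->sum_blk;
}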

Insertion and lookup share one routine: it first scans the journal area for an existing entry with the given key. If one is found, its index is returned; otherwise, when alloc is set and there is still room, a new slot is allocated at the end of the journal and its index is returned. This guarantees that frequent updates to NAT and SIT entries only touch memory.

int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
		for (i = 0; i < nats_in_cursum(sum); i++) {
			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
				return i;
		}
		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
			return update_nats_in_cursum(sum, 1);
	} else if (type == SIT_JOURNAL) {
		for (i = 0; i < sits_in_cursum(sum); i++)
			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
				return i;
		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
			return update_sits_in_cursum(sum, 1);
	}
	return -1;
}
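
The NAT journal is used in the same way. Below is a simplified sketch of how flush_nat_entries folds a dirty NAT entry into the journal, condensed from node.c of this kernel version; locking, the NEW_ADDR check and the fallback path to the real NAT page are omitted:

	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	struct f2fs_nat_entry raw_ne;
	int offset;

	/* try to cache the dirty NAT entry for @nid in the journal */
	offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
	if (offset >= 0) {
		/* fill in the raw entry from the in-memory nat_entry @ne */
		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
		raw_ne.block_addr = cpu_to_le32(nat_get_blkaddr(ne));
		raw_ne.version = nat_get_version(ne);

		nat_in_journal(sum, offset) = raw_ne;
		nid_in_journal(sum, offset) = cpu_to_le32(nid);
	} else {
		/* journal full: update the entry in the NAT block itself */
	}
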
Writing a checkpoint triggers the flush of the SIT and NAT, which writes the NAT and SIT entries cached in the journal area back to the SSD:

void flush_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	unsigned long nsegs = TOTAL_SEGS(sbi);
	struct page *page = NULL;
	struct f2fs_sit_block *raw_sit = NULL;
	unsigned int start = 0, end = 0;
	unsigned int segno = -1;
	bool flushed;

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/*
	 * "flushed" indicates whether sit entries in journal are flushed
	 * to the SIT area or not.
	 */
	flushed = flush_sits_in_journal(sbi);

	while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
		struct seg_entry *se = get_seg_entry(sbi, segno);
		int sit_offset, offset;

		sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);

		if (flushed)
			goto to_sit_page;

		offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
		if (offset >= 0) {
			segno_in_journal(sum, offset) = cpu_to_le32(segno);
			seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
			goto flush_done;
		}
to_sit_page:
		if (!page || (start > segno) || (segno > end)) {
			if (page) {
				f2fs_put_page(page, 1);
				page = NULL;
			}

			start = START_SEGNO(sit_i, segno);
			end = start + SIT_ENTRY_PER_BLOCK - 1;

			/* read sit block that will be updated */
			page = get_next_sit_page(sbi, start);
			raw_sit = page_address(page);
		}

		/* update entry in SIT block */
		seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
flush_done:
		__clear_bit(segno, bitmap);
		sit_i->dirty_sentries--;
	}
	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);

	/* writeout last modified SIT block */
	f2fs_put_page(page, 1);

	set_prefree_as_free_segments(sbi);
}
  1. flush_sits_in_journal empties the journal only when it is full: each segment recorded there is marked dirty in dirty_sentries_bitmap, so the loop above then writes its entry straight into a SIT block instead of back into the journal (see the sketch after this list).
  2. f2fs keeps two SIT (and NAT) areas: one holds the latest data, the other holds the data valid as of the last checkpoint. get_next_sit_page copies the block being updated into the "latest" area and returns that page, so the updates land there. Once the checkpoint completes the roles flip: the freshly written area becomes the checkpoint-valid one, and the other becomes the target for new updates.
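
For reference, the journal-flushing step described in point 1 looks roughly like this in segment.c of this kernel version (lightly trimmed; treat it as a sketch rather than an exact quote):

static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int i;

	/*
	 * Only when the journal area is full are its entries flushed;
	 * otherwise they stay in the journal and keep absorbing updates.
	 */
	if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
		for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
			unsigned int segno;
			segno = le32_to_cpu(segno_in_journal(sum, i));
			__mark_sit_entry_dirty(sbi, segno);
		}
		/* reset the journal entry count to zero */
		update_sits_in_cursum(sum, -sits_in_cursum(sum));
		return true;
	}
	return false;
}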
