sheepdog 源碼學習(1)

最近比較忙,做畢設(sheepdog),要在sheepdog的基礎上做點東西,也就是說要改sheepdog源碼。我只有一個月時間,所以最近一直都在讀sheepdog源碼。C 語言太菜,裏面好多用法都不會,基本都是邊看邊查。。。

sheepdog 可以幹什麼,首先,你需要啓動sheep,啓動的命令可以用下面這個。當然可以用 test 目錄下的測試腳本來啓動一個 虛擬的 sheepdog 集羣。何謂虛擬呢?就是這個腳本可以在你的主機上啓動5個sheep進程,每個sheep進程佔用不同的端口,這樣這5個sheep進程就組成了一個sheepdog集羣了。這樣,你也就擁有一個sheepdog集羣了。之後,你就可以用dog命令來看一些東西了,比如: dog node list, dog vdi list. 之類的,具體是什麼意思就需要你去 sheepdog github 去讀點文檔啦。。。和系統相關的先說這些吧,這只是個開始,但並不是我的重點。再羅嗦一點,這樣默認啓動後,sheepdog 的 日誌 目錄是在 /tmp/sheepdog/ 目錄下。這個目錄下面有這樣幾個子目錄。首先可能是 0/ 1/ 2/ 3/ ... 以數字命名的目錄,這表示這是 node ID 的log 目錄。因爲你是在主機上虛擬出的幾個 sheep,每一個sheep 都要有一個 log 目錄,這是自然的啦。。。進入任意一個目錄,就拿0/作比方吧,進去可以看到有這樣幾個文件,obj/ sheep.log epoch sock config 等文件。其中 obj 就是用來存放 數據塊的 目錄哦,如果你給你創建的vdi 寫入了比較多的東西,那麼這個目錄下應該有很多文件,並且每一個 都是 4M。哈哈,不信你可以試一下。當然,要讀系統debug 日誌的話,打開 sheep.log 讀就可以了。

sheep /tmp/sheepdog/4 -z 4 -p 7004 -c local -n -y 127.0.0.1 -d
# 當然,這是我從 test 目錄下的腳本里偷出來的哈。。。
dog 是 sheepdog 中很重要的一個組成部分,它提供了一系列的系統命令。要分析整個系統的功能,我們當然可以從這裏入手,順藤摸瓜。。。好吧。開始吧。首先我們找到這個文件,會是誰呢,當然是 dog.c 啦,還能有誰啊,打開它啊,從哪裏讀呢,當然是從 main開始啊。開始看吧。

下面是 dog.c 的main函數部分。當然,有些內容我省略掉了。首先是這個 init_commands(&commands) 這個函數,我們需要注意下,我們跟進去,看看這個函數具體在幹什麼。

/*
 * Entry point of the dog command-line tool (excerpt: the body of the
 * option-parsing loop is elided in this listing and shown as "......").
 *
 * Visible flow: install the crash handler, fetch the command table, resolve
 * argv[1]/argv[2] through setup_commands(), build the getopt option tables,
 * parse options, then initialise the subsystems the handler needs and call
 * command_fn().
 *
 * Returns the value of command_fn(); EXIT_USAGE additionally triggers
 * subcommand_usage().
 */
int main(int argc, char **argv)
{
	int ch, longindex, ret;
	unsigned long flags;
	struct option *long_options;
	const struct command *commands;
	const char *short_options;
	/* NOTE(review): p, sdhost and sdport are not used in the visible part;
	 * presumably they are used inside the elided option loop. */
	char *p;
	const struct sd_option *sd_opts;
	uint8_t sdhost[16];
	int sdport;
	install_crash_handler(crash_handler);

	/* Obtain the table of top-level commands. */
	init_commands(&commands);

	if (argc < 2)
		usage(commands, 0);

	/*
	 * argv[1] and argv[2] are passed as the command / subcommand pair; the
	 * returned flags are tested against CMD_NEED_* below.
	 * NOTE(review): command_opts and command_fn are not declared in this
	 * function; presumably setup_commands() sets them -- confirm.
	 */
	flags = setup_commands(commands, argv[1], argv[2]);

	/* Start getopt parsing after the first three argv entries. */
	optind = 3;

	sd_opts = build_sd_options(command_opts);
	long_options = build_long_options(sd_opts);
	short_options = build_short_options(sd_opts);

	/* NOTE(review): not defined in this listing; looks like an experiment
	 * hook added by the article's author -- confirm before keeping. */
	mytest_func();

	while ((ch = getopt_long(argc, argv, short_options, long_options,
				&longindex)) >= 0) {

	      ......
	}
	/* Turn highlighting off when stdout is not a console or raw output
	 * is requested. */
	if (!is_stdout_console() || raw_output)
		highlight = false;

	/* Only fetch the node list for subcommands that ask for it. */
	if (flags & CMD_NEED_NODELIST) {
		ret = update_node_list(SD_MAX_NODES);
		if (ret < 0) {
			sd_err("Failed to get node list");
			exit(EXIT_SYSFAIL);
		}
	}

	/* argc == optind means no positional argument is left after the
	 * options, although the subcommand requires one. */
	if (flags & CMD_NEED_ARG && argc == optind)
		subcommand_usage(argv[1], argv[2], EXIT_USAGE);

	if (init_event(EPOLL_SIZE) < 0)
		exit(EXIT_SYSFAIL);

	/* get_nr_nodes is passed as a function pointer, not called here. */
	if (init_work_queue(get_nr_nodes) != 0) {
		sd_err("Failed to init work queue");
		exit(EXIT_SYSFAIL);
	}

	if (sockfd_init()) {
		sd_err("sockfd_init() failed");
		exit(EXIT_SYSFAIL);
	}
	/* Run the selected subcommand handler. */
	ret = command_fn(argc, argv);
	if (ret == EXIT_USAGE)
		subcommand_usage(argv[1], argv[2], EXIT_USAGE);
	return ret;
}

init_commands(const struct command **commands) 命令初始化函數。

/*
 * Give the caller the table of top-level dog commands through *commands.
 *
 * The table is built only once: on the first call a local template array is
 * copied into memory obtained from xmalloc() and the pointer is kept in a
 * function-local static; every later call hands back that same pointer.
 * The last entry of the table has NULL as its first member.
 */
static void init_commands(const struct command **commands)
{
	// Cached across calls; assigned on the first call only.
	static struct command *cached_table;

	if (cached_table == NULL) {
		struct command builtin[] = {
			vdi_command, // in vdi.c
			node_command, // in node.c
			cluster_command, // in cluster.c
			trace_command,	// in dog.h
			{NULL,}
		};

		cached_table = (struct command *)xmalloc(sizeof(builtin));
		memcpy(cached_table, builtin, sizeof(builtin));
	}

	*commands = cached_table;
}

vdi_command,

/*
 * Top-level descriptor for the "vdi" command, initialised positionally:
 * the command name string, then vdi_cmd (an array of struct subcommand),
 * then vdi_parser.
 * NOTE(review): vdi_parser is presumably the option-parsing callback for
 * this command; its definition is not part of this listing -- confirm.
 */
struct command vdi_command = {
	"vdi",
	vdi_cmd,
	vdi_parser
};
vdi_cmd. 好啦,到這裏就差不多了,這裏你應該有點感覺了,你在命令行中敲出來的命令,都是存放在這個地方的,對你的每一個命令的響應,也是在這裏做出的。比如 dog vdi create,對應的就是 vdi_cmd 中的第二項 vdi_cmd[1]。其中的 vdi_create 是一個指向函數的指針。我們可以去看看這個函數的具體內容。

/*
 * Table of "dog vdi <subcommand>" entries, terminated by a {NULL,} entry.
 *
 * Every entry is initialised positionally with eight values, in this order:
 *   1. subcommand name
 *   2. argument synopsis string, or NULL when there is none
 *   3. a string of option letters (presumably the short options the
 *      subcommand accepts -- confirm against struct subcommand)
 *   4. one-line description
 *   5. NULL, or another table (only "cache" sets it, to vdi_cache_cmd;
 *      presumably nested sub-subcommands -- confirm)
 *   6. CMD_NEED_* flags, or 0
 *   7. handler function
 *   8. vdi_options
 */
static struct subcommand vdi_cmd[] = {
	{"check", "<vdiname>", "saph", "check and repair image's consistency",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_check, vdi_options},
	{"create", "<vdiname> <size>", "Pycaphrv", "create an image",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_create, vdi_options},
	{"snapshot", "<vdiname>", "saphrv", "create a snapshot",
	 NULL, CMD_NEED_ARG,
	 vdi_snapshot, vdi_options},
	{"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image",
	 NULL, CMD_NEED_ARG,
	 vdi_clone, vdi_options},
	{"delete", "<vdiname>", "saph", "delete an image",
	 NULL, CMD_NEED_ARG,
	 vdi_delete, vdi_options},
	{"rollback", "<vdiname>", "saphfrv", "rollback to a snapshot",
	 NULL, CMD_NEED_ARG,
	 vdi_rollback, vdi_options},
	{"list", "[vdiname]", "aprh", "list images",
	 NULL, 0, vdi_list, vdi_options},
	{"tree", NULL, "aph", "show images in tree view format",
	 NULL, 0, vdi_tree, vdi_options},
	{"graph", NULL, "aph", "show images in Graphviz dot format",
	 NULL, 0, vdi_graph, vdi_options},
	{"object", "<vdiname>", "isaph", "show object information in the image",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_object, vdi_options},
	{"track", "<vdiname>", "isaph", "show the object epoch trace in the image",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_track, vdi_options},
	{"setattr", "<vdiname> <key> [value]", "dxaph", "set a VDI attribute",
	 NULL, CMD_NEED_ARG,
	 vdi_setattr, vdi_options},
	{"getattr", "<vdiname> <key>", "aph", "get a VDI attribute",
	 NULL, CMD_NEED_ARG,
	 vdi_getattr, vdi_options},
	{"resize", "<vdiname> <new size>", "aph", "resize an image",
	 NULL, CMD_NEED_ARG,
	 vdi_resize, vdi_options},
	{"read", "<vdiname> [<offset> [<len>]]", "saph", "read data from an image",
	 NULL, CMD_NEED_ARG,
	 vdi_read, vdi_options},
	{"write", "<vdiname> [<offset> [<len>]]", "apwh", "write data to an image",
	 NULL, CMD_NEED_ARG,
	 vdi_write, vdi_options},
	{"backup", "<vdiname> <backup>", "sFaph", "create an incremental backup between two snapshots",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_backup, vdi_options},
	{"restore", "<vdiname> <backup>", "saph", "restore snapshot images from a backup",
	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
	 vdi_restore, vdi_options},
	{"cache", "<vdiname>", "saph", "Run 'dog vdi cache' for more information",
	 vdi_cache_cmd, CMD_NEED_ARG,
	 vdi_cache, vdi_options},
	{NULL,},
};

vdi_create 額,不好意思,我只是隨便選了一個,沒想到這個函數這麼長,但是創建一個 vdi 的過程就是這樣的。當然裏面又引出了很多新的東西,這就是需要我們去認真分析的東西。相信已經看到了,裏面最重要的過程應該是那個 ret = do_vdi_create() 過程。那是下一個應該考慮的過程。

/*
 * Handler for "dog vdi create <vdiname> <size>".
 *
 * Takes the VDI name from argv[optind] and the size string from the
 * following argument, validates the size and the requested number of
 * copies, and creates the VDI through do_vdi_create().  When
 * vdi_cmd_data.prealloc is set, it then reads back the new inode and writes
 * every data object of the VDI up front, recording each object in the inode
 * and showing progress as it goes.
 *
 * In verbose mode the new VDI ID is printed on every successful path.
 *
 * Returns EXIT_SUCCESS on success, EXIT_USAGE for a missing or invalid
 * size or copy count, EXIT_FAILURE when reading the inode or writing an
 * object fails, or whatever non-success code do_vdi_create() reports.
 */
static int vdi_create(int argc, char **argv)
{
	const char *vdiname = argv[optind++];
	uint64_t size;
	uint32_t vid;
	uint64_t oid;
	uint32_t idx, max_idx, nr_copies = vdi_cmd_data.nr_copies;
	/*
	 * Must be signed: with an unsigned type the "ret < 0" test after
	 * option_parse_size() could never be true, so an unparsable size
	 * would be accepted silently.
	 */
	int ret;
	struct sd_inode *inode = NULL;

	if (!argv[optind]) {
		sd_err("Please specify the VDI size");
		return EXIT_USAGE;
	}
	ret = option_parse_size(argv[optind], &size);
	if (ret < 0)
		return EXIT_USAGE;

	/* Sizes above the old limit are only allowed with a non-zero store
	 * policy (the message points the user at '-y'). */
	if (size > SD_OLD_MAX_VDI_SIZE && 0 == vdi_cmd_data.store_policy) {
		sd_err("VDI size is larger than %s bytes, please use '-y' to "
		       "create a hyper volume with size up to %s bytes",
		       strnumber(SD_OLD_MAX_VDI_SIZE),
		       strnumber(SD_MAX_VDI_SIZE));
		return EXIT_USAGE;
	}

	if (size > SD_MAX_VDI_SIZE) {
		sd_err("VDI size is too large");
		return EXIT_USAGE;
	}

	if (nr_copies > sd_nodes_nr) {
		sd_err("There are not enough nodes(%d) to hold the copies(%d)",
		       sd_nodes_nr, nr_copies);
		return EXIT_USAGE;
	}

	ret = do_vdi_create(vdiname, size, 0, &vid, false,
			    vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy,
			    vdi_cmd_data.store_policy);
	/* Done unless creation succeeded AND preallocation was requested. */
	if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)
		goto out;

	inode = xmalloc(sizeof(*inode));

	ret = dog_read_object(vid_to_vdi_oid(vid), inode, sizeof(*inode), 0,
			      true);
	if (ret != SD_RES_SUCCESS) {
		sd_err("Failed to read a newly created VDI object");
		ret = EXIT_FAILURE;
		goto out;
	}
	/* Number of data objects needed to cover the requested size. */
	max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE);

	for (idx = 0; idx < max_idx; idx++) {
		vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);
		oid = vid_to_data_oid(vid, idx);

		ret = dog_write_object(oid, 0, NULL, 0, 0, 0, inode->nr_copies,
				      inode->copy_policy, true, true);
		if (ret != SD_RES_SUCCESS) {
			ret = EXIT_FAILURE;
			goto out;
		}

		/* Record the freshly written object in the inode. */
		INODE_SET_VID(inode, idx, vid);
		ret = sd_inode_write_vid(dog_bnode_writer, inode, idx, vid, vid,
					 0, false, true);
		if (ret) {
			ret = EXIT_FAILURE;
			goto out;
		}
	}
	vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);
	ret = EXIT_SUCCESS;

out:
	/*
	 * Report the VDI ID here rather than after the loop, so that a
	 * successful create without preallocation (which jumps straight to
	 * this label) also prints it in verbose mode.
	 */
	if (ret == EXIT_SUCCESS && verbose) {
		if (raw_output)
			printf("%x\n", vid);
		else
			printf("VDI ID of newly created VDI: %x\n", vid);
	}

	free(inode);
	return ret;
}
上面只是講了一下 sheepdog 中 dog 的一小部分,並且沒有深究,當然,我們是需要深究這部分的,看每一個功能從上層到下層的具體實現,這都是很有必要的。今天這些只是講了最外層的部分,從交互入手,我想這也是認識一個系統的一個比較自然的過程吧。由於時間關係,先寫這些,歡迎討論,待續。。。

發佈了42 篇原創文章 · 獲贊 6 · 訪問量 7萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章