sys_mknod()

上篇文章中講到rootfs文件系統掛載後,會先創建”/dev”目錄,之後會創建設備文件"/dev/console",本文就分析下該設備文件的建立過程,文中還會見到與字符設備密切相關的chrdev_open()函數
1.default_rootfs
noinitramfs.c-->default_rootfs()
/*
 * Excerpt of default_rootfs() (elided parts are marked "..."): the shown
 * call creates the "/dev/console" node through sys_mknod().
 */
static int __init default_rootfs(void)
{
...

	/*
	 * Character device (S_IFCHR), readable and writable by the owner,
	 * with the device number encoded from major 5 / minor 1.
	 */
	err = sys_mknod((const char __user *) "/dev/console",
			S_IFCHR | S_IRUSR | S_IWUSR,
		new_encode_dev(MKDEV(5, 1)));
...
}


S_IFCHR表示要建立字符設備文件,S_IRUSR表示文件擁有者可以讀該文件,S_IWUSR表示文件擁有者可以寫該文件。第三個參數是設備號,這裏是0x501(主設備號5左移8位,再與次設備號1相或)。2.6內核的設備號採用新的計算方式。
2.sys_mknod
/*
 * mknod system call: forwards to sys_mknodat() with AT_FDCWD as the
 * directory descriptor, passing filename, mode and dev through unchanged,
 * and returns whatever sys_mknodat() returns.
 */
SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev)
{
	return sys_mknodat(AT_FDCWD, filename, mode, dev);
}

/*
 * mknodat system call: create a regular file, device node, FIFO or socket
 * named by @filename, resolved relative to @dfd.
 *
 * @mode carries the file type and permission bits; @dev is the encoded
 * device number and is only decoded for character/block devices.
 * Returns 0 on success or a negative error code.
 */
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
		unsigned, dev)
{
	int error;
	char *tmp;
	struct dentry *dentry;
	struct nameidata nd;


	/* Directories cannot be created through this call. */
	if (S_ISDIR(mode))
		return -EPERM;

	
	/* Resolve the parent directory into nd; tmp is released by putname() below. */
	error = user_path_parent(dfd, filename, &nd, &tmp);
	if (error)
		return error;

	/*
	 * Obtain the dentry for the new name. Even the failure path goes
	 * through out_unlock, which unlocks the parent's i_mutex, so the
	 * mutex is presumably taken inside lookup_create() - confirm there.
	 */
	dentry = lookup_create(&nd, 0);
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
		goto out_unlock;
	}
	/* Apply the umask unless the parent inode is flagged IS_POSIXACL. */
	if (!IS_POSIXACL(nd.path.dentry->d_inode))
		mode &= ~current_umask();
	error = may_mknod(mode);
	if (error)
		goto out_dput;
	error = mnt_want_write(nd.path.mnt);
	if (error)
		goto out_dput;
	error = security_path_mknod(&nd.path, dentry, mode, dev);
	if (error)
		goto out_drop_write;
	/* Dispatch on the file type bits of mode. */
	switch (mode & S_IFMT) {
		/* No type bits or a regular file: plain file creation. */
		case 0: case S_IFREG:
			error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
			break;
		/* Device nodes: pass the decoded device number. */
		case S_IFCHR: case S_IFBLK:
			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,
					new_decode_dev(dev));
			break;
		/* FIFOs and sockets carry no device number. */
		case S_IFIFO: case S_IFSOCK:
			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
			break;
	}
	/* The labels below undo the steps above in reverse order. */
out_drop_write:
	mnt_drop_write(nd.path.mnt);
out_dput:
	dput(dentry);
out_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
	putname(tmp);

	return error;
}
比較sys_mknod()和sys_mkdir()的代碼,會發現很相似。只不過sys_mkdir調用的是vfs_mkdir,而sys_mknod會調用vfs_create或者vfs_mknod.其實ramfs_mkdir也會調用ramfs_mknod,只不過參數不同而已。
另外創建目錄”/dev”和創建設備文件”/dev/console”還有一個不同點,就是創建”/dev/console”時,因爲”/dev”已經存在,所以user_path_parent()->do_path_lookup()->path_walk()->link_path_walk()->__link_path_walk()中的for循環會循環兩次,而之前創建”/dev”時,只執行了一次,就退出循環了。
下面就分析下這兩個不同點
不同點1.__link_path_walk
/*
 * Walk the path @name one component at a time, starting from the position
 * already stored in nd->path.
 *
 * Each loop iteration hashes one component, looks it up with do_lookup()
 * and moves nd->path to it. When lookup_flags has LOOKUP_PARENT set, the
 * walk stops before the final component and records that component in
 * nd->last / nd->last_type instead of looking it up.
 *
 * Returns 0 on success or a negative error code. Leaving the loop through
 * "break" drops the reference on nd->path via path_put(); the return_err
 * path (failed do_follow_link()) returns without that path_put().
 *
 * The translated walkthrough comments below describe the lookup of
 * "/dev/console" with LOOKUP_PARENT set.
 */
static int __link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;
	
	/* Skip leading slashes; nothing left means there is no component to walk. */
	while (*name=='/')
		name++;
	if (!*name)
		goto return_reval;
	/* Inode of the starting directory ("/" in the "/dev/console" walkthrough) */
	inode = nd->path.dentry->d_inode;
	if (nd->depth)
		lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		nd->flags |= LOOKUP_CONTINUE;

		/* Check permission on the current directory */
		err = exec_permission_lite(inode);
 		if (err)
			break;

		this.name = name;
		c = *(const unsigned char *)name;

		/* Hash the component up to the next '/' or the end of the string. */
		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != '/'));
		this.len = name - (const char *) this.name;
		this.hash = end_name_hash(hash);
		/*
		 * First iteration:  this.name = "dev/console", this.len = 3
		 * Second iteration: this.name = "console",     this.len = 7
		 */
		
		/* remove trailing slashes? */
		/*
		 * On the second iteration the string is exhausted, so control
		 * jumps away from here; the first iteration continues below.
		 */
	    if (!c)
			goto last_component;
		while (*++name == '/');
		if (!*name)
		goto last_with_slashes;
		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;
				/* fallthrough */
			case 1:
				continue;
		}
		/*
		 * See if the low-level filesystem might want
		 * to use its own hash..
		 */
		if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
			err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
							    &this);
			if (err < 0)
				break;
		}
		/* This does the actual lookups.. */
		/*
		 * Look up the dentry of "dev" in dentry_hashtable. The "dev"
		 * directory was created earlier and inserted into
		 * dentry_hashtable, so its dentry is found; the result is
		 * stored in the struct path variable "next".
		 * The body of do_lookup() is not analysed here.
		 */
		err = do_lookup(nd, &this, &next);
		if (err)
			break;

		err = -ENOENT;
		/* Get the inode that belongs to the "dev" directory */
		inode = next.dentry->d_inode;
		if (!inode)
			goto out_dput;
		/* follow_link is NULL for "dev" in this walkthrough */
		if (inode->i_op->follow_link) {
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->path.dentry->d_inode;
			if (!inode)
				break;
		} else
		/*
		 * Copy the contents of "next" into the nameidata "nd", i.e. make
		 * "dev" the new starting directory, so that the parent directory
		 * of the second iteration is "dev". This is how every iteration
		 * descends one directory level.
		 */
			path_to_nameidata(&next, nd);
		err = -ENOTDIR; 
		if (!inode->i_op->lookup)
			break;
		/* End of the first iteration */
		continue;
		/* here ends the main loop */

last_with_slashes:
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
		/* The second iteration jumps here */
		/* Clear LOOKUP_CONTINUE iff it was previously unset */
		nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
		/*
		 * LOOKUP_PARENT is set, i.e. only the parent directory's
		 * information is wanted, so jump to lookup_parent.
		 */
		if (lookup_flags & LOOKUP_PARENT)
			goto lookup_parent;
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;
				/* fallthrough */
			case 1:
				goto return_reval;
		}
		if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
			err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
							    &this);
			if (err < 0)
				break;
		}
		err = do_lookup(nd, &this, &next);
		if (err)
			break;
		inode = next.dentry->d_inode;
		if (follow_on_final(inode, lookup_flags)) {
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			inode = nd->path.dentry->d_inode;
		} else
			path_to_nameidata(&next, nd);
		err = -ENOENT;
		if (!inode)
			break;
		if (lookup_flags & LOOKUP_DIRECTORY) {
			err = -ENOTDIR; 
			if (!inode->i_op->lookup)
				break;
		}
		goto return_base;
lookup_parent:
		/* Store the final component ("console") in nd->last */
		nd->last = this;
		nd->last_type = LAST_NORM;
		if (this.name[0] != '.')
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == '.')
			nd->last_type = LAST_DOTDOT;
		else
			goto return_base;
return_reval:
		/*
		 * We bypassed the ordinary revalidation routines.
		 * We may need to check the cached dentry for staleness.
		 */
		if (nd->path.dentry && nd->path.dentry->d_sb &&
		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
			err = -ESTALE;
			/* Note: we do not d_invalidate() */
			if (!nd->path.dentry->d_op->d_revalidate(
					nd->path.dentry, nd))
				break;
		}
return_base:
		return 0;
out_dput:
		path_put_conditional(&next, nd);
		break;
	}
	/* Every "break" above lands here: drop nd->path before returning err. */
	path_put(&nd->path);
return_err:
	return err;
}
該函數循環兩次,最後將父目錄”dev”的信息存放在nd->path中,”console”等要創建的文件信息放在nd->last中。之後一路返回主函數,接着調用lookup_create(),查找是否存在所要創建的文件對應的dentry,沒有則新建dentry結構,並初始化,具體見上篇文章。

不同點2.vfs_mknod
因爲設置了S_IFCHR,並分配了設備號,所以最後要調用vfs_mknod,創建設備文件對應的inode結構。vfs_mknod會調用父目錄inode的i_op->mknod,對於ramfs來說就是下面的ramfs_mknod。
fs/ramfs/inode.c
/*
 * Create a ramfs inode for @dentry inside directory @dir.
 *
 * @mode and @dev are handed to ramfs_get_inode(). Returns 0 on success,
 * or -ENOSPC when no inode could be obtained.
 */
static int
ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *node = ramfs_get_inode(dir->i_sb, mode, dev);

	if (!node)
		return -ENOSPC;

	/* A set-group-ID parent passes its group (and, for dirs, the bit) on. */
	if (dir->i_mode & S_ISGID) {
		node->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			node->i_mode |= S_ISGID;
	}

	/* Bind the dentry to its new inode. */
	d_instantiate(dentry, node);
	/* Extra count - pin the dentry in core */
	dget(dentry);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;

	return 0;
}
/*
 * Allocate an inode on @sb and initialise it for ramfs.
 *
 * Ownership, address-space settings and timestamps are set for every
 * inode; the operation tables depend on the file type in @mode. Types
 * other than regular file, directory and symlink are handed to
 * init_special_inode() together with @dev.
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
{
	struct inode *node = new_inode(sb);
	struct address_space *mapping;

	if (!node)
		return NULL;

	node->i_mode = mode;
	node->i_uid = current_fsuid();
	node->i_gid = current_fsgid();

	mapping = node->i_mapping;
	mapping->a_ops = &ramfs_aops;
	mapping->backing_dev_info = &ramfs_backing_dev_info;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
	mapping_set_unevictable(mapping);

	node->i_atime = node->i_mtime = node->i_ctime = CURRENT_TIME;

	switch (mode & S_IFMT) {
	case S_IFREG:
		node->i_op = &ramfs_file_inode_operations;
		node->i_fop = &ramfs_file_operations;
		break;
	case S_IFDIR:
		node->i_op = &ramfs_dir_inode_operations;
		node->i_fop = &simple_dir_operations;
		/* directory inodes start off with i_nlink == 2 (for "." entry) */
		inc_nlink(node);
		break;
	case S_IFLNK:
		node->i_op = &page_symlink_inode_operations;
		break;
	default:
		/* Device nodes, FIFOs and sockets. */
		init_special_inode(node, mode, dev);
		break;
	}

	return node;
}
這裏需要講解的就只有init_special_inode(),但也很簡單。
/*
 * Set up @inode as a special file according to the type bits in @mode.
 *
 * The mode is always stored in the inode. Character and block devices
 * get their default file operations plus the device number @rdev; FIFOs
 * and sockets only get a file-operations table. Any other type is
 * reported with a debug message and left without i_fop.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	/* The stored mode is what later identifies the inode's file type. */
	inode->i_mode = mode;

	switch (mode & S_IFMT) {
	case S_IFCHR:
		/* File operations table and device number. */
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFBLK:
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFIFO:
		inode->i_fop = &def_fifo_fops;
		break;
	case S_IFSOCK:
		inode->i_fop = &bad_sock_fops;
		break;
	default:
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
		break;
	}
}
最後看下所設置的文件操作函數集裏的具體內容,
/*
 * File operations that init_special_inode() installs on character-device
 * inodes. Only .open is provided, pointing at chrdev_open().
 */
const struct file_operations def_chr_fops = {
	.open = chrdev_open,
};
可以找到chrdev_open函數。該函數以後編寫和分析字符設備時,再詳細分析下。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章