設備節點創建過程源代碼分析

閱讀本文需要linux文件系統基礎知識。以下引用的kernel源代碼,都是基於linux kernel源代碼版本:3.4。


以下分析基於下面的假設:

根設備使用SD卡設備(塊設備),根文件系統使用ext4文件系統。


linux kernel在初始化的最後階段,會加載“根文件系統”,按照前面的假設,也就是加載一個ext4文件系統作爲根文件系統,這個文件系統位於SD卡上。

在加載這個根文件系統前,kernel會先加載一個虛擬的根文件系統,名叫rootfs文件系統,這個rootfs文件系統纔是kernel真正意義上mount的第一個文件系統。它是基於內存的文件系統(並沒有對應一個非易失性存儲設備,而像ext, fat這些文件系統都會對應一個外部存儲設備)。

這個rootfs 文件系統會mount到自己的根目錄 “/” 上,mount完畢後會將自己的根目錄設爲進程的根目錄。現在假設這個rootfs文件系統已經mount好了(暫不分析rootfs文件系統的mount過程)。

kernel在安裝根文件系統前會先安裝根設備節點:/dev/root。ext4根文件系統必須存在於一個根設備上,比如硬盤,SD/MMC卡。根設備節點用來描述指定的根設備。


kernel在kernel_init函數中會調用prepare_namespace函數(init/do_mounts.c),prepare_namespace函數先從啓動參數中獲得根設備名稱,保存在root_device_name中,這裏名稱就是/dev/mmcblk0p1。 

調用mount_root掛載根文件系統。在mount_root函數中就會創建設備節點了。

void __init prepare_namespace(void)
{
	int is_floppy;

	if (root_delay) {
		printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
		       root_delay);
		ssleep(root_delay);
	}

	/*
	 * wait for the known devices to complete their probing
	 *
	 * Note: this is a potential source of long boot delays.
	 * For example, it is not atypical to wait 5 seconds here
	 * for the touchpad of a laptop to initialize.
	 */
	wait_for_device_probe();

	md_run_setup();

	if (saved_root_name[0]) {
		root_device_name = saved_root_name;
		if (!strncmp(root_device_name, "mtd", 3) ||
		    !strncmp(root_device_name, "ubi", 3)) {
			mount_block_root(root_device_name, root_mountflags);
			goto out;
		}
        printk(KERN_INFO "-----ROOT_DEV_NAME----: %s\n", root_device_name);
		ROOT_DEV = name_to_dev_t(root_device_name);                    //拿到設備名稱後調用name_to_dev_t轉換爲設備號;
		if (strncmp(root_device_name, "/dev/", 5) == 0)
			root_device_name += 5;                                 //跳過 ‘/dev/’;
	}

	if (initrd_load())
		goto out;

	/* wait for any asynchronous scanning to complete */
	if ((ROOT_DEV == 0) && root_wait) {
		printk(KERN_INFO "Waiting for root device %s...\n",
			saved_root_name);
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
			msleep(100);
		async_synchronize_full();
	}

	is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

	if (is_floppy && rd_doload && rd_load_disk(0))
		ROOT_DEV = Root_RAM0;

	mount_root();                       //安裝根文件系統;
out:
	devtmpfs_mount("dev");
	sys_mount(".", "/", NULL, MS_MOVE, NULL);
	sys_chroot((const char __user __force *)".");
}

mount_root():    (init/do_mounts.c)

void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
	if (ROOT_DEV == Root_NFS) {
		if (mount_nfs_root())
			return;

		printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
		ROOT_DEV = Root_FD0;
	}
#endif
#ifdef CONFIG_BLK_DEV_FD
	if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
		/* rd_doload is 2 for a dual initrd/ramload setup */
		if (rd_doload==2) {
			if (rd_load_disk(1)) {
				ROOT_DEV = Root_RAM1;
				root_device_name = NULL;
			}
		} else
			change_floppy("root floppy");
	}
#endif
#ifdef CONFIG_BLOCK
    
	create_dev("/dev/root", ROOT_DEV);            //創建根設備節點;
 
	mount_block_root("/dev/root", root_mountflags);
#endif
}

這個函數在create_dev函數中創建根設備節點。create_dev函數實際調用了mknod系統調用對應的內核函數sys_mknod。

參數爲: name = "/dev/root", mode = S_IFBLK|0600,dev = xxx;

static inline int create_dev(char *name, dev_t dev)
{
	sys_unlink(name);
	return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
}

SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
	return sys_mknodat(AT_FDCWD, filename, mode, dev);
}

最後實際是調用sys_mknodat函數(fs/namei.c),SYSCALL_DEFINE4是函數定義的宏,展開後就變成sys_mknodat函數。

sys_mknodat函數調用user_path_create得到要創建節點的path,節點父目錄的信息保存在path中,然後調用vfs_mknod創建節點。

SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
		unsigned, dev)
{
	struct dentry *dentry;
	struct path path;
	int error;

	if (S_ISDIR(mode))
		return -EPERM;

	dentry = user_path_create(dfd, filename, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	if (!IS_POSIXACL(path.dentry->d_inode))
		mode &= ~current_umask();
	error = may_mknod(mode);
	if (error)
		goto out_dput;
	error = mnt_want_write(path.mnt);
	if (error)
		goto out_dput;
	error = security_path_mknod(&path, dentry, mode, dev);
	if (error)
		goto out_drop_write;
	switch (mode & S_IFMT) {
		case 0: case S_IFREG:
			error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
			break;
		case S_IFCHR: case S_IFBLK:
			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
					new_decode_dev(dev));
			break;
		case S_IFIFO: case S_IFSOCK:
			error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
			break;
	}
out_drop_write:
	mnt_drop_write(path.mnt);
out_dput:
	dput(dentry);
	mutex_unlock(&path.dentry->d_inode->i_mutex);
	path_put(&path);

	return error;
}

user_path_create:   (fs/namei.c)


簡單的調用了kern_path_create函數後返回。此時參數分別爲dfd = AT_FDCWD,tmp = "/dev/root",path爲輸出參數,is_dir = 0;

struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
{
	char *tmp = getname(pathname);
	struct dentry *res;
	if (IS_ERR(tmp))
		return ERR_CAST(tmp);
	res = kern_path_create(dfd, tmp, path, is_dir);
	putname(tmp);
	return res;
}


kern_path_create函數先調用do_path_lookup搜索路徑,搜索方式是搜索最後一個目錄項的父目錄。然後再調用lookup_hash搜索目錄最後一項,也就是root項。

struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
{
	struct dentry *dentry = ERR_PTR(-EEXIST);
	struct nameidata nd;
	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
		return ERR_PTR(error);

	/*
	 * Yucky last component or no last component at all?
	 * (foo/., foo/.., /////)
	 */
	if (nd.last_type != LAST_NORM)
		goto out;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
	nd.intent.open.flags = O_EXCL;

	/*
	 * Do the final lookup.
	 */
	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	dentry = lookup_hash(&nd);
	if (IS_ERR(dentry))
		goto fail;

	if (dentry->d_inode)
		goto eexist;
	/*
	 * Special case - lookup gave negative, but... we had foo/bar/
	 * From the vfs_mknod() POV we just have a negative dentry -
	 * all is fine. Let's be bastards - you had / on the end, you've
	 * been asking for (non-existent) directory. -ENOENT for you.
	 */
	if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
		dput(dentry);
		dentry = ERR_PTR(-ENOENT);
		goto fail;
	}
	*path = nd.path;
	return dentry;
eexist:
	dput(dentry);
	dentry = ERR_PTR(-EEXIST);
fail:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
out:
	path_put(&nd.path);
	return dentry;
}

do_path_lookup函數如下(fs/namei.c)。

調用path_lookupat進行搜索。加上LOOKUP_RCU參數。vfs文件系統路徑搜索主要有兩種mode,rcu-walk和 ref-walk。 這兩種方式在源代碼目錄 Documentation/filesystems/path-lookup.txt 的文檔中有詳細說明。

static int do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
	if (unlikely(retval == -ECHILD))
		retval = path_lookupat(dfd, name, flags, nd);
	if (unlikely(retval == -ESTALE))
		retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);

	if (likely(!retval)) {
		if (unlikely(!audit_dummy_context())) {
			if (nd->path.dentry && nd->inode)
				audit_inode(name, nd->path.dentry);
		}
	}
	return retval;
}

path_lookupat函數先初始化路徑,可以看作將設置路徑的根(搜索的起點)。VFS中目錄項是用dentry和inode描述的,這裏可以看作設置根路徑對應的dentry。


static int path_lookupat(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	struct file *base = NULL;
	struct path path;
	int err;

	/*
	 * Path walking is largely split up into 2 different synchronisation
	 * schemes, rcu-walk and ref-walk (explained in
	 * Documentation/filesystems/path-lookup.txt). These share much of the
	 * path walk code, but some things particularly setup, cleanup, and
	 * following mounts are sufficiently divergent that functions are
	 * duplicated. Typically there is a function foo(), and its RCU
	 * analogue, foo_rcu().
	 *
	 * -ECHILD is the error number of choice (just to avoid clashes) that
	 * is returned if some aspect of an rcu-walk fails. Such an error must
	 * be handled by restarting a traditional ref-walk (which will always
	 * be able to complete).
	 */
	err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);      //找到搜索的起點,保存在nd中;

	if (unlikely(err))
		return err;

	current->total_link_count = 0;
	err = link_path_walk(name, nd);                                   //從起點開始路徑的搜索,其中nd用來返回搜索結果;

	if (!err && !(flags & LOOKUP_PARENT)) {
		err = lookup_last(nd, &path);
		while (err > 0) {
			void *cookie;
			struct path link = path;
			nd->flags |= LOOKUP_PARENT;
			err = follow_link(&link, nd, &cookie);
			if (!err)
				err = lookup_last(nd, &path);
			put_link(nd, &link, cookie);
		}
	}

	if (!err)
		err = complete_walk(nd);

	if (!err && nd->flags & LOOKUP_DIRECTORY) {
		if (!nd->inode->i_op->lookup) {
			path_put(&nd->path);
			err = -ENOTDIR;
		}
	}

	if (base)
		fput(base);

	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
		path_put(&nd->root);
		nd->root.mnt = NULL;
	}
	return err;
}


主要調用set_root_rcu設置根路徑,將根路徑保存在nd->root。

static int path_init(int dfd, const char *name, unsigned int flags,
		     struct nameidata *nd, struct file **fp)
{
	int retval = 0;
	int fput_needed;
	struct file *file;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags | LOOKUP_JUMPED;
	nd->depth = 0;
	if (flags & LOOKUP_ROOT) {
		struct inode *inode = nd->root.dentry->d_inode;
		if (*name) {
			if (!inode->i_op->lookup)
				return -ENOTDIR;
			retval = inode_permission(inode, MAY_EXEC);
			if (retval)
				return retval;
		}
		nd->path = nd->root;
		nd->inode = inode;
		if (flags & LOOKUP_RCU) {
			br_read_lock(vfsmount_lock);
			rcu_read_lock();
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
		} else {
			path_get(&nd->path);
		}
		return 0;
	}

	nd->root.mnt = NULL;

	if (*name=='/') {
		if (flags & LOOKUP_RCU) {
			br_read_lock(vfsmount_lock);
			rcu_read_lock();                     
			set_root_rcu(nd);                       //設置根路徑
		} else {
			set_root(nd);
			path_get(&nd->root);
		}
		nd->path = nd->root;                            //搜索的初始路徑設爲根路徑
	} else if (dfd == AT_FDCWD) {
		if (flags & LOOKUP_RCU) {
			struct fs_struct *fs = current->fs;
			unsigned seq;

			br_read_lock(vfsmount_lock);
			rcu_read_lock();

			do {
				seq = read_seqcount_begin(&fs->seq);
				nd->path = fs->pwd;
				nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			} while (read_seqcount_retry(&fs->seq, seq));
		} else {
			get_fs_pwd(current->fs, &nd->path);
		}
	} else {
		struct dentry *dentry;

		file = fget_raw_light(dfd, &fput_needed);
		retval = -EBADF;
		if (!file)
			goto out_fail;

		dentry = file->f_path.dentry;

		if (*name) {
			retval = -ENOTDIR;
			if (!S_ISDIR(dentry->d_inode->i_mode))
				goto fput_fail;

			retval = inode_permission(dentry->d_inode, MAY_EXEC);
			if (retval)
				goto fput_fail;
		}

		nd->path = file->f_path;
		if (flags & LOOKUP_RCU) {
			if (fput_needed)
				*fp = file;
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			br_read_lock(vfsmount_lock);
			rcu_read_lock();
		} else {
			path_get(&file->f_path);
			fput_light(file, fput_needed);
		}
	}

	nd->inode = nd->path.dentry->d_inode;             //設置根目錄的inode
	return 0;

fput_fail:
	fput_light(file, fput_needed);
out_fail:
	return retval;
}


用當前進程的根路徑設置nd->root。

static __always_inline void set_root_rcu(struct nameidata *nd)
{
	if (!nd->root.mnt) {
		struct fs_struct *fs = current->fs;
		unsigned seq;

		do {
			seq = read_seqcount_begin(&fs->seq);
			nd->root = fs->root;                                         //設置nd的root爲當前進程fs的root;
			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
		} while (read_seqcount_retry(&fs->seq, seq));
	}
}


path_init返回後,調用link_path_walk正式開始搜索,搜索的結果會保存在struct nameidata結構體nd中。


struct nameidata {
	struct path	path;
	struct qstr	last;                                         //保存最後一個目錄項
	struct path	root;
	struct inode	*inode; /* path.dentry.d_inode */
	unsigned int	flags;
	unsigned	seq;
	int		last_type;
	unsigned	depth;
	char *saved_names[MAX_NESTED_LINKS + 1];

	/* Intent data */
	union {
		struct open_intent open;
	} intent;
};

如果路徑名以‘/’開頭,就把它跳過去,因爲在這種情況下nd->path已經指向本進程的根目錄了。如果路徑名中僅僅包含‘/’字符的話,那麼其搜索目標就是根目錄,所以任務完成。

調用hash_name,hash_name函數計算第一個目錄項的hash值並返回目錄項長度,對於本例中的/dev/root,第一個目錄項是dev,長度爲3。用name和len構造struct qstr結構體。若發現是最後一個目錄項,就將struct qstr賦給nd->last, 然後直接返回,若不是最後一項,就調用walk_component搜索這個目錄項。

static int link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	int err;
	
	while (*name=='/')
		name++;
	if (!*name)
		return 0;

	/* At this point we know we have a real path component. */
	for(;;) {
		struct qstr this;
		long len;
		int type;

		err = may_lookup(nd);
 		if (err)
			break;

		len = hash_name(name, &this.hash);
		this.name = name;
		this.len = len;

		type = LAST_NORM;
		if (name[0] == '.') switch (len) {
			case 2:
				if (name[1] == '.') {
					type = LAST_DOTDOT;
					nd->flags |= LOOKUP_JUMPED;
				}
				break;
			case 1:
				type = LAST_DOT;
		}
		if (likely(type == LAST_NORM)) {
			struct dentry *parent = nd->path.dentry;
			nd->flags &= ~LOOKUP_JUMPED;
			if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
				err = parent->d_op->d_hash(parent, nd->inode,
							   &this);
				if (err < 0)
					break;
			}
		}

		if (!name[len])                                                   //若是最後一項,就直接返回;
			goto last_component;
		/*
		 * If it wasn't NUL, we know it was '/'. Skip that
		 * slash, and continue until no more slashes.
		 */
		do {
			len++;
		} while (unlikely(name[len] == '/'));
		if (!name[len])
			goto last_component;

                name += len;                        //運行到這裏,表示當前節點爲中間節點;                        

		err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);      //搜索目錄項;
		if (err < 0)
			return err;

		if (err) {
			err = nested_symlink(&next, nd);
			if (err)
				return err;
		}
		if (can_lookup(nd->inode))
			continue;
		err = -ENOTDIR; 
		break;
		/* here ends the main loop */

last_component:
		nd->last = this;
		nd->last_type = type;
		return 0;
	}
	terminate_walk(nd);
	return err;
}

權限檢查。

static inline int may_lookup(struct nameidata *nd)
{
	if (nd->flags & LOOKUP_RCU) {
		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
		if (err != -ECHILD)
			return err;
		if (unlazy_walk(nd, NULL))
			return -ECHILD;
	}
	return inode_permission(nd->inode, MAY_EXEC);
}

hash_name返回第一個目錄項的長度。

static inline unsigned long hash_name(const char *name, unsigned int *hashp)
{
	unsigned long hash = init_name_hash();               //    #define init_name_hash()        0;
	unsigned long len = 0, c;

	c = (unsigned char)*name;
	do {
		len++;
		hash = partial_name_hash(c, hash);           // 計算並返回哈希值;
		c = (unsigned char)name[len];
	} while (c && c != '/');
	*hashp = end_name_hash(hash);                        // return hash;
	return len;
}


計算hash值;

static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}


walk_component:  (fs/namei.c)

調用do_lookup函數搜索目錄項的inode。如果找不到inode節點,則終止搜索。如果這個inode節點被mount上另一個文件系統,更新搜索路徑。

static inline int walk_component(struct nameidata *nd, struct path *path,
		struct qstr *name, int type, int follow)
{
	struct inode *inode;
	int err;
	/*
	 * "." and ".." are special - ".." especially so because it has
	 * to be able to know about the current root directory and
	 * parent relationships.
	 */
	if (unlikely(type != LAST_NORM))
		return handle_dots(nd, type);
	err = do_lookup(nd, name, path, &inode);
	if (unlikely(err)) {
		terminate_walk(nd);
		return err;
	}
	if (!inode) {
		path_to_nameidata(path, nd);
		terminate_walk(nd);
		return -ENOENT;
	}
	if (should_follow_link(inode, follow)) {
		if (nd->flags & LOOKUP_RCU) {
			if (unlikely(unlazy_walk(nd, path->dentry))) {
				terminate_walk(nd);
				return -ECHILD;
			}
		}
		BUG_ON(inode != path->dentry->d_inode);
		return 1;
	}
	path_to_nameidata(path, nd);                                              //將path的內容轉化到nd中;
	nd->inode = inode;
	return 0;
}


do_lookup: (fs/namei.c)  做實際的路徑搜索工作。

do_lookup先獲取要搜索目錄項的父目錄parent,根據LOOPUP_RCU標誌,調用__d_lookup_rcu或者__d_lookup。假設沒有LOOKUP_RCU標誌,則進入__d_lookup函數,若找到則__d_lookup返回找到的dentry,用找到的dentry給path->dentry賦值,將dentry對應的dentry->d_inode賦給輸出參數inode,inode有可能爲NULL。

static int do_lookup(struct nameidata *nd, struct qstr *name,
			struct path *path, struct inode **inode)
{
	struct vfsmount *mnt = nd->path.mnt;
	struct dentry *dentry, *parent = nd->path.dentry;
	int need_reval = 1;
	int status = 1;
	int err;

	/*
	 * Rename seqlock is not required here because in the off chance
	 * of a false negative due to a concurrent rename, we're going to
	 * do the non-racy lookup, below.
	 */
	if (nd->flags & LOOKUP_RCU) {
		unsigned seq;
		*inode = nd->inode;
		dentry = __d_lookup_rcu(parent, name, &seq, inode);
		if (!dentry)
			goto unlazy;

		/* Memory barrier in read_seqcount_begin of child is enough */
		if (__read_seqcount_retry(&parent->d_seq, nd->seq))
			return -ECHILD;
		nd->seq = seq;

		if (unlikely(d_need_lookup(dentry)))
			goto unlazy;
		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
			status = d_revalidate(dentry, nd);
			if (unlikely(status <= 0)) {
				if (status != -ECHILD)
					need_reval = 0;
				goto unlazy;
			}
		}
		path->mnt = mnt;
		path->dentry = dentry;
		if (unlikely(!__follow_mount_rcu(nd, path, inode)))
			goto unlazy;
		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
			goto unlazy;
		return 0;
unlazy:
		if (unlazy_walk(nd, dentry))
			return -ECHILD;
	} else {
		dentry = __d_lookup(parent, name);
	}

	if (unlikely(!dentry))
		goto need_lookup;

	if (unlikely(d_need_lookup(dentry))) {
		dput(dentry);
		goto need_lookup;
	}

	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
		status = d_revalidate(dentry, nd);
	if (unlikely(status <= 0)) {
		if (status < 0) {
			dput(dentry);
			return status;
		}
		if (!d_invalidate(dentry)) {
			dput(dentry);
			goto need_lookup;
		}
	}
done:
	path->mnt = mnt;
	path->dentry = dentry;
	err = follow_managed(path, nd->flags);
	if (unlikely(err < 0)) {
		path_put_conditional(path, nd);
		return err;
	}
	if (err)
		nd->flags |= LOOKUP_JUMPED;
	*inode = path->dentry->d_inode;
	return 0;

need_lookup:
	BUG_ON(nd->inode != parent->d_inode);

	mutex_lock(&parent->d_inode->i_mutex);
	dentry = __lookup_hash(name, parent, nd);
	mutex_unlock(&parent->d_inode->i_mutex);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	goto done;
}

__d_lookup: (fs/dcache.c)

在內存中尋找該節點已經建立的dentry結構,內核中有個hash表dentry_hashtable,是一個struct hlist_bl_head結構的指針數組,每一個struct hlist_bl_head結構包含有一個隊列頭指針struct hlist_bl_node*。

一旦在內存中建立起一個目錄節點的dentry結構,就根據其節點名的hash值和父目錄的dentry值,調用d_hash獲得dentry_hashtable中的一個隊列頭指針,然後將dentry成員變量dentry->d_hash掛入dentry_hashtable表中的這個隊列,需要尋找時會根據hash值查找dentry_hashtable表。

將dentry->d_hash掛入dentry_hashtable的過程在d_rehash函數中完成,後面分析。

這裏__d_lookup先調用d_hash從dentry_hashtable表中獲得這個隊列。遍歷這個隊列,看看隊列中有沒有目錄項的hash值和要搜索的目錄項的hash值相同的,如果有,再比較parent是否是同一個parent,最後調用dentry_cmp進一步比較是否是同一個目錄項,如果是,表示找到了,就增加引用計數並將其返回。如果沒有找到,返回NULL。

struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();
	
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {                           //遍歷散列
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						tlen, tname, name))
				goto next;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				goto next;
		}

		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
 	}
 	rcu_read_unlock();

 	return found;
}

d_hash爲散列函數,它將parent與hash值相結合,映射到dentry_hashtable表中,返回相應的散列鏈。

static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}


__d_lookup然後遍歷剛剛得到的隊列上的每個元素。hlist_bl_for_each_entry_rcu實現遍歷。

#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member)		\
	for (pos = hlist_bl_first_rcu(head);				\
		pos &&							\
		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
		pos = rcu_dereference_raw(pos->next))

static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
{
	return (struct hlist_bl_node *)
		((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
}


struct hlist_bl_head {
	struct hlist_bl_node *first;
};

struct hlist_bl_node {
	struct hlist_bl_node *next, **pprev;
};


container_of返回包含d_hash的struct dentry結構體;container_of是linux內核常用的宏,能根據結構體內部成員的指針返回包含這個成員的結構體的指針。

#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)


執行到這裏,就層層返回到do_lookup,用找到的dentry給path賦值,接着調用follow_managed函數 (fs/namei.c)。

follow_managed處理一些有特殊用途的目錄項。比如標記爲mount點的目錄項,如果目錄項是一個mount點,那就切換到另一個文件系統,將path->dentry重新設爲新文件系統的根。

最後,do_lookup函數將找到的dentry->d_inode賦值給輸出參數inode,返回。

static int follow_managed(struct path *path, unsigned flags)
{
	struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
	unsigned managed;
	bool need_mntput = false;
	int ret = 0;

	/* Given that we're not holding a lock here, we retain the value in a
	 * local variable for each dentry as we look at it so that we don't see
	 * the components of that value change under us */
	while (managed = ACCESS_ONCE(path->dentry->d_flags),
	       managed &= DCACHE_MANAGED_DENTRY,
	       unlikely(managed != 0)) {
		/* Allow the filesystem to manage the transit without i_mutex
		 * being held. */
		if (managed & DCACHE_MANAGE_TRANSIT) {
			BUG_ON(!path->dentry->d_op);
			BUG_ON(!path->dentry->d_op->d_manage);
			ret = path->dentry->d_op->d_manage(path->dentry, false);
			if (ret < 0)
				break;
		}

		/* Transit to a mounted filesystem. */
		if (managed & DCACHE_MOUNTED) {
			struct vfsmount *mounted = lookup_mnt(path);
			if (mounted) {
				dput(path->dentry);
				if (need_mntput)
					mntput(path->mnt);
				path->mnt = mounted;
				path->dentry = dget(mounted->mnt_root);
				need_mntput = true;
				continue;
			}

			/* Something is mounted on this dentry in another
			 * namespace and/or whatever was mounted there in this
			 * namespace got unmounted before we managed to get the
			 * vfsmount_lock */
		}

		/* Handle an automount point */
		if (managed & DCACHE_NEED_AUTOMOUNT) {
			ret = follow_automount(path, flags, &need_mntput);
			if (ret < 0)
				break;
			continue;
		}

		/* We didn't change the current path point */
		break;
	}

	if (need_mntput && path->mnt == mnt)
		mntput(path->mnt);
	if (ret == -EISDIR)
		ret = 0;
	return ret < 0 ? ret : need_mntput;
}
返回到walk_component函數,檢查inode是不是NULL,如果是則終止搜索。

調用path_to_nameidata函數,path_to_nameidata函數將path的內容轉化到nd裏面去。將找到的inode節點賦給nd->inode。

static inline void path_to_nameidata(const struct path *path,
					struct nameidata *nd)
{
	if (!(nd->flags & LOOKUP_RCU)) {
		dput(nd->path.dentry);
		if (nd->path.mnt != path->mnt)
			mntput(nd->path.mnt);
	}
	nd->path.mnt = path->mnt;
	nd->path.dentry = path->dentry;
}


返回到link_path_walk函數,繼續循環搜索下一個目錄項。若發現是最後一個目錄項,就將struct qstr this賦給nd->last, 然後直接返回到path_lookupat函數。

如果link_path_walk返回0,調用complete_walk函數完成搜索 (fs/namei.c)。


static int complete_walk(struct nameidata *nd)
{
	struct dentry *dentry = nd->path.dentry;
	int status;

	if (nd->flags & LOOKUP_RCU) {
		nd->flags &= ~LOOKUP_RCU;
		if (!(nd->flags & LOOKUP_ROOT))
			nd->root.mnt = NULL;
		spin_lock(&dentry->d_lock);
		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
			spin_unlock(&dentry->d_lock);
			rcu_read_unlock();
			br_read_unlock(vfsmount_lock);
			return -ECHILD;
		}
		BUG_ON(nd->inode != dentry->d_inode);
		spin_unlock(&dentry->d_lock);
		mntget(nd->path.mnt);
		rcu_read_unlock();
		br_read_unlock(vfsmount_lock);
	}

	if (likely(!(nd->flags & LOOKUP_JUMPED)))
		return 0;

	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
		return 0;

	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
		return 0;

	/* Note: we do not d_invalidate() */
	status = d_revalidate(dentry, nd);
	if (status > 0)
		return 0;

	if (!status)
		status = -ESTALE;

	path_put(&nd->path);
	return status;
}

層層返回到kern_path_create,調用lookup_hash函數 (fs/namei.c)進行最後一個目錄項的搜索。


static struct dentry *lookup_hash(struct nameidata *nd)
{
	return __lookup_hash(&nd->last, nd->path.dentry, nd);
}

調用__lookup_hash函數進行搜索,__lookup_hash調用lookup_dcache搜索dcache,若找不到則lookup_real調用對應文件系統的lookup函數去查找。

static struct dentry *__lookup_hash(struct qstr *name,
		struct dentry *base, struct nameidata *nd)
{
	bool need_lookup;
	struct dentry *dentry;

	dentry = lookup_dcache(name, base, nd, &need_lookup);
	if (!need_lookup)
		return dentry;

	return lookup_real(base->d_inode, dentry, nd);
}

調用d_lookup搜索。

static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
				    struct nameidata *nd, bool *need_lookup)
{
	struct dentry *dentry;
	int error;

	*need_lookup = false;
	dentry = d_lookup(dir, name);
	if (dentry) {
		if (d_need_lookup(dentry)) {
			*need_lookup = true;
		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
			error = d_revalidate(dentry, nd);
			if (unlikely(error <= 0)) {
				if (error < 0) {
					dput(dentry);
					return ERR_PTR(error);
				} else if (!d_invalidate(dentry)) {
					dput(dentry);
					dentry = NULL;
				}
			}
		}
	}

	if (!dentry) {
		dentry = d_alloc(dir, name);
		if (unlikely(!dentry))
			return ERR_PTR(-ENOMEM);

		*need_lookup = true;
	}
	return dentry;
}

d_lookup進一步調用__d_lookup。__d_lookup上面已經分析過了,就是在內存的dentry_hashtable表中查找dentry結構。這裏/dev下的root還沒有創建,所以肯定找不到,返回NULL。層層返回到lookup_dcache,如果d_lookup返回的dentry爲空,則調用d_alloc分配一個dentry對象。

struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

        do {
                seq = read_seqbegin(&rename_lock);
                dentry = __d_lookup(parent, name);
                if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}

d_alloc: (fs/dcache.c)

調用__d_alloc分配一個dcache對象並初始化。設置dentry的父目錄,並將新的dentry鏈入其父目錄的d_subdirs鏈表中。

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}

__d_alloc: (fs/dcache.c)

分配並初始化一個dcache對象。

struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry); 
			return NULL;
		}
	} else  {
		dname = dentry->d_iname;
	}	
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);
    printk("__d_alloc: dentry name: %s\n", dentry->d_name.name);
	return dentry;
}

返回到lookup_dcache中,將need_lookup置爲真,向上返回到__lookup_hash,__lookup_hash最後調用lookup_real通過文件系統自己的lookup函數從設備讀目錄信息。


lookup_real: (fs/namei.c)

在緩存中無法找到指定的目錄項,那就創建該目錄項。通過底層文件系統的lookup函數從設備讀,lookup在讀目錄項的過程中,也把該目錄項對應的inode結構從磁盤讀出來,並賦值給dentry的d_inode字段。

static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	struct dentry *old;

	/* Don't create child dentry for a dead directory. */
	if (unlikely(IS_DEADDIR(dir))) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}

	old = dir->i_op->lookup(dir, dentry, nd);
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
	return dentry;
}


simple_lookup: (fs/libfs.c)

struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	static const struct dentry_operations simple_dentry_operations = {
		.d_delete = simple_delete_dentry,
	};

	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_set_d_op(dentry, &simple_dentry_operations);
	d_add(dentry, NULL);
	return NULL;
}


d_set_d_op: (fs/dcache.c)

用simple_dentry_operations初始化dentry->d_op。

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE ));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;

}



d_add: (include/linux/cache.h)

傳入的inode參數爲NULL。d_instantiate函數將dentry->d_inode置爲NULL。

d_rehash將dentry加入hash表。

static inline void d_add(struct dentry *entry, struct inode *inode)
{
	d_instantiate(entry, inode);
	d_rehash(entry);
}

d_rehash: (fs/dcache.c)

void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}


_d_rehash: (fs/dcache.c)

static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}


__d_rehash: (fs/dcache.c)

通過hlist_bl_add_head_rcu將dentry鏈入dentry_hashtable表。

static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
	BUG_ON(!d_unhashed(entry));
	hlist_bl_lock(b);
	entry->d_flags |= DCACHE_RCUACCESS;
	hlist_bl_add_head_rcu(&entry->d_hash, b);
	hlist_bl_unlock(b);
}

hlist_bl_add_head_rcu: (include/linux/rculist_bl.h)

static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
					struct hlist_bl_head *h)
{
	struct hlist_bl_node *first;

	/* don't need hlist_bl_first_rcu because we're under lock */
	first = hlist_bl_first(h);

	n->next = first;
	if (first)
		first->pprev = &n->next;
	n->pprev = &h->first;

	/* need _rcu because we can have concurrent lock free readers */
	hlist_bl_set_first_rcu(h, n);
}



lookup_real返回後,向上一直返回到kern_path_create函數,接着kern_path_create函數檢查返回的dentry->d_inode是否不空,如果不空表示inode節點已經存在了,不需要創建,本例中/dev下的root節點是不存在的(需要創建),因此將nd.path賦值給path後,直接返回dentry。

返回到sys_mknodat函數,執行mnt_want_write,這個函數告訴文件系統將要執行寫操作了,如果不可寫,將會返回錯誤。

如果可寫,調用vfs_mknod函數創建inode節點。


vfs_mknod : (fs/namei.c)

這個函數其實就是調用了要創建節點的父目錄的inode節點的方法i_op->mknod。

int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	int error = may_create(dir, dentry);

	if (error)
		return error;

	if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
	    !ns_capable(inode_userns(dir), CAP_MKNOD))
		return -EPERM;

	if (!dir->i_op->mknod)
		return -EPERM;

	error = devcgroup_inode_mknod(mode, dev);
	if (error)
		return error;

	error = security_inode_mknod(dir, dentry, mode, dev);
	if (error)
		return error;

	error = dir->i_op->mknod(dir, dentry, mode, dev);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

inode節點的方法是創建inode節點時初始化的,這裏也就是/dev節點創建時初始化的,dev節點是在rootfs虛擬文件系統下創建的(前面說過rootfs虛擬文件系統是linux kernel真正意義上掛載的第一個根文件系統),它的i_op被賦值爲ramfs_dir_inode_operations。

(fs/ramfs/inode.c)

static const struct inode_operations ramfs_dir_inode_operations = {
	.create		= ramfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= ramfs_symlink,
	.mkdir		= ramfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= ramfs_mknod,
	.rename		= simple_rename,
};


所以i_op->mknod方法就是調用的ramfs_mknod函數。

ramfs_mknod: (fs/ramfs/inode.c)

ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
	int error = -ENOSPC;

	if (inode) {
		d_instantiate(dentry, inode);
		dget(dentry);	/* Extra count - pin the dentry in core */
		error = 0;
		dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	}
	return error;
}

ramfs_mknod函數調用了ramfs_get_inode函數創建一個新的inode。

ramfs_get_inode: (fs/ramfs/inode.c)

struct inode *ramfs_get_inode(struct super_block *sb,
				const struct inode *dir, umode_t mode, dev_t dev)
{
	struct inode * inode = new_inode(sb);

	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		inode->i_mapping->a_ops = &ramfs_aops;
		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
		mapping_set_unevictable(inode->i_mapping);
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &ramfs_file_inode_operations;
			inode->i_fop = &ramfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &ramfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;

			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inc_nlink(inode);
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}


調用new_inode分配一個新inode。

new_inode: (fs/inode.c)

new_inode進一步調用new_inode_pseudo函數新建inode。如果創建成功,則將其加入到該文件系統超級塊的inode鏈表。

struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_sb_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}

new_inode_pseudo: (fs/inode.c)

pseudo是“虛擬”的意思,因爲當前是在虛擬根文件系統————rootfs文件系統下創建節點,前面提過rootfs文件系統是基於內存的特殊文件系統,它的inode是隻存在於內存中的。而其它像ext, fat等文件系統的inode節點都存儲在硬盤等外部設備中。

new_inode_pseudo調用alloc_inode創建inode。

struct inode *new_inode_pseudo(struct super_block *sb)
{
	struct inode *inode = alloc_inode(sb);

	if (inode) {
		spin_lock(&inode->i_lock);
		inode->i_state = 0;
		spin_unlock(&inode->i_lock);
		INIT_LIST_HEAD(&inode->i_sb_list);
	}
	return inode;
}

alloc_inode: (fs/inode.c)

如果對應文件系統超級塊的s_op->alloc_inode存在,則調用它,否則調用kmem_cache_alloc函數分配inode。

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

在掛載rootfs虛擬根文件系統的時候,對應超級塊的s_op已經被設置爲ramfs_ops了。可以看到,alloc_inode沒有設置,所以調用kmem_cache_alloc函數來分配inode。

static const struct super_operations ramfs_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
	.show_options	= generic_show_options,
};

inode分配好後就將它返回new_inode_pseudo,再返回到new_inode,接着new_inode函數調用inode_sb_list_add將新的inode加入到文件系統超級塊的inode鏈表中。

返回到ramfs_get_inode。
從new_inode返回後,進行一些初始化工作(包括inode的節點號),然後進入init_special_inode函數。


init_special_inode: (fs/inode.c)

前面create_dev的時候傳進來的參數mode是塊設備,所以這裏將inode節點操作函數設爲def_blk_fops,設備號設爲rdev。

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
    	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
       		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}

至此,inode節點就創建好了,向上層層返回到ramfs_get_inode,ramfs_get_inode就將創建好的inode節點直接返回到ramfs_mknod。

ramfs_mknod進一步調用d_instantiate。


d_instantiate: (fs/dcache.c)

d_instantiate函數封裝了__d_instantiate。

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}

__d_instantiate: (fs/dcache.c)

__d_instantiate將inode與dentry關聯起來。

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

層層返回,至此,/dev/root設備就創建完畢了。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章