Linux 中節點(inode)、設備(cdev)、驅動(ops)如何關聯

inode與cdev結構體對比(節選)

/*
 * Excerpt of struct inode: only the members discussed in this article
 * (ownership, device number, per-type pointer union, operation tables).
 */
struct inode {
    uid_t    i_uid;
    gid_t    i_gid;
    dev_t    i_rdev;    // device number of a device special file
    union {
        struct pipe_inode_info     *i_pipe;
        struct block_device        *i_bdev;
        struct cdev                *i_cdev;		// used when the inode is a character device
     };
     const struct inode_operations    *i_op;
     const struct file_operations    *i_fop;    /* former ->i_op->default_file_ops */
};

/*
 * Character device object. chrdev_open() recovers it from its embedded
 * kobject and copies ->ops into the opened file's f_op.
 */
struct cdev {
    struct kobject kobj;                   /* embedded kobject; chrdev_open() uses container_of() on it */
    struct module *owner;                  /* drivers set this to THIS_MODULE */
    const struct file_operations *ops;     /* driver's operation table, set via cdev_init() */
    struct list_head list;                 /* chrdev_open() links inode->i_devices onto this list */
    dev_t dev;                             /* device number; presumably stored by cdev_add() - TODO confirm */
    unsigned int count;                    /* presumably the count given to cdev_add() - TODO confirm */
};

可能存在的聯繫

  1. inode中struct cdev *i_cdev;可能保存了設備(cdev)的指針
  2. inode\cdev 都有文件操作集指針(struct file_operations *):inode中是i_fop,cdev中是ops
  3. inode\cdev 都有設備號信息(dev_t):inode中是i_rdev,cdev中是dev

字符設備驅動代碼(節選)

#define VSER_MAJOR      256
#define VSER_MINOR      0
#define VSER_DEV_CNT    2
#define VSER_DEV_NAME   "vser"

static DEFINE_KFIFO(vsfifo0, char, 32);
static DEFINE_KFIFO(vsfifo1, char, 32);

struct vser_dev {
    struct kfifo *fifo;
    struct cdev cdev;
};

static struct vser_dev vsdev[2];

/*
 * open() handler: map the inode's cdev back to the vser_dev that embeds it
 * and stash that pointer in the file so later operations can find it.
 * Always returns 0.
 */
static int vser_open(struct inode *inode, struct file *filp)
{
    struct vser_dev *owner_dev;

    owner_dev = container_of(inode->i_cdev, struct vser_dev, cdev);
    filp->private_data = owner_dev;

    return 0;
}

/*
 * read() handler: move up to `count` bytes from the device FIFO to the user
 * buffer. Returns the number of bytes copied, or the non-zero status from
 * kfifo_to_user() when that call reports a failure. `pos` is unused.
 */
static ssize_t vser_read(struct file *filp, char __user *buf, size_t count, loff_t *pos)
{
    struct vser_dev *vdev = filp->private_data;
    unsigned int nread = 0;
    ssize_t status;

    status = kfifo_to_user(vdev->fifo, buf, count, &nread);
    if (status == 0)
        return nread;

    return status;
}

/*
 * Operation table handed to cdev_init() for every vser device.
 * vser_release and vser_write are not part of this excerpt.
 */
static struct file_operations vser_ops = {
    .owner = THIS_MODULE,
    .open = vser_open,
    .release = vser_release,
    .read = vser_read,
    .write = vser_write,
};

/*
 * Module init: reserve VSER_DEV_CNT consecutive device numbers starting at
 * (VSER_MAJOR, VSER_MINOR), then initialise and add one cdev per number.
 *
 * Returns 0 on success, otherwise the error from register_chrdev_region()
 * or cdev_add(). On a cdev_add() failure every cdev added so far is removed
 * and the device-number region is released.
 */
static int __init vser_init(void)
{
    int i;
    int ret;
    dev_t dev;

    dev = MKDEV(VSER_MAJOR, VSER_MINOR);
    ret = register_chrdev_region(dev, VSER_DEV_CNT, VSER_DEV_NAME);
    if (ret)
        goto reg_err;

    for (i = 0; i < VSER_DEV_CNT; i++) {
        cdev_init(&vsdev[i].cdev, &vser_ops);
        vsdev[i].cdev.owner = THIS_MODULE;
        /* Device 0 is backed by vsfifo0, every other device by vsfifo1. */
        vsdev[i].fifo = i == 0 ? (struct kfifo *) &vsfifo0 : (struct kfifo*)&vsfifo1;

        /* Each cdev covers exactly one device number: dev + i. */
        ret = cdev_add(&vsdev[i].cdev, dev + i, 1);
        if (ret)
            goto add_err;
    }

    return 0;

add_err:
    /*
     * Entry i failed to add, so unwind entries i-1 .. 0. The bound must
     * include index 0; stopping at "i > 0" would leave vsdev[0].cdev
     * registered after the module failed to load.
     */
    while (--i >= 0)
        cdev_del(&vsdev[i].cdev);
    unregister_chrdev_region(dev, VSER_DEV_CNT);
reg_err:
    return ret;
}

module_init(vser_init);
module_exit(vser_exit);

其中

    // 第一個字符設備的設備號dev
    dev = MKDEV(VSER_MAJOR, VSER_MINOR);
    // 連續申請VSER_DEV_CNT個字符設備的設備號,設備名爲VSER_DEV_NAME
    ret = register_chrdev_region(dev, VSER_DEV_CNT, VSER_DEV_NAME);
    ...
    // 增加VSER_DEV_CNT個字符設備到系統中
    for (i = 0; i < VSER_DEV_CNT; i++) {
        // 初始化cdev->ops
        cdev_init(&vsdev[i].cdev, &vser_ops);
        ...
        vsdev[i].fifo = i == 0 ? (struct kfifo *) &vsfifo0 : (struct kfifo*)&vsfifo1;
        ...
        // 初始化cdev->dev, 值爲:dev + i; 最後一個參數1是count, 表示該cdev只對應從dev + i開始的1個設備號
        ret = cdev_add(&vsdev[i].cdev, dev + i, 1);
    }

register_chrdev_region

int register_chrdev_region(dev_t from, unsigned count, const char *name)
    -> __register_chrdev_region(MAJOR(n), MINOR(n), next - n, name);
#define CHRDEV_MAJOR_HASH_SIZE    255
/*
 * Record of one registered range of character device numbers.
 * chrdevs[] is an array of CHRDEV_MAJOR_HASH_SIZE singly linked chains of
 * these records; __register_chrdev_region() inserts into the chain picked
 * by major_to_index(major).
 */
static struct char_device_struct {
    struct char_device_struct *next;    /* next record on the same chain */
    unsigned int major;
    unsigned int baseminor;             /* first minor of the range */
    int minorct;                        /* number of minors in the range */
    char name[64];
    struct file_operations *fops;
    struct cdev *cdev;        /* will die */
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];

/*
 * Reserve the minor range [baseminor, baseminor + minorct) of `major` under
 * `name`. If major == 0 a free major is picked dynamically.
 *
 * Returns the newly inserted record on success, or ERR_PTR(-ENOMEM) when
 * the record cannot be allocated and ERR_PTR(-EBUSY) when no dynamic major
 * is free or the requested range overlaps an existing one.
 */
static struct char_device_struct *
__register_chrdev_region(unsigned int major, unsigned int baseminor,
               int minorct, const char *name)
{
    struct char_device_struct *cd, **cp;
    int ret = 0;
    int i;

    /* Zeroed record that will describe the new range. */
    cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL);
    if (cd == NULL)
        return ERR_PTR(-ENOMEM);

    mutex_lock(&chrdevs_lock);

    /*
     * Dynamic allocation: scan chrdevs[] from the top for an empty slot
     * and use its index as the major. Slot 0 is never handed out.
     */
    /* temporary */
    if (major == 0) {
        for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
            if (chrdevs[i] == NULL)
                break;
        }

        if (i == 0) {
            ret = -EBUSY;
            goto out;
        }
        major = i;
        ret = major;
    }

    cd->major = major;
    cd->baseminor = baseminor;
    cd->minorct = minorct;
    /*
     * Copy at most sizeof(name) - 1 bytes: the buffer comes from kzalloc(),
     * so the last byte stays '\0' and the name is always terminated even
     * when the caller passes 64 or more characters.
     */
    strncpy(cd->name, name, sizeof(cd->name) - 1);

    /*
     * Pick the chain for this major. major_to_index() is not shown here;
     * the original annotation describes it as
     * major % CHRDEV_MAJOR_HASH_SIZE, so e.g. majors 1 and 256 would share
     * one chain.
     */
    i = major_to_index(major);

    /*
     * Find the insertion point. Stop at the first record that either has a
     * larger major, or has the same major and starts at/after baseminor or
     * extends past baseminor.
     */
    for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
        if ((*cp)->major > major ||
            ((*cp)->major == major &&
             (((*cp)->baseminor >= baseminor) ||
              ((*cp)->baseminor + (*cp)->minorct > baseminor))))
            break;

    /* Check for overlapping minor ranges.  */
    if (*cp && (*cp)->major == major) {
        int old_min = (*cp)->baseminor;
        int old_max = (*cp)->baseminor + (*cp)->minorct - 1;
        int new_min = baseminor;
        int new_max = baseminor + minorct - 1;

        /*
         * Two closed ranges intersect iff each one starts no later than
         * the other ends. This covers overlap from the left, overlap
         * from the right, and the new range completely enclosing the
         * old one (which separate left/right tests do not catch).
         */
        if (new_min <= old_max && new_max >= old_min) {
            ret = -EBUSY;
            goto out;
        }
    }

    /* Link the new record into the chain in front of *cp. */
    cd->next = *cp;
    *cp = cd;
    mutex_unlock(&chrdevs_lock);
    return cd;
out:
    mutex_unlock(&chrdevs_lock);
    kfree(cd);
    return ERR_PTR(ret);
}

cdev_add

int cdev_add(struct cdev *p, dev_t dev, unsigned count)
    kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
/*
 * Device-number lookup table filled by kobj_map(): 255 chains of probe
 * records, chosen by MAJOR(dev) % 255.
 */
struct kobj_map {
    struct probe {
        struct probe *next;            /* next probe on the same chain */
        dev_t dev;                     /* first device number of the mapping */
        unsigned long range;           /* number of device numbers mapped */
        struct module *owner;
        kobj_probe_t *get;
        int (*lock)(dev_t, void *);
        void *data;                    /* opaque cookie; cdev_add() passes the struct cdev */
    } *probes[255];
    struct mutex *lock;                /* held by kobj_map() while chains are edited */
};

// 全局變量
static struct kobj_map *cdev_map;

/*
 * Register the device-number range [dev, dev + range) in `domain`.
 *
 * One probe record is created for every major the range touches (capped at
 * 255) and inserted into the matching chain, keeping each chain ordered by
 * ascending range. Returns 0 on success or -ENOMEM if the records cannot be
 * allocated.
 */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range,
         struct module *module, kobj_probe_t *probe,
         int (*lock)(dev_t, void *), void *data)
{
    unsigned first_major = MAJOR(dev);
    unsigned nr_majors = MAJOR(dev + range - 1) - first_major + 1;
    unsigned k;
    struct probe *entries;

    if (nr_majors > 255)
        nr_majors = 255;

    /* One contiguous allocation holds all probe records. */
    entries = kmalloc(sizeof(struct probe) * nr_majors, GFP_KERNEL);
    if (entries == NULL)
        return -ENOMEM;

    /* Every record describes the same mapping. */
    for (k = 0; k < nr_majors; k++) {
        struct probe *e = &entries[k];

        e->owner = module;
        e->get = probe;
        e->lock = lock;
        e->dev = dev;
        e->range = range;
        e->data = data;
    }

    mutex_lock(domain->lock);
    /* Record k goes on the chain for major first_major + k. */
    for (k = 0; k < nr_majors; k++) {
        struct probe **slot = &domain->probes[(first_major + k) % 255];

        /* Skip probes with a smaller range so narrower mappings stay first. */
        while (*slot && (*slot)->range < range)
            slot = &(*slot)->next;
        entries[k].next = *slot;
        *slot = &entries[k];
    }
    mutex_unlock(domain->lock);

    return 0;
}

mknod

因爲指定了設備號,這樣也便於明確生成節點,使用mknod命令:
mknod /dev/vser c 256 0
如果是動態申請的設備號,在設備加載後可以通過proc獲取:
cat /proc/devices


調用順序

  • 系統調用順序
sys_mknod		${KERNEL_BOOT}/fs/namei.c
    sys_mknodat(AT_FDCWD, filename, mode, dev);
        vfs_mknod(nd.dentry->d_inode, dentry,mode, new_decode_dev(dev));
            dir->i_op->mknod(dir, dentry, mode, dev);
            // 根據文件系統決定
                yaffs_mknod
  • yaffs2調用順序
init_special_inode
	yaffs_FillInodeFromObject
		read_inode						// struct super_operations yaffs_super_ops = { .read_inode = yaffs_read_inode,
			iget
				yaffs_get_inode
					yaffs_get_inode
					yaffs_lookup
					yaffs_mknod			// struct inode_operations yaffs_dir_inode_operations = { .mknod = yaffs_mknod
						yaffs_dir_inode_operations
						yaffs_mkdir
						yaffs_create
					yaffs_symlink

關於Yaffs2文件系統

static DECLARE_FSTYPE(yaffs2_fs_type, "yaffs2", yaffs2_read_super, FS_REQUIRES_DEV);

/* Filesystem type descriptor for "yaffs2". */
static struct file_system_type yaffs2_fs_type = {
	.owner = THIS_MODULE,
	.name = "yaffs2",
	.get_sb = yaffs2_read_super,		// important entry point
	.kill_sb = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};

/*
 * Superblock reader for the yaffs2 flavour: forwards to the shared
 * implementation with the yaffs version fixed to 2.
 */
static struct super_block *yaffs2_read_super(struct super_block *sb, void *data, int silent)
{
	const int yaffs_version = 2;

	return yaffs_internal_read_super(yaffs_version, sb, data, silent);
}

/*
 * Abridged excerpt ("..." marks omitted code): the part of interest is that
 * the superblock gets the yaffs magic number and yaffs_super_ops as its
 * operation table.
 */
static struct super_block *yaffs_internal_read_super(int yaffsVersion,
						     struct super_block *sb,
						     void *data, int silent)
{
	...
	sb->s_magic = YAFFS_MAGIC;
	sb->s_op = &yaffs_super_ops;
	...
}

關於yaffs2文件系統加載

fs_to_install
	register_filesystem(fsinst->fst);
		init_yaffs_fs	// 驅動加載入口
			module_init(init_yaffs_fs)

可見yaffs2文件系統也是以內核模塊的形式加入內核的。

數據結構

/*
 * Outline of the yaffs operation tables (bodies omitted, "..." marks
 * elided members). The *_inode_operations tables are struct
 * inode_operations: they are the ones assigned to inode->i_op, while the
 * struct file_operations tables are assigned to inode->i_fop.
 */
/* file */
static struct file_operations yaffs_file_operations = {
/* file inode */
static struct inode_operations yaffs_file_inode_operations = {
/* directory */
static struct file_operations yaffs_dir_operations = {
/* directory inode */
static struct inode_operations yaffs_dir_inode_operations = {
	...
	.mknod = yaffs_mknod,
	...
}
/* symlink inode */
static struct inode_operations yaffs_symlink_inode_operations = {
/* super block */
static struct super_operations yaffs_super_ops = {
	...
	.read_inode = yaffs_read_inode,
	...
}

猜測,其他文件系統也需要填充這幾個操作符。
對於目錄類型的對象才存在mknod操作,即有:yaffs_dir_inode_operations.mknod方法。

/*
 * Inode operations installed on yaffs directory inodes
 * (see yaffs_FillInodeFromObject, S_IFDIR case).
 */
static struct inode_operations yaffs_dir_inode_operations = {
    .create = yaffs_create,
    .lookup = yaffs_lookup,
    .link = yaffs_link,
    .unlink = yaffs_unlink,
    .symlink = yaffs_symlink,
    .mkdir = yaffs_mkdir,
    .rmdir = yaffs_unlink,      /* rmdir shares the unlink handler */
    .mknod = yaffs_mknod,       /* reached from vfs_mknod via dir->i_op->mknod */
    .rename = yaffs_rename,
    .setattr = yaffs_setattr,
};

/*
 * File creation. Allocate an inode, and we're done..
 *
 * dir    - inode of the directory that will contain the new node
 * dentry - dentry naming the new node; instantiated on success
 * mode   - file type and permission bits
 * rdev   - device number (meaningful for device nodes)
 *
 * Returns 0 on success, -EPERM when no yaffs object is found for dir, and
 * -ENOMEM when no yaffs object could be created (this includes S_IFLNK,
 * for which no object is created here).
 */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))        // kernel-version switch: kernels newer than 2.5.0
static int yaffs_mknod(struct inode *dir, struct dentry *dentry, int mode,
               dev_t rdev)
#else        // 2.5.0 and older kernels
static int yaffs_mknod(struct inode *dir, struct dentry *dentry, int mode,
               int rdev)
#endif
{
    struct inode *inode;

    yaffs_Object *obj = NULL;
    yaffs_Device *dev;
    // yaffs object obtained from the parent directory inode
    yaffs_Object *parent = yaffs_InodeToObject(dir);

    // initial value only; every path below overwrites it or returns early
    int error = -ENOSPC;
    uid_t uid = current->fsuid;
    // a set-GID parent directory passes its group on to the new node
    gid_t gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;

    if((dir->i_mode & S_ISGID) && S_ISDIR(mode))
        mode |= S_ISGID;

    if (parent) {
        T(YAFFS_TRACE_OS,
          (KERN_DEBUG "yaffs_mknod: parent object %d type %d\n",
           parent->objectId, parent->variantType));
    } else {
        T(YAFFS_TRACE_OS,
          (KERN_DEBUG "yaffs_mknod: could not get parent object\n"));
        return -EPERM;
    }

    T(YAFFS_TRACE_OS, ("yaffs_mknod: making oject for %s, "
               "mode %x dev %x\n",
               dentry->d_name.name, mode, rdev));

    dev = parent->myDev;

    yaffs_GrossLock(dev);
	// create the yaffs object that matches the requested file type
    switch (mode & S_IFMT) {
    default:
        // every type not listed below, character devices included
        /* Special (socket, fifo, device...) */
        T(YAFFS_TRACE_OS, (KERN_DEBUG
                   "yaffs_mknod: making special\n"));
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
        obj =
            yaffs_MknodSpecial(parent, dentry->d_name.name, mode, uid,
                       gid, old_encode_dev(rdev));
#else
        obj =
            yaffs_MknodSpecial(parent, dentry->d_name.name, mode, uid,
                       gid, rdev);
#endif
        break;
    case S_IFREG:        /* file          */
        T(YAFFS_TRACE_OS, (KERN_DEBUG "yaffs_mknod: making file\n"));
        obj =
            yaffs_MknodFile(parent, dentry->d_name.name, mode, uid,
                    gid);
        break;
    case S_IFDIR:        /* directory */
        T(YAFFS_TRACE_OS,
          (KERN_DEBUG "yaffs_mknod: making directory\n"));
        obj =
            yaffs_MknodDirectory(parent, dentry->d_name.name, mode,
                     uid, gid);
        break;
    case S_IFLNK:        /* symlink */
        T(YAFFS_TRACE_OS, (KERN_DEBUG "yaffs_mknod: making file\n"));
        obj = NULL;    /* Do we ever get here? */
        break;
    }

    /* Can not call yaffs_get_inode() with gross lock held */
    yaffs_GrossUnlock(dev);

    if (obj) {
        // obtain an inode for obj and attach it to the dentry
        // NOTE(review): yaffs_get_inode() can return NULL; the result is not checked here
        inode = yaffs_get_inode(dir->i_sb, mode, rdev, obj);
        d_instantiate(dentry, inode);
        T(YAFFS_TRACE_OS,
          (KERN_DEBUG "yaffs_mknod created object %d count = %d\n",
           obj->objectId, atomic_read(&inode->i_count)));
        error = 0;
    } else {
        T(YAFFS_TRACE_OS,
          (KERN_DEBUG "yaffs_mknod failed making object\n"));
        error = -ENOMEM;
    }

    return error;
}

/*
 * Mknod (create) a new object.
 * equivalentObject only has meaning for a hard link;
 * aliasString only has meaning for a symlink.
 * rdev only has meaning for devices (a subset of special objects)
 *
 * Returns the new object, or NULL when an entry called `name` already
 * exists in `parent`, when the object or the symlink alias cannot be
 * allocated, or when the object header cannot be written.
 */
static yaffs_Object *yaffs_MknodObject(yaffs_ObjectType type,
                       yaffs_Object * parent,
                       const YCHAR * name,
                       __u32 mode,
                       __u32 uid,
                       __u32 gid,
                       yaffs_Object * equivalentObject,
                       const YCHAR * aliasString, __u32 rdev)
{
    yaffs_Object *in;
    YCHAR *str = NULL;

    yaffs_Device *dev = parent->myDev;

    /* Check if the entry exists. If it does then fail the call since we don't want a dup.*/
    if (yaffs_FindObjectByName(parent, name)) {
        return NULL;
    }

    in = yaffs_CreateNewObject(dev, -1, type);

    /*
     * Bail out before touching the alias: cloning it first would leak the
     * copy when no object exists to own it, and a failed clone would hand
     * a NULL object to yaffs_FreeObject().
     */
    if (!in) {
        return NULL;
    }

    if (type == YAFFS_OBJECT_TYPE_SYMLINK) {
        str = yaffs_CloneString(aliasString);
        if (!str) {
            yaffs_FreeObject(in);
            return NULL;
        }
    }

    in->chunkId = -1;
    in->valid = 1;
    in->variantType = type;

    in->yst_mode = mode;

#ifdef CONFIG_YAFFS_WINCE
    yfsd_WinFileTimeNow(in->win_atime);
    in->win_ctime[0] = in->win_mtime[0] = in->win_atime[0];
    in->win_ctime[1] = in->win_mtime[1] = in->win_atime[1];

#else
    in->yst_atime = in->yst_mtime = in->yst_ctime = Y_CURRENT_TIME;

    in->yst_rdev = rdev;
    in->yst_uid = uid;
    in->yst_gid = gid;
#endif
    in->nDataChunks = 0;

    yaffs_SetObjectName(in, name);
    in->dirty = 1;

    yaffs_AddObjectToDirectory(parent, in);

    in->myDev = parent->myDev;

    switch (type) {
    case YAFFS_OBJECT_TYPE_SYMLINK:
        /* The object now owns the cloned alias string. */
        in->variant.symLinkVariant.alias = str;
        break;
    case YAFFS_OBJECT_TYPE_HARDLINK:
        in->variant.hardLinkVariant.equivalentObject =
            equivalentObject;
        in->variant.hardLinkVariant.equivalentObjectId =
            equivalentObject->objectId;
        list_add(&in->hardLinks, &equivalentObject->hardLinks);
        break;
    case YAFFS_OBJECT_TYPE_FILE:
    case YAFFS_OBJECT_TYPE_DIRECTORY:
    case YAFFS_OBJECT_TYPE_SPECIAL:
    case YAFFS_OBJECT_TYPE_UNKNOWN:
        /* do nothing */
        break;
    }

    if (yaffs_UpdateObjectHeader(in, name, 0, 0, 0) < 0) {
        /* Could not create the object header, fail the creation */
        yaffs_DestroyObject(in);
        in = NULL;
    }

    return in;
}

/*
 * Return the inode for a yaffs object, or NULL when either the superblock
 * or the object is missing. `mode` and `dev` are accepted but not used.
 *
 * The inode comes from iget(), which
 *  - calls back into yaffs_read_inode() as a side effect,
 *  - increments the inode's i_count,
 * so the caller must NOT hold grossLock or a deadlock will happen.
 */
struct inode *yaffs_get_inode(struct super_block *sb, int mode, int dev,
			      yaffs_Object * obj)
{
	if (sb == NULL) {
		T(YAFFS_TRACE_OS,
		  (KERN_DEBUG "yaffs_get_inode for NULL super_block!!\n"));
		return NULL;
	}

	if (obj == NULL) {
		T(YAFFS_TRACE_OS,
		  (KERN_DEBUG "yaffs_get_inode for NULL object!!\n"));
		return NULL;
	}

	T(YAFFS_TRACE_OS,
	  (KERN_DEBUG "yaffs_get_inode for object %d\n", obj->objectId));

	return iget(sb, obj->objectId);
}

linux-2.6.22.6\include\linux\fs.h

/*
 * Look up inode number `ino` on `sb`. When iget_locked() hands back an
 * inode flagged I_NEW, let the filesystem fill it in through
 * s_op->read_inode() and then unlock it. Returns whatever iget_locked()
 * returned (possibly NULL).
 */
static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
	struct inode *node = iget_locked(sb, ino);

	if (!node)
		return node;

	if (!(node->i_state & I_NEW))
		return node;

	sb->s_op->read_inode(node);
	unlock_new_inode(node);
	return node;
}

Documentation/filesystems/Locking
->read_inode() is not a method - it's a callback used in iget().

/*
 * Abridged excerpt ("..." marks omitted code), repeated to show where
 * sb->s_op comes from: iget() calls sb->s_op->read_inode(), and s_op is set
 * to yaffs_super_ops here.
 */
static struct super_block *yaffs_internal_read_super(int yaffsVersion,
						     struct super_block *sb,
						     void *data, int silent)
{
	...
	sb->s_magic = YAFFS_MAGIC;
	sb->s_op = &yaffs_super_ops;
	...
}

/*
 * Abridged excerpt ("..." marks omitted members): the read_inode callback
 * of the yaffs superblock operations is yaffs_read_inode.
 */
static struct super_operations yaffs_super_ops = {
	...
	.read_inode = yaffs_read_inode,
	...
}

/*
 * read_inode callback: find the yaffs object whose number equals
 * inode->i_ino and fill the inode from it.
 *
 * NB This runs as a side effect of other functions that had to drop the
 * gross lock to avoid deadlocks, so the lock is taken again here around
 * the lookup and the fill.
 */
static void yaffs_read_inode(struct inode *inode)
{
	yaffs_Device *device = yaffs_SuperToDevice(inode->i_sb);

	T(YAFFS_TRACE_OS,
	  (KERN_DEBUG "yaffs_read_inode for %d\n", (int)inode->i_ino));

	yaffs_GrossLock(device);
	yaffs_FillInodeFromObject(inode,
				  yaffs_FindObjectByNumber(device, inode->i_ino));
	yaffs_GrossUnlock(device);
}

/*
 * Populate a VFS inode from a yaffs object.
 *
 * Repairs the file-type bits of obj->yst_mode when they disagree with the
 * object's variant type, copies ownership, device number, timestamps, size
 * and link count into the inode, installs the operation tables that match
 * the file type, and links inode and object to each other.
 * When either argument is NULL only a trace message is emitted.
 */
static void yaffs_FillInodeFromObject(struct inode *inode, yaffs_Object * obj)
{
	if (inode && obj) {


		/* Check mode against the variant type and attempt to repair if broken. */
		/* 'mode' is a snapshot taken before any repair; the repair itself is written to obj->yst_mode. */
 		__u32 mode = obj->yst_mode;
 		switch( obj->variantType ){
 		case YAFFS_OBJECT_TYPE_FILE :
 		        if( ! S_ISREG(mode) ){
 			        obj->yst_mode &= ~S_IFMT;
 			        obj->yst_mode |= S_IFREG;
 			}

 			break;
 		case YAFFS_OBJECT_TYPE_SYMLINK :
 		        if( ! S_ISLNK(mode) ){
 			        obj->yst_mode &= ~S_IFMT;
 				obj->yst_mode |= S_IFLNK;
 			}

 			break;
 		case YAFFS_OBJECT_TYPE_DIRECTORY :
 		        if( ! S_ISDIR(mode) ){
 			        obj->yst_mode &= ~S_IFMT;
 			        obj->yst_mode |= S_IFDIR;
 			}

 			break;
 		case YAFFS_OBJECT_TYPE_UNKNOWN :
 		case YAFFS_OBJECT_TYPE_HARDLINK :
 		case YAFFS_OBJECT_TYPE_SPECIAL :
 		default:
 		        /* TODO? */
 		        break;
 		}

		/* From here on the (possibly repaired) obj->yst_mode is used. */
		inode->i_ino = obj->objectId;
		inode->i_mode = obj->yst_mode;
		inode->i_uid = obj->yst_uid;
		inode->i_gid = obj->yst_gid;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19))
		inode->i_blksize = inode->i_sb->s_blocksize;
#endif
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))

		/* yst_rdev holds the value yaffs_mknod produced with old_encode_dev(); decode it back. */
		inode->i_rdev = old_decode_dev(obj->yst_rdev);
		inode->i_atime.tv_sec = (time_t) (obj->yst_atime);
		inode->i_atime.tv_nsec = 0;
		inode->i_mtime.tv_sec = (time_t) obj->yst_mtime;
		inode->i_mtime.tv_nsec = 0;
		inode->i_ctime.tv_sec = (time_t) obj->yst_ctime;
		inode->i_ctime.tv_nsec = 0;
#else
		inode->i_rdev = obj->yst_rdev;
		inode->i_atime = obj->yst_atime;
		inode->i_mtime = obj->yst_mtime;
		inode->i_ctime = obj->yst_ctime;
#endif
		inode->i_size = yaffs_GetObjectFileLength(obj);
		/* size rounded up to 512-byte units */
		inode->i_blocks = (inode->i_size + 511) >> 9;

		inode->i_nlink = yaffs_GetObjectLinkCount(obj);

		T(YAFFS_TRACE_OS,
		  (KERN_DEBUG
		   "yaffs_FillInode mode %x uid %d gid %d size %d count %d\n",
		   inode->i_mode, inode->i_uid, inode->i_gid,
		   (int)inode->i_size, atomic_read(&inode->i_count)));

		/* Install the operation tables that match the file type. */
		switch (obj->yst_mode & S_IFMT) {
		default:	/* fifo, device or socket */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
			init_special_inode(inode, obj->yst_mode,
					   old_decode_dev(obj->yst_rdev));
#else
			init_special_inode(inode, obj->yst_mode,
					   (dev_t) (obj->yst_rdev));
#endif
			break;
		case S_IFREG:	/* file */
			inode->i_op = &yaffs_file_inode_operations;
			inode->i_fop = &yaffs_file_operations;
			inode->i_mapping->a_ops =
			    &yaffs_file_address_operations;
			break;
		case S_IFDIR:	/* directory */
			inode->i_op = &yaffs_dir_inode_operations;
			inode->i_fop = &yaffs_dir_operations;
			break;
		case S_IFLNK:	/* symlink */
			inode->i_op = &yaffs_symlink_inode_operations;
			break;
		}

		/* Cross-link the inode and the yaffs object. */
		yaffs_InodeToObjectLV(inode) = obj;

		obj->myInode = inode;

	} else {
		/* NOTE(review): the message below says "yaffs_FileInode" although this is yaffs_FillInodeFromObject. */
		T(YAFFS_TRACE_OS,
		  (KERN_DEBUG "yaffs_FileInode invalid parameters\n"));
	}

}

linux-2.6.22.6\fs\inode.c

/*
 * Set up the inode of a special file (character/block device, FIFO or
 * socket): store the mode, pick the default file_operations for the type
 * and, for device nodes, record the device number. Any other mode is only
 * reported with a debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
	       mode);
}
EXPORT_SYMBOL(init_special_inode);

關於節點數據類型

需要判斷節點類型,涉及到數據

/*
 * Abridged excerpt ("..." marks omitted members). `mode` carries the file
 * type and permission bits discussed below.
 */
struct kstat {
	u64		ino;
	dev_t		dev;
	umode_t		mode;
	unsigned int	nlink;
	uid_t		uid;
	gid_t		gid;
	dev_t		rdev;
	loff_t		size;
	...
};

其中,對於文件的類型和權限由傳入的mode確定。
源碼中具體值以8進製表示。

關於8進制

C/C++規定,一個數如果要指明它採用八進制,必須在它前面加上一個0,如:123是十進制,但0123則表示採用八進制。這就是八進制數在C、C++中的表達方法。

mode_t其實就是普通的unsigned int.
目前,st_mode使用了其低16bit. 0170000 => 1+ 3*5 = 16.

其中,最低的9位(0-8)是權限,9-11是特殊位(set-UID、set-GID、sticky),12-15是文件類型。
具體定義如下:

S_IFMT   0170000 bitmask for the file type bitfields
S_IFSOCK 0140000 socket
S_IFLNK 0120000 symbolic link
S_IFREG 0100000 regular file
S_IFBLK 0060000 block device
S_IFDIR 0040000 directory
S_IFCHR 0020000 character device
S_IFIFO 0010000 fifo
S_ISUID 0004000 set UID bit
S_ISGID 0002000 set GID bit (see below)
S_ISVTX 0001000 sticky bit (see below)
S_IRWXU 00700     mask for file owner permissions
S_IRUSR 00400 owner has read permission
S_IWUSR 00200 owner has write permission
S_IXUSR 00100 owner has execute permission
S_IRWXG 00070     mask for group permissions
S_IRGRP 00040 group has read permission
S_IWGRP 00020 group has write permission
S_IXGRP 00010 group has execute permission
S_IRWXO 00007     mask for permissions for others (not in group)
S_IROTH 00004 others have read permission
S_IWOTH 00002 others have write permission
S_IXOTH 00001 others have execute permission

當我們需要快速獲得文件類型或訪問權限時,最好的方法就是使用glibc定義的宏。
如:S_ISDIR,S_IRWXU等, 定義如下:
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
對於字符設備,

/*
 * Set up the inode of a special file. Device nodes (character or block)
 * get their device number recorded and the matching default
 * file_operations; FIFOs and sockets only get their file_operations; any
 * other mode is reported with a debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
       inode->i_mode = mode;

       if (S_ISCHR(mode) || S_ISBLK(mode)) {
              /* Both device kinds store rdev; only the fops table differs. */
              inode->i_rdev = rdev;
              if (S_ISCHR(mode))
                     inode->i_fop = &def_chr_fops;
              else
                     inode->i_fop = &def_blk_fops;
       } else if (S_ISFIFO(mode)) {
              inode->i_fop = &def_fifo_fops;
       } else if (S_ISSOCK(mode)) {
              inode->i_fop = &bad_sock_fops;
       } else {
              printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
                              " inode %s:%lu\n", mode, inode->i_sb->s_id,
                              inode->i_ino);
       }
}

從上面的代碼實現(藍色部分),我們可以看到i_rdev初始化爲rdev,而rdev是由mknod中的參數構造而來的,這和我們設想的是一樣的;但是接下來出乎我們意料的是i_fop卻初始化爲&def_chr_fops(def_chr_fops定義如下),而更出乎我們意料的是查遍了整個流程也沒有看到i_cdev的初始化。這到底是怎麼回事呢?事到如今,我們也是別無他法了,上面的實現明確的告訴我們,i_cdev並沒有被初始化,而i_fop的初始化與設備號沒有丁點關係而且初始化的值也不是我們所實現的那個struct file_operations指針,inode僅僅是保存了設備號而已啊!這可怎麼辦啊?我們該怎麼調用到我們的驅動程序啊?朋友們請別泄氣,狄大人的經驗告訴我們,真相總是會水落石出的,只要我們能堅持住!哈哈!且聽下回分解吧……

/*
 * Default file_operations that init_special_inode() installs on every
 * character-device inode; its open handler is chrdev_open().
 */
const struct file_operations def_chr_fops = {
       .open = chrdev_open,
       .llseek = noop_llseek,
};

Open

熟悉linux應用編程的朋友們應該都知道,要操作一個文件,除了該文件必須存在外,還需要先通過open系統調用去得到一個文件句柄,有了這個句柄後續的操作才能進行。對於設備文件的操作也是同樣的道理。於是我們似乎又可以找到一些線索了,也許在字符設備文件的open操作中,我們能夠看到我們曾經猜測的東西,它們沒有在mknod中被完成而是延後到了open的時候來完成了。好吧,讓我們從open系統調用開始來揭開這層層的謎團吧!
open系統調用在C庫頭文件中的原型如下:
int open(const char *pathname, int flags, mode_t mode);
毫無疑問open函數必須通過pathname去找到該文件對應的inode(這裏假設我們的inode設備節點已經存在)。找到inode節點後,將調用inode裏i_fop成員的open方法,對於字符設備而言,將調用chrdev_open函數。該函數實現如下:


/*
 * Open handler shared by all character-device inodes (def_chr_fops.open).
 *
 * Finds the struct cdev for the inode (caching it in inode->i_cdev on the
 * first open), replaces filp->f_op with the cdev's ops and then calls the
 * driver's own open method if it has one.
 *
 * Returns 0 on success, -ENXIO when no cdev can be found or referenced or
 * when its ops are unavailable, or the error returned by the driver's open.
 */
static int chrdev_open(struct inode *inode, struct file *filp)
{
       struct cdev *p;
       struct cdev *new = NULL;
       int ret = 0;
 
       spin_lock(&cdev_lock);
       p = inode->i_cdev;
       if (!p) {
              /* First open of this inode: no cdev cached yet. */
              struct kobject *kobj;
              int idx;
              /* The lock is dropped around the cdev_map lookup. */
              spin_unlock(&cdev_lock);
              kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
              if (!kobj)
                     return -ENXIO;
              /* The kobject is embedded in the cdev; step back to the container. */
              new = container_of(kobj, struct cdev, kobj);
              spin_lock(&cdev_lock);
              /* Check i_cdev again in case somebody beat us to it while
                 we dropped the lock. */
              p = inode->i_cdev;
              if (!p) {
                     /* Cache the cdev in the inode and link the inode onto the cdev's list. */
                     inode->i_cdev = p = new;
                     list_add(&inode->i_devices, &p->list);
                     /* 'new' is now held through i_cdev, so the cdev_put() below must not see it. */
                     new = NULL;
              } else if (!cdev_get(p))
                     ret = -ENXIO;
       } else if (!cdev_get(p))
              ret = -ENXIO;
       spin_unlock(&cdev_lock);
       /* Drops the looked-up cdev when it was not installed; called with NULL otherwise. */
       cdev_put(new);
       if (ret)
              return ret;
 
       ret = -ENXIO;
       filp->f_op = fops_get(p->ops);                        // switch this open file over to the cdev's ops
       if (!filp->f_op)
              goto out_cdev_put;
 
       if (filp->f_op->open) {
              ret = filp->f_op->open(inode, filp);            // call the driver's own open method
              if (ret)
                     goto out_cdev_put;
       }
 
       return 0;

 out_cdev_put:
       cdev_put(p);
       return ret;
}

該函數流程如下:
(1) 判斷inode的i_cdev成員是否爲空(據我們所知,從我們mknod開始到現在還沒有誰給它賦過值,因此到目前爲止還是空的)。
(2) 如果爲空,將通過kobj_lookup與container_of的組合找出inode->i_rdev所對應的struct cdev結構。
(3) 將通過inode->i_rdev查找到的struct cdev結構指針賦值給inode->i_cdev(注意下次open時inode->i_cdev將不爲空),然後將inode加入該struct cdev的list鏈表中。
(4) 將inode->i_cdev中的ops賦值給由chrdev_open傳遞進來的filp的f_op成員。
(5) 如果inode->i_cdev中的ops及其open方法不爲空,則調用該open方法。

以上流程是對應創建設備節點後第一次調用open的流程,該流程與我們在“設備文件的創建”一節中猜測的inode值的初始化過程還是有點出入的,對於inode->i_cdev初始化的猜測,我們是正確的,而對於inode->i_fop我們得出的結論是設備節點的inode的i_fop值從創建後一直是&def_chr_fops(.open= chrdev_open), 而struct cdev結構的ops指針只是賦給了代表每個打開文件的filp結構中的f_op。
如果open不是創建設備節點後第一次被調用,則chrdev_open函數的執行流程是執行完(1)後直接跳過(2)、(3)兩個步驟到(4)。
至此,open成功返回!
對於用戶空間的open我們知道它返回一個int型的句柄,而後續的所有操作都是根據該句柄進行的,如read和write:
ssize_t read(int fd, void *buf, size_t count);
ssize_t write(int fd, const void *buf, size_t count);
對於設備文件而言這些操作直接對應於驅動程序實現的struct file_operations裏的實現!那我們現在的問題又來了,內核是怎麼通過fd找到與其對應的struct file_operations的?
在用戶空間用文件路徑名代表着一個文件,而open函數返回的fd則代表着一個打開文件的抽象,即對於一個文件可以同時存在對其進行操作的多個窗口。當然這些都必須得到內核的支持纔行,因此在內核空間用一個inode結構代表一個文件,而用struct file結構代表一個打開的文件。用戶空間每open一個文件,內核都會爲其生成一個struct file結構。該結構的成員也不少,這裏只有兩個成員是我們關心的
const struct file_operations *f_op;
void *private_data;
f_op成員我們可以看到在chrdev_open被賦值了,賦的值是inode->i_cdev->ops;而private_data的作用則是用於f_op裏各個方法間傳遞數據用的,這在驅動程序的實現過程中將會非常有用。
那內核是如何根據用戶空間的fd找到其對應的struct file結構的呢?其實對於每個進程而言,內核中表示一個進程的數據結構(如struct task_struct task)裏會維護一張打開的文件描述符表(task->files->fdt->fd,struct file *指針數組),在open的過程中,內核會以得到的fd爲下標,將新生成的struct file的指針填入表中,即
task->files->fdt->fd[fd] = filp;
因此以fd爲參數的其他文件操作可以很輕易的找到其對應的struct file,進而調用f_op對應的方法。
Open 段參考,文章:http://blog.chinaunix.net/uid-25424552-id-3387451.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章