1. 概述
Linux 虛擬文件系統是建立在具體文件系統之上,其包括幾種主要的對象,分別是超級塊對象,目錄項對象,索引節點對象,與進程相關的文件對象,安裝點對象,文件系統類型對象。在VFS中,多個文件系統可以被安裝在同一個目錄,例如/dev/sda和/dev/sdb先後被安裝在/project目錄下,則/dev/sdb會隱藏/dev/sda文件系統。同一個文件系統也可以安裝在不同的目錄下,此時只有一個超級塊對象。因此,超級塊對象代表了文件系統。目錄項對象主要用於路徑查找過程,例如,將文件系統/dev/sda安裝在/project目錄下,首先需要找到/project/目錄的目錄項對象,另外,目錄項對象和目錄不同,每個目錄和文件都具有目錄項對象,存在於內存。索引節點對象描述了文件或目錄的元數據,包括文件的訪問時間等,在創建索引節點的時候,將文件的相關操作指針賦給索引節點對象的i_fop字段。每個打開的文件對象對應一個文件描述符,常用於文件的open操作。每個文件系統都有對應的類型對象,如ext3,ext2,ext4和fuse文件系統,而這個結構體存儲了相關的文件類型。安裝點對象表示文件系統的掛載位置以及不同文件系統之間的關係,新掛載的文件系統都需要加入mount tree.
2. 對象
(1) superblock對象<linux/fs.h>
struct super_block{
struct list_head s_list /*Pointers for superblock list所有的superblock鏈表*/
unsigned long s_blocksize /*Block size in bytes每個塊的字節數*/
unsigned long s_old_blocksize /*Block size in bytes as reported by the underlying block device drive*/
unsigned char s_blocksize_bits /*Block size in number of bits每個塊佔多少比特*/
unsigned char s_dirt /*Modified (dirty) flag超級塊爲髒,需要和磁盤上的超級塊同步*/
unsigned long long s_maxbytes /*Maximum size of the files文件的最大長度*/
struct file_system_type * s_type /*Filesystem type所屬的文件系統類型如ext3*/
struct super_operations * s_op /*Superblock methods超級塊的相關操作,對象包括屬性和操作*/
struct dquot_operations * dq_op /*Disk quota handling methods*/
struct quotactl_ops * s_qcop /*Disk quota administration methods*/
struct export_operations * s_export_op /*Export operations used by network filesystems網絡文件系統相關操作*/
unsigned long s_flags /*Mount flags掛載標誌*/
unsigned long s_magic /*Filesystem magic number文件系統魔數*/
struct dentry * s_root /*Dentry object of the filesystem's root directory文件系統的根目錄項對象*/
struct rw_semaphore s_umount /*Semaphore used for unmounting umount時使用*/
struct semaphore s_lock /*Superblock semaphore*/
int s_count /*Reference counter計數器,一個文件系統可能被安裝到多個目錄*/
int s_syncing /*Flag indicating that inodes of the superblock are being synchronized */
int s_need_sync_fs /*Flag used when synchronizing the superblock's mounted filesystem*/
atomic_t s_active /*Secondary reference counter*/
void * s_security /* Pointer to superblock security structure*/
struct xattr_handler ** s_xattr /*Pointer to superblock extended attribute structure*/
struct list_head s_inodes /*List of all inodes所有的inode節點*/
struct list_head s_dirty /*List of modified inodes修改的inode節點鏈表*/
struct list_head s_io /*List of inodes waiting to be written to disk等待寫到磁盤上的inode節點列表*/
struct hlist_head s_anon /*List of anonymous dentries for handling remote network filesystems*/
struct list_head s_files /*List of file objects文件對象鏈表*/
struct block_device *s_bdev /*Pointer to the block device driver descriptor塊設備描述符*/
struct list_head s_instances /*Pointers for a list of superblock objects of a given filesystem type屬於指定文件系統類型的鏈表指針*/
struct quota_info s_dquot /*Descriptor for disk quota*/
int s_frozen /*Flag used when freezing the filesystem (forcing it to a consistent state)*/
wait_queue_head_t s_wait_unfrozen /*Wait queue where processes sleep until the filesystem is unforzen*/
char[] s_id /*Name of the block device containing the superblock*/
void * s_fs_info /*Pointer to superblock information of a specific filesystem具體文件系統的超級塊信息*/
struct semaphore s_vfs_rename_sem /*Semaphore used by VFS when renaming files across directories*/
u32 s_time_gran /*Timestamp's granularity (in nanoseconds)*/
}
/*
 * struct super_operations - method table of a superblock object; it is
 * the type of the s_op member of struct super_block.
 *
 * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
 * without the big kernel lock held in all filesystems.
 */
struct super_operations {
    struct inode *(*alloc_inode)(struct super_block *sb); /* Allocate space for an inode */
    void (*destroy_inode)(struct inode *); /* Destroy the inode object */
    void (*read_inode) (struct inode *); /* Fill the inode with data read from disk, located via the inode number i_ino */
    void (*dirty_inode) (struct inode *); /* Called when the inode is marked dirty, i.e. has been modified */
    int (*write_inode) (struct inode *, int flag); /* Update the filesystem's inode; flag tells whether the write must be synchronous */
    void (*put_inode) (struct inode *); /* Called when the inode is released */
    void (*drop_inode) (struct inode *); /* Called when the inode is about to be destroyed */
    void (*delete_inode) (struct inode *); /* Called when the inode is destroyed: deletes the VFS inode and the file's on-disk data and metadata */
    void (*put_super) (struct super_block *); /* Release the superblock object because its filesystem has been unmounted */
    void (*write_super) (struct super_block *); /* Update the filesystem's superblock */
    int (*sync_fs)(struct super_block *sb, int wait); /* Synchronize the filesystem */
    void (*write_super_lockfs) (struct super_block *);
    void (*unlockfs) (struct super_block *);
    int (*statfs) (struct dentry *, struct kstatfs *buf); /* Place the returned information in buf */
    int (*remount_fs) (struct super_block *, int *, char *); /* Remount the filesystem */
    void (*clear_inode) (struct inode *); /* Called while the inode is being destroyed */
    void (*umount_begin) (struct vfsmount *, int); /* Abort a mount operation because the corresponding umount is starting; network filesystems only */
    int (*show_options)(struct seq_file *, struct vfsmount *);
    int (*show_stats)(struct seq_file *, struct vfsmount *);
#ifdef CONFIG_QUOTA
    ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
    ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
};
(2)inode節點對象,每個文件擁有一個inode節點對象,包含了文件的元數據信息,在<linux/fs.h>頭文件
/*
 * struct inode - index node object, declared in <linux/fs.h>.
 * Every file owns one inode object, which holds the file's metadata.
 */
struct inode {
struct hlist_node i_hash; /* Pointers for the hash list */
struct list_head i_list; /* Pointers for the list that describes the inode's current state: unused, in use, or dirty */
struct list_head i_sb_list; /* Pointers for the superblock's list */
struct list_head i_dentry; /* Dentry objects associated with this inode */
unsigned long i_ino; /* Inode number */
atomic_t i_count; /* Usage count of the inode */
unsigned int i_nlink; /* Number of hard links; hard links share one inode but have different dentry objects (in different directories) */
uid_t i_uid; /*owner identifier*/
gid_t i_gid; /*group identifier*/
dev_t i_rdev; /* Real device identifier */
unsigned long i_version; /*version number,automatically increased after each use*/
loff_t i_size; /* File length */
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
struct timespec i_atime; /* Time of last access to the file */
struct timespec i_mtime; /* Time of last modification of the file */
struct timespec i_ctime; /* Time of last change of the inode */
unsigned int i_blkbits; /* Block size (presumably as a bit count, judging by the field name - confirm) */
blkcnt_t i_blocks; /* Number of blocks of the file */
unsigned short i_bytes; /* Number of bytes in the file's last block */
umode_t i_mode; /* File type and access permissions */
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
struct inode_operations *i_op; /* Inode operations */
const struct file_operations *i_fop; /* former ->i_op->default_file_ops; assigned when the inode is created and handed to the file object when the file is opened */
struct super_block *i_sb; /* Pointer to the superblock object */
struct file_lock *i_flock;
struct address_space *i_mapping; /* Pointer to the address space object */
struct address_space i_data; /* Address space object */
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
};
int i_cindex;
__u32 i_generation;
#ifdef CONFIG_DNOTIFY
unsigned long i_dnotify_mask; /* Directory notify events */
struct dnotify_struct *i_dnotify; /* for directory notifications */
#endif
#ifdef CONFIG_INOTIFY
struct list_head inotify_watches; /* watches on this inode */
struct mutex inotify_mutex; /* protects the watches list */
#endif
unsigned long i_state; /* Inode state, e.g. whether it is dirty */
unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned int i_flags; /* Filesystem mount flags */
atomic_t i_writecount;
#ifdef CONFIG_SECURITY
void *i_security;
#endif
void *i_private; /* fs or device private pointer */
};
/*
 * struct inode_operations - method table of an inode object; it is the
 * type of the i_op member of struct inode.
 */
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int, struct nameidata *); /* Create an on-disk inode associated with a dentry object */
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); /* Look up, in the dentry cache, the dentry object associated with the inode */
int (*link) (struct dentry *old_dentry,struct inode *,struct dentry *new_dentry); /* Create a hard link, associating a new dentry object */
int (*unlink) (struct inode *,struct dentry *); /* Remove a hard link */
int (*symlink) (struct inode *,struct dentry *,const char *); /* Create a symbolic link */
int (*mkdir) (struct inode *,struct dentry *,int); /* Create a new inode (for a directory, going by the method name) */
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
int (*readlink) (struct dentry *, char __user *,int);
void * (*follow_link) (struct dentry *, struct nameidata *); /* Translate (resolve) a symbolic link */
void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, struct nameidata *);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
};
(3)文件對象struct file,當進程打開文件時,在內存創建一個文件對象,在磁盤上沒有相對應的映像,所以沒有dirty標誌,<linux/fs.h>
/*
 * struct file - open file object, declared in <linux/fs.h>.
 * Created in memory when a process opens a file.  It has no corresponding
 * image on disk, which is why it carries no dirty flag.
 */
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
* fu_rcuhead for RCU freeing
*/
union {
struct list_head fu_list; /* List pointers linking file objects */
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
#define f_dentry f_path.dentry /* Dentry object associated with the file object */
#define f_vfsmnt f_path.mnt /* Mount object containing the file object */
const struct file_operations *f_op; /* Pointer to the file operation table */
atomic_t f_count; /* Reference count of the file object; one file may be opened by several processes at once, each with its own struct file */
unsigned int f_flags; /* Flags specified when the file was opened */
mode_t f_mode; /* Open mode */
loff_t f_pos; /* Current position of the file pointer (very important) */
struct fown_struct f_owner;
unsigned int f_uid, f_gid; /* User ID, group ID */
struct file_ra_state f_ra; /* File read-ahead state */
unsigned long f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping; /* Pointer to the address space */
};
/*
* struct file_operations - table of function pointers for operating on a
* file.  It is the type pointed to by the f_op member of struct file and
* by the i_fop member of struct inode.
*
* NOTE:
* read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl
* can be called without the big kernel lock held in all filesystems.
*/
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
};
(4)目錄項對象,內核爲路徑的每一部分創建目錄項對象,如/tmp/test,會爲/,tmp和test分別創建目錄項對象,用於定位文件或目錄。爲了加快查找,目錄項對象通常存放在目錄項高速緩存。目錄項對象在磁盤上沒有相對應的映像。當目錄項對象不在dentry_cache中,就需要從磁盤讀取相關信息,在內存創建一個目錄項對象和相關聯的索引節點對象。在<linux/dcache.h>
/*
 * struct dentry - directory entry object, declared in <linux/dcache.h>.
 * The kernel creates one dentry per path component (for /tmp/test: "/",
 * "tmp" and "test") and uses them to locate files and directories.
 * Dentries have no corresponding image on disk.
 */
struct dentry {
atomic_t d_count; /* Usage count */
unsigned int d_flags; /* protected by d_lock; dentry cache flags */
spinlock_t d_lock; /* per dentry lock */
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative; the inode associated with the file name */
/*
* The next three fields are touched by __d_lookup. Place them here
* so they all fit in a cache line.
*/
struct hlist_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory (the parent's dentry object) */
struct qstr d_name; /* File name structure: name length, name, and hash value */
struct list_head d_lru; /* LRU list */
/*
* d_child and d_rcu can share memory
*/
union {
struct list_head d_child; /* child of parent list (list of child dentries) */
struct rcu_head d_rcu;
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op; /* Dentry operation methods */
struct super_block *d_sb; /* The root of the dentry tree */
void *d_fsdata; /* fs-specific data */
#ifdef CONFIG_PROFILING
struct dcookie_struct *d_cookie; /* cookie, if any */
#endif
int d_mounted; /* Number of filesystems mounted here; different filesystems can be mounted on the same directory */
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};
/*
 * struct dentry_operations - method table of a dentry object; it is the
 * type of the d_op member of struct dentry.
 */
struct dentry_operations {
int (*d_revalidate)(struct dentry *, struct nameidata *); /* Decide whether the dentry is still valid */
int (*d_hash) (struct dentry *, struct qstr *); /* Hash value */
int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); /* Compare file names */
int (*d_delete)(struct dentry *); /* Called when d_count reaches 0 */
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
};
(5)進程相關結構體:每個進程擁有自己的當前工作目錄和根目錄,通過fs_struct結構體維護<linux/fs_struct.h>
/*
 * struct fs_struct - per-process filesystem context, declared in
 * <linux/fs_struct.h>.  Every process has its own current working
 * directory and root directory, which are maintained here.
 */
struct fs_struct {
atomic_t count;
rwlock_t lock;
int umask; /* Used when setting file permissions */
struct dentry * root, * pwd, * altroot; /* Root directory dentry, current working directory dentry; altroot is usually empty */
struct vfsmount * rootmnt, * pwdmnt, * altrootmnt; /* Mount object of the filesystem mounted at the root directory, mount object of the filesystem mounted at the current working directory */
};
(6)與進程關聯的files_struct對象,通過其fdt指針指向的fdtable,可以取得指向struct file對象的指針數組fd,以及當前最大的文件描述符數max_fds等,在<linux/file.h>
/*
* Open file table structure
*
* Per-process object declared in <linux/file.h>.  It points to the
* current fdtable through fdt and embeds an initial fdtable and an
* initial array of file pointers.
*/
struct files_struct {
/*
* read mostly part
*/
atomic_t count;
struct fdtable *fdt; /* Pointer to the file table */
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
int next_fd; /* Next file descriptor */
struct embedded_fd_set close_on_exec_init;
struct embedded_fd_set open_fds_init;
struct file * fd_array[NR_OPEN_DEFAULT]; /* Array of pointers to file objects */
};
/*
 * struct fdtable - file descriptor table; it is the type of the fdt and
 * fdtab members of struct files_struct.
 */
struct fdtable {
unsigned int max_fds; /* Current maximum number of file descriptors */
struct file ** fd; /* current fd array */
fd_set *close_on_exec;
fd_set *open_fds; /* Set of file descriptors that are already open */
struct rcu_head rcu;
struct fdtable *next;
};
(7)文件系統類型file_system_type ,每個文件系統類型對應多個具體的文件系統,在<linux/fs.h>
/*
 * struct file_system_type - description of a filesystem type (for example
 * ext3), declared in <linux/fs.h>.  One filesystem type can correspond to
 * several concrete filesystems.
 */
struct file_system_type {
const char *name;/* Filesystem name */
int fs_flags;/* Filesystem type flags */
int (*get_sb) (struct file_system_type *, int,
const char *, void *, struct vfsmount *);/* Read the superblock */
void (*kill_sb) (struct super_block *);/* Remove the superblock */
struct module *owner;/* Points to the module implementing the filesystem */
struct file_system_type * next;/* Points to the next entry in the filesystem type list */
struct list_head fs_supers;/* Head of the list of superblocks of filesystems of this type */
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
};
其中,
(1) file_systems是所有的文件系統類型鏈表頭,如ext2,ext3,ext4. 而next指針指向下一個文件系統類型。
(2)fs_supers是同類文件系統的鏈表頭,下一個由超級塊對象的s_instances指針指向.
(3)get_sb和kill_sb分別用於分配一個超級塊對象和銷燬一個超級塊對象。
(8)mount point 對象,爲了在內存中保存掛載點,掛載標誌,以及和其它文件系統之間的關係,如父子關係。必須維護一個vfsmount對象,在<linux/mount.h>
/*
 * struct vfsmount - mount point object, declared in <linux/mount.h>.
 * Keeps in memory the mount point, the mount flags, and the relationship
 * to other mounted filesystems (such as parent/child).
 */
struct vfsmount {
struct list_head mnt_hash; /* Pointers for the hash table */
struct vfsmount *mnt_parent; /* fs we are mounted on (parent mount) */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree (dentry of the mount's root) */
struct super_block *mnt_sb; /* pointer to superblock */
struct list_head mnt_mounts; /* list of children, anchored here (head of the child mount list) */
struct list_head mnt_child; /* and going through their mnt_child (link to the next child mount) */
atomic_t mnt_count;/* Usage count */
int mnt_flags;/* Mount flags */
int mnt_expiry_mark; /* true if marked for expiry */
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list; /* Links to the next mount object in the namespace; the list head is stored in the namespace's list field */
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
struct list_head mnt_slave; /* slave list entry */
struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace (the process's namespace) */
int mnt_pinned;
};
處理文件系統掛載的相關函數:
alloc_vfsmnt(name)
Allocates and initializes a mounted filesystem descriptor
free_vfsmnt(mnt)
Frees a mounted filesystem descriptor pointed by mnt
lookup_mnt(mnt,dentry)
Looks up a descriptor in the hash table and returns its address
3. 總結
super_block-關聯每個具體的文件系統
inode-對應文件元數據信息
dentry-目錄項對象,內存表示形式,文件和目錄都擁有目錄項對象
file-打開文件在內存的表示形式
fs_struct-進程相關的根目錄,工作目錄,根安裝點和當前目錄安裝點
file_system_type-在啓動時註冊文件系統類型,如EXT3,或者是加載模塊時註冊
vfsmount-維護已掛載的文件系統之間的關係,如父子關係