get_inode(dev,numb)
/*===========================================================================*
* get_inode *
*===========================================================================*/
PUBLIC struct inode *get_inode(dev, numb)
dev_t dev;                      /* device on which the inode resides */
int numb;                       /* inode number (ANSI: may not be unshort) */
{
/* Look up inode (dev, numb) in the in-core inode table and return a pointer
 * to its slot with the use count incremented.  If it is not in the table,
 * claim a free slot, set it up for (dev, numb) with a use count of 1 and,
 * unless 'dev' is NO_DEV, fill it in from disk through rw_inode().  When the
 * table has no free slot, set err_code to ENFILE and return NIL_INODE.
 */
  register struct inode *slot, *spare;
  int idx;

  /* Single pass over the table: look for a match among the slots in use and
   * note a free slot on the way (the last free one seen is kept).
   */
  spare = NIL_INODE;
  for (idx = 0; idx < NR_INODES; idx++) {
    slot = &inode[idx];
    if (slot->i_count <= 0) {
      spare = slot;             /* unused slot, candidate for loading */
      continue;
    }
    if (slot->i_dev != dev || slot->i_num != numb) continue;

    /* (dev, numb) is already in core; one more user. */
    slot->i_count++;
    return(slot);
  }

  /* Not in core.  Without a free slot there is nowhere to load it. */
  if (spare == NIL_INODE) {
    err_code = ENFILE;
    return(NIL_INODE);
  }

  /* Claim the free slot; the identity fields must be set before rw_inode()
   * is called, because it reads i_dev and i_num to locate the inode.
   */
  spare->i_dev = dev;
  spare->i_num = numb;
  spare->i_count = 1;
  if (dev != NO_DEV) rw_inode(spare, READING);
  spare->i_update = 0;          /* no time stamps pending update */
  return(spare);
}
-----------------------------------------------------------------------------------------------------------------------------
/*===========================================================================*
* rw_inode *
*===========================================================================*/
PUBLIC void rw_inode(rip, rw_flag)
register struct inode *rip;     /* in-core inode to be read or written */
int rw_flag;                    /* READING or WRITING */
{
/* Copy one inode table entry from its disk block (READING) or to it
 * (WRITING).  As side effects the entry's super block pointer i_sp is set
 * and its i_dirt flag is reset to CLEAN.
 */
  register struct buf *blk;
  register struct super_block *sb;
  block_t first_blk, blk_nr;

  /* Find and fetch the block that holds this inode.  Inode numbers start at
   * 1, hence the "- 1".  The inode area starts after both bit maps plus 2
   * more blocks (presumably the boot block and super block -- confirm).
   */
  sb = get_super(rip->i_dev);
  rip->i_sp = sb;
  first_blk = sb->s_imap_blocks + sb->s_zmap_blocks + 2;
  blk_nr = (block_t) (rip->i_num - 1)/sb->s_inodes_per_block + first_blk;
  blk = get_block(rip->i_dev, blk_nr, NORMAL);

  /* Before writing, bring pending time stamps up to date and mark the block
   * dirty, unless the file system is read-only.
   */
  if (rw_flag == WRITING) {
    if (rip->i_update) update_times(rip);
    if (sb->s_rd_only == FALSE) blk->b_dirt = DIRTY;
  }

  /* Do the copy with the routine for this disk version.  When the last
   * argument is FALSE, the bytes are swapped.
   */
  if (sb->s_version == V1) {
    d1_inode *v1_ino;

    v1_ino = blk->b_v1_ino + (rip->i_num - 1) % V1_INODES_PER_BLOCK;
    old_icopy(rip, v1_ino, rw_flag, sb->s_native);
  } else {
    d2_inode *v2_ino;

    v2_ino = blk->b_v2_ino + (rip->i_num - 1) % V2_INODES_PER_BLOCK;
    new_icopy(rip, v2_ino, rw_flag, sb->s_native);
  }

  put_block(blk, INODE_BLOCK);
  rip->i_dirt = CLEAN;
}
/*===========================================================================*
* old_icopy *
*===========================================================================*/
PRIVATE void old_icopy(rip, dip, direction, norm)
register struct inode *rip;     /* pointer to the in-core inode struct */
register d1_inode *dip;         /* pointer to the V1 on-disk inode */
int direction;                  /* READING (from disk) or WRITING (to disk) */
int norm;                       /* TRUE = do not swap bytes; FALSE = swap */
{
/* Convert between the V1 on-disk inode layout and the in-core inode, so that
 * the inode table does not depend on the disk structure the inode came from.
 * The V1 layout carries a single time stamp (mtime): on reading, atime and
 * ctime are set equal to it; on writing, only mtime is stored.  Link count
 * and gid are single characters on disk and are copied without swapping.
 */
  int z;

  if (direction != READING) {
    /* In-core table -> V1 disk inode, swapping bytes if need be. */
    dip->d1_mode = conv2(norm, (int) rip->i_mode);
    dip->d1_uid = conv2(norm, (int) rip->i_uid );
    dip->d1_size = conv4(norm, rip->i_size);
    dip->d1_mtime = conv4(norm, rip->i_mtime);
    dip->d1_nlinks = (nlink_t) rip->i_nlinks;   /* 1 char */
    dip->d1_gid = (gid_t) rip->i_gid;           /* 1 char */
    for (z = 0; z < V1_NR_TZONES; z++) {
      dip->d1_zone[z] = conv2(norm, (int) rip->i_zone[z]);
    }
    return;
  }

  /* V1 disk inode -> in-core table, swapping bytes if need be. */
  rip->i_mode = conv2(norm, (int) dip->d1_mode);
  rip->i_uid = conv2(norm, (int) dip->d1_uid );
  rip->i_size = conv4(norm, dip->d1_size);
  rip->i_mtime = conv4(norm, dip->d1_mtime);
  rip->i_atime = rip->i_mtime;                  /* not stored on V1 disks */
  rip->i_ctime = rip->i_mtime;                  /* not stored on V1 disks */
  rip->i_nlinks = (nlink_t) dip->d1_nlinks;     /* 1 char */
  rip->i_gid = (gid_t) dip->d1_gid;             /* 1 char */
  rip->i_ndzones = V1_NR_DZONES;
  rip->i_nindirs = V1_INDIRECTS;
  for (z = 0; z < V1_NR_TZONES; z++) {
    rip->i_zone[z] = conv2(norm, (int) dip->d1_zone[z]);
  }
}
/*===========================================================================*
* new_icopy *
*===========================================================================*/
PRIVATE void new_icopy(rip, dip, direction, norm)
register struct inode *rip;     /* pointer to the in-core inode struct */
register d2_inode *dip;         /* pointer to the V2 on-disk inode */
int direction;                  /* READING (from disk) or WRITING (to disk) */
int norm;                       /* TRUE = do not swap bytes; FALSE = swap */
{
/* Counterpart of old_icopy() for the V2 disk layout: convert between the V2
 * on-disk inode and the in-core inode.  Unlike V1, all three time stamps are
 * stored on disk, and every field goes through conv2()/conv4().
 */
  int z;

  if (direction != READING) {
    /* In-core table -> V2 disk inode, swapping bytes if need be. */
    dip->d2_mode = conv2(norm,rip->i_mode);
    dip->d2_uid = conv2(norm,rip->i_uid );
    dip->d2_nlinks = conv2(norm,rip->i_nlinks);
    dip->d2_gid = conv2(norm,rip->i_gid );
    dip->d2_size = conv4(norm,rip->i_size);
    dip->d2_atime = conv4(norm,rip->i_atime);
    dip->d2_ctime = conv4(norm,rip->i_ctime);
    dip->d2_mtime = conv4(norm,rip->i_mtime);
    for (z = 0; z < V2_NR_TZONES; z++) {
      dip->d2_zone[z] = conv4(norm, (long) rip->i_zone[z]);
    }
    return;
  }

  /* V2 disk inode -> in-core table, swapping bytes if need be. */
  rip->i_mode = conv2(norm,dip->d2_mode);
  rip->i_uid = conv2(norm,dip->d2_uid );
  rip->i_nlinks = conv2(norm,(int) dip->d2_nlinks);
  rip->i_gid = conv2(norm,(int) dip->d2_gid );
  rip->i_size = conv4(norm,dip->d2_size);
  rip->i_atime = conv4(norm,dip->d2_atime);
  rip->i_ctime = conv4(norm,dip->d2_ctime);
  rip->i_mtime = conv4(norm,dip->d2_mtime);
  rip->i_ndzones = V2_NR_DZONES;
  rip->i_nindirs = V2_INDIRECTS;
  for (z = 0; z < V2_NR_TZONES; z++) {
    rip->i_zone[z] = conv4(norm, (long) dip->d2_zone[z]);
  }
}
get_inode先查詢內存中的inode_table,選擇i_count大於0的(即正在被使用的)inode,看dev和numb是否匹配。若匹配則找到,inode的i_count域加一,返回指針。若遍歷完都未找到匹配,則numb號inode不在內存中。此時使用剛纔遍歷過程中記錄的空slot(若沒有空slot,則將err_code置爲ENFILE並返回NIL_INODE),調用rw_inode把磁盤上的inode讀入此slot中。剛調入內存的inode在rw_inode函數最後被設置爲CLEAN,其他訪問inode的操作可能把它修改爲DIRTY。
注:dirty標誌僅代表inode存儲在磁盤上部分是dirty的還是clean的。inode僅在內存中的部分與dirty標誌並不相互影響。
疑問:若給出一個參數numb,其代表的inode並未使用alloc_inode分配,結果會怎樣?
put_inode(rip)
/*===========================================================================*
* put_inode *
*===========================================================================*/
PUBLIC void put_inode(rip)
register struct inode *rip;     /* pointer to inode to be released */
{
/* Drop one reference to an in-core inode.  When the last reference goes
 * away: an inode without links is truncated, marked I_NOT_ALLOC and handed
 * back with free_inode(); a pipe that still has links is only truncated.
 * In both cases the pipe flag is cleared and the inode is written to disk
 * if it is dirty.  A NIL_INODE argument is accepted and ignored.
 */
  if (rip == NIL_INODE) return; /* checking here is easier than in caller */

  rip->i_count--;
  if (rip->i_count != 0) return;        /* still in use by someone else */

  if ((rip->i_nlinks & BYTE) != 0) {
    /* Links remain, so the inode stays allocated; a pipe nobody has open
     * any more gives up its data, though.
     */
    if (rip->i_pipe == I_PIPE) truncate(rip);
  } else {
    /* No links left: free the inode. */
    truncate(rip);                      /* return all the disk blocks */
    rip->i_mode = I_NOT_ALLOC;          /* clear I_TYPE field */
    rip->i_dirt = DIRTY;                /* forces the write-back below */
    free_inode(rip->i_dev, rip->i_num);
  }

  rip->i_pipe = NO_PIPE;                /* should always be cleared */
  if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING);
}
參數爲內存inodetable中的項的指針。put函數對inode的icount域減一。若爲0,則現在沒有程序在使用此內存inode,可以從內存inode中刪除。進一步判斷ilink是否爲0,若是,則說明現在此inode可以free掉,且調用truncate函數將此inode中佔用的數據塊全部設爲空閒。將內存inodetable中此項設爲未分配,調用free_inode在位示圖中標記此inode可用。
這裏要注意對pipe文件的處理:當最後一個使用者釋放管道文件(i_count減到0)時,即使i_nlinks不爲0,也要調用truncate釋放它佔用的數據塊,因爲沒有進程使用的管道再保留數據是沒有意義的。
最後,將inode的pipe屬性修改爲NO_PIPE。如果inode被修改了(i_dirt爲DIRTY),調用rw_inode寫回。(當icount減一等於0時,不管ilink是否爲0,只要inode是髒的,都調用rw_inode寫回數據)
單獨看這一個過程的話,可能會覺得只有在ilinks爲0的時候才設置dirty標誌,但是這時候設置好像又沒什麼意義了,反正這個inode是沒人使用了。而當icount爲0,ilinks不等於0時,並沒有設置dirty標誌,接着執行下面的寫回髒inode,會不會導致剛纔對於icount的修改沒有寫回到磁盤?(分析錯誤:icount只存在於內存中,根本就不在磁盤中存儲。icount代表當前使用這個inode的進程的數目。)
alloc_inode & free_inode調用位示圖操作,修改位示圖相關位的值。
/*===========================================================================*
* alloc_inode *
*===========================================================================*/
PUBLIC struct inode *alloc_inode(dev, bits)
dev_t dev; /* device on which to allocate the inode */
mode_t bits; /* mode of the inode */
{
/* Allocate a free inode on 'dev', and return a pointer to it. */
register struct inode *rip;
register struct super_block *sp;
int major, minor, inumb;
bit_t b;
sp = get_super(dev); /* get pointer to super_block */
if (sp->s_rd_only) { /* can't allocate an inode on a read only device. */
err_code = EROFS;
return(NIL_INODE);
}
/* Acquire an inode from the bit map. */
b = alloc_bit(sp, IMAP, sp->s_isearch);
if (b == NO_BIT) {
err_code = ENFILE;
major = (int) (sp->s_dev >> MAJOR) & BYTE;
minor = (int) (sp->s_dev >> MINOR) & BYTE;
printf("Out of i-nodes on %sdevice %d/%d\n",
sp->s_dev == ROOT_DEV ? "root " : "", major, minor);
return(NIL_INODE);
}
sp->s_isearch = b; /* next time start here */
inumb = (int) b; /* be careful not to pass unshort as param */
/* Try to acquire a slot in the inode table. */
if ((rip = get_inode(NO_DEV, inumb)) == NIL_INODE) {
/* No inode table slots available. Free the inode just allocated. */
free_bit(sp, IMAP, b);
} else {
/* An inode slot is available. Put the inode just allocated into it. */
rip->i_mode = bits; /* set up RWX bits */
rip->i_nlinks = (nlink_t) 0; /* initial no links */
rip->i_uid = fp->fp_effuid; /* file's uid is owner's */
rip->i_gid = fp->fp_effgid; /* ditto group id */
rip->i_dev = dev; /* mark which device it is on */
rip->i_ndzones = sp->s_ndzones; /* number of direct zones */
rip->i_nindirs = sp->s_nindirs; /* number of indirect zones per blk*/
rip->i_sp = sp; /* pointer to super block */
/* Fields not cleared already are cleared in wipe_inode(). They have
* been put there because truncate() needs to clear the same fields if
* the file happens to be open while being truncated. It saves space
* not to repeat the code twice.
*/
wipe_inode(rip);
}
return(rip);
}
/*===========================================================================*
* wipe_inode *
*===========================================================================*/
PUBLIC void wipe_inode(rip)
register struct inode *rip;     /* the inode to be erased */
{
/* Reset the fields that describe an inode's contents: all zone numbers
 * become NO_ZONE, the size becomes 0, all three time stamps are scheduled
 * for update and the inode is marked dirty.  Called from alloc_inode() for
 * a new inode and from truncate() for an existing one.
 */
  register int z;

  z = 0;
  while (z < V2_NR_TZONES) {
    rip->i_zone[z] = NO_ZONE;
    z++;
  }
  rip->i_size = 0;
  rip->i_update = ATIME | CTIME | MTIME;        /* update all times later */
  rip->i_dirt = DIRTY;
}
alloc_inode分配一個inode,mode爲參數中給出的bits。與數據塊的分配不同,並不需要指定在哪個塊號附近分配,這裏都是從超級塊中的sp->s_isearch(第一個可用inode號)開始查找。先調用alloc_bit從inode位示圖中分配一個bit,然後還要在inodetable中爲此inode找到位置:調用get_inode(NO_DEV, inumb),若沒有空閒位置,則將剛纔分配到的bit也free掉。
若get_inode成功從inode_table中獲得一個位置rip,則對這個rip指向的inode進行初始化,比如mode,uid,gid,dev等(inode_number已在get_inode中設置)。初始的link數目爲0。還有部分初始化放在wipe_inode中進行:因爲別處(truncate)也要用到這部分初始化,所以放在一個函數裏,減少代碼量。
注意,這裏分配的inode僅僅是在位示圖中分配了一個位,然後在內存inode_table初始化了這個inode。真正磁盤inode存儲區域內並沒有這個inode具體信息(當然屬於這個inode的存儲區域是有的)。所以在分配了inode時在wipe_inode函數中將內存inode_table中對應項設置爲dirty。具體寫到磁盤上要put_inode依據dirty標誌調用rw_inode來完成。
/*===========================================================================*
* free_inode *
*===========================================================================*/
PUBLIC void free_inode(dev, inumb)
dev_t dev;                      /* on which device is the inode */
ino_t inumb;                    /* number of inode to be freed */
{
/* Return inode 'inumb' of 'dev' to the pool of unallocated inodes by
 * clearing its bit in the inode bit map.  Inode numbers outside the range
 * 1..s_ninodes are silently ignored.
 */
  register struct super_block *sb;
  bit_t bit;

  sb = get_super(dev);          /* locate the appropriate super block */
  if (inumb > 0 && inumb <= sb->s_ninodes) {
    bit = inumb;
    free_bit(sb, IMAP, bit);

    /* Keep the search hint at or below the lowest bit known to be free. */
    if (sb->s_isearch > bit) sb->s_isearch = bit;
  }
}
/*===========================================================================*
* update_times *
*===========================================================================*/
PUBLIC void update_times(rip)
register struct inode *rip;     /* inode whose time stamps are to be filled in */
{
/* Fill in the time stamps that were marked for update.  Callers do not set
 * atime, ctime or mtime directly; they set the ATIME, CTIME and MTIME bits
 * in i_update, and this routine stores the current time in each marked
 * field and then clears i_update.  Nothing is changed, and i_update is left
 * as it is, when the inode's file system is read-only.
 */
  time_t now;

  if (rip->i_sp->s_rd_only) return;     /* no updates on read-only fs */

  now = clock_time();
  if (rip->i_update & ATIME) rip->i_atime = now;
  if (rip->i_update & CTIME) rip->i_ctime = now;
  if (rip->i_update & MTIME) rip->i_mtime = now;
  rip->i_update = 0;                    /* they are all up-to-date now */
}
free_inode 直接調用free_bit在位示圖中指示inode爲可用。
注意:在alloc_inode和free_inode中調用alloc_bit和free_bit的時候,參數直接給的是inode號。
在inode位示圖中直接根據inode號定位所在的位。這裏的inode從0開始計數。但是在rw_inode中,給出了inode_number,定位此inode具體所在的磁盤位置(磁盤塊號,塊內偏移地址)。這時候並不能直接使用給出的inode號,因爲具體存儲inode的時候是從1號inode開始的。0號爲保留,作爲錯誤返回值。因此,計算具體存儲位置的時候,並不能直接使用numb,這裏是要減一,其他實現可能還會有不同的處理。
下面是一些解釋:
Usually, the inode 0 is reserved because a return value of 0 usually signals an error. Multiple methods in the Linux kernel -- especially in the VFS layer shared by all file systems -- return an ino_t, e.g. find_inode_number.
There are more reserved inode numbers. For example in ext2 :
#define EXT2_BAD_INO 1 /* Bad blocks inode */
#define EXT2_ROOT_INO 2 /* Root inode */
#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
and ext3 has:
#define EXT3_BAD_INO 1 /* Bad blocks inode */
#define EXT3_ROOT_INO 2 /* Root inode */
#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
#define EXT3_JOURNAL_INO 8 /* Journal inode */
Other filesystems use the ino 1 as root inode number. In general, a file system is free to choose its inode numbers and its reserved ino values (with the exception of 0).
最後關於update_times:
inode中i_update定義爲char類型。主要使用到一個char的8位數據中的三位,用來標識三個時間是否已經是最新的。爲0表示都是最新的,否則根據具體的位確定更新哪個時間域。具體比較時還需要用到三個宏ATIME、CTIME、MTIME的值。
#define ATIME 002 /* set if atime field needs updating */
#define CTIME 004 /* set if ctime field needs updating */
#define MTIME 010 /* set if mtime field needs updating */
這幾個值是8進制:C語言中以0開頭的整數常量表示8進制。
轉換成二進制則是10 100 1000 分別在一個字節的第2 3 4位爲1.
/*===========================================================================*
* dup_inode *
*===========================================================================*/
PUBLIC void dup_inode(ip)
struct inode *ip;               /* the inode to be duplicated */
{
/* Register one more user of an inode that the caller already holds a
 * pointer to: only the use count changes, no table search is done.
 */
  ip->i_count += 1;
}