Linux Block Device Internals (2): How Block Devices Work, Using the ext2 Filesystem as an Example

The previous article analyzed how to write an MTD-based flash driver and walked through the whole registration flow, at which point the block driver is up and running. But how do file reads and writes relate to the block driver, and what exactly is the relationship between the filesystem and the block driver? That question is worth a closer look. This article analyzes the data path from the VFS layer down to the flash driver layer; by following that path we can see clearly how the whole block driver works.

First, a quick overview of the basics of the ext2 filesystem.

1 The ext2 filesystem

1.1 ext2 filesystem structure

Every file or directory on disk has an inode that manages the file's own attributes, plus data blocks that hold the file's contents. The relationship between an inode and its data blocks is as follows:

If a file is small and needs fewer than 12 data blocks, the block indices are stored directly in inode->i_block. If the file is larger and needs more than 12 data blocks, indirect blocks have to be allocated to hold the additional block indices.

The content above is excerpted from this post:

 https://blog.csdn.net/chenying126/article/details/77921542
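
For reference, the layout described above corresponds to the standard ext2 on-disk definitions; a sketch of the usual ext2_fs.h values, not code from this article's kernel tree:

#define EXT2_NDIR_BLOCKS	12			/* direct block pointers in i_block[] */
#define EXT2_IND_BLOCK		EXT2_NDIR_BLOCKS	/* single indirect block */
#define EXT2_DIND_BLOCK		(EXT2_IND_BLOCK + 1)	/* double indirect block */
#define EXT2_TIND_BLOCK		(EXT2_DIND_BLOCK + 1)	/* triple indirect block */
#define EXT2_N_BLOCKS		(EXT2_TIND_BLOCK + 1)	/* i_block[] has 15 slots in total */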

As outlined above, an ext2 filesystem starts with a superblock that records information about the whole filesystem; it is written when the filesystem is formatted. The files and directories in the filesystem are managed by inodes, which are distinct from the inodes of the VFS layer. With this basic picture of ext2 in mind, we now start from mounting an ext2 filesystem and analyze how data is read from flash to complete the mount.

2 Mounting an ext2 filesystem

A filesystem is typically mounted with a command like:

mount -t ext2 /dev/mtdblock0 /mnt

This mounts the mtdblock0 device onto the /mnt directory; both the mount point and the block device to be mounted must be specified. A detailed analysis of filesystem mounting was already given in this article:

https://blog.csdn.net/oqqYuJi12345678/article/details/101689334
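
From user space the same operation is just a mount(2) system call; a minimal sketch using the paths from the command above:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* equivalent to: mount -t ext2 /dev/mtdblock0 /mnt */
	if (mount("/dev/mtdblock0", "/mnt", "ext2", 0, NULL) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}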

Here we focus on the parts of the mount that are specific to ext2. Mounting involves reading from flash, which is exactly the opportunity we need to see how the whole driver works. Skipping the earlier search for the mount-point directory, we go straight to ext2_mount:

static struct dentry *ext2_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
}

ext2_mount mainly initializes the ext2 superblock. Initializing the superblock requires reading the filesystem information stored on flash and then creating a dentry and an inode for the filesystem. Note the ext2_fill_super function passed above; it will be used later. Now look at mount_bdev:

struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;
------------------------------------------------(1)
	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
-------------------------------------------------(2)
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
		 bdev);
。。。。。。。。。。。。。。。。。。。。。。。
		char b[BDEVNAME_SIZE];

		s->s_mode = mode;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(bdev));
----------------------------------------------------(3)
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= MS_ACTIVE;
		bdev->bd_super = s;
	}

。。。。。。。。。。。。。。
}

The function above does three main things:

(1) Use the given block device node name to find the actual block device, in preparation for the flash reads and writes that follow.

(2) Allocate and initialize a super_block.

(3) Continue initializing the super_block, allocate a dentry and an inode for the root directory, and complete the mount.

2.1 Associating the block device via blkdev_get_by_path

struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;
--------------------------------------------------(1)
	bdev = lookup_bdev(path);//find the block_device from the device name
	if (IS_ERR(bdev))
		return bdev;
---------------------------------------------------------(2)
	err = blkdev_get(bdev, mode, holder);//associate the block_device with the mtd device
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}

(1) First, how the block_device is found:

struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);
//this function simply performs a path walk to find the dentry and inode of the /mtdblock0 node; that inode holds the block device's device number
	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);//find the block_device from the inode
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}

Let's focus on bd_acquire:

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
。。。。。。。。。。。。。。

	bdev = bdget(inode->i_rdev);//obviously inode->i_bdev is still unset at this point, so the block_device has to be obtained here
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

bd_acquire

     ----------->bdget

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;
/* First look up, in the inode hash table, the inode matching the device number dev. If it is not
	  found, a bdev_inode (a structure containing both an inode and a block_device) is created via the
	  bdev pseudo filesystem. On this path the lookup should fail, so a new one is allocated here. */
	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);
	if (!inode)
		return NULL;
//get the bdev_inode from the inode, then the block_device instance from the bdev_inode
	bdev = &BDEV_I(inode)->bdev;
	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;/* set up the relevant fields of the block_device and the inode */
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;//link the inode and the bdev
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

bd_acquire

     ----------->bdget

          ------------->iget5_locked

struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
................................................
	inode = alloc_inode(sb);
............................................
if (set(inode, data))
。。。。。。。。。。。。。。。。。。。。。。。
}

The most important call in this function is alloc_inode, which uses the allocation routine provided by the blockdev_superblock superblock to allocate the inode. The set(inode, data) callback also matters: it executes bdev->bd_dev = dev, recording the device number in the block_device for later use.
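
For reference, the test/set callbacks passed to iget5_locked look roughly like this in this kernel version; bdev_set is the one that records the device number:

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;	/* record the dev_t in the block_device */
	return 0;
}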

sb->s_op->alloc_inode

blockdev_superblock belongs to a pseudo filesystem; let's see where it is initialized:

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};


void __init bdev_cache_init(void)
{
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
}

As you can see, this is a pseudo filesystem named bdev. Its sb->s_op operation set is:

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

bdev_alloc_inode is the function that allocates inodes for this pseudo filesystem:

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};
static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

This allocates a bdev_inode, a structure that contains both a block_device and an inode. At this point we have obtained the block_device structure.
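
The BDEV_I() helper used in bdget() above simply recovers the enclosing bdev_inode from the inode pointer; roughly:

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}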

(2) Now look at blkdev_get, which associates the newly allocated block_device with the mtd device:

int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;
。。。。。。。。。。。。。。。。。。。。。。。。
	res = __blkdev_get(bdev, mode, 0);
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
}

__blkdev_get obtains information from the gendisk and links the relevant data structures together.

Note that __blkdev_get() is called here with its last parameter set to 0, i.e. the whole (main) device is opened by default. After the gendisk is obtained, four cases are handled, depending on whether the device is being opened for the first time and whether a whole device or a partition is being opened; see the comments in the code.

static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	int ret;
	int partno;
	int perm = 0;
 
	。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
 restart:
 
	ret = -ENXIO;
	//get the gendisk instance for this device; if bd_dev refers to a partition, partno will be updated
-----------------------------------------------------------------------(2.1)
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out_unlock_kernel;
 
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {//first time this device is opened
		bdev->bd_disk = disk;//link the block_device to the gendisk
		bdev->bd_contains = bdev;
		if (!partno) {//partno is 0, i.e. the whole device is opened rather than a partition; in the previous article every mtd partition registered its own gendisk, so partno is always 0
			struct backing_dev_info *bdi;
 
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);//get the partition structure from the gendisk
			if (!bdev->bd_part)
				goto out_clear;
 -------------------------------------------------------------------------(2.2)
			if (disk->fops->open) {//the gendisk defines an open method
				ret = disk->fops->open(bdev, mode);//call open to perform the device-specific open
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					module_put(disk->fops->owner);
					put_disk(disk);
					bdev->bd_disk = NULL;
					mutex_unlock(&bdev->bd_mutex);
					goto restart;
				}
				if (ret)
					goto out_clear;
			}
			if (!bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);//take the capacity from the gendisk and set it on the block_device
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev->bd_inode->i_data.backing_dev_info = bdi;
			}
			//if the partitions on the device have changed and the in-kernel partition info is stale, rescan the partitions
			if (bdev->bd_invalidated)
				rescan_partitions(disk, bdev);
		} else {//a partition is being opened
			struct block_device *whole;
			whole = bdget_disk(disk, 0);//get the block_device instance of the whole (main) device
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;//point the partition's bd_contains at the whole device's block_device
			bdev->bd_inode->i_data.backing_dev_info =
			   whole->bd_inode->i_data.backing_dev_info;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	}   else {//not the first open
		module_put(disk->fops->owner);
		put_disk(disk);
		disk = NULL;
		if (bdev->bd_contains == bdev) {//the whole device is being opened
			if (bdev->bd_disk->fops->open) {
				ret = bdev->bd_disk->fops->open(bdev, mode);//call the registered open
				if (ret)
					goto out_unlock_bdev;
			}
			if (bdev->bd_invalidated)
				rescan_partitions(bdev->bd_disk, bdev);
		}
	}
	bdev->bd_openers++;//increment the open count
	if (for_part)//if this is a partition, bump the partition count too
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	unlock_kernel();
	return 0;
 
。。。。。。。。。。。。。。。。。。。。。。。。
}

(2.1) Obtain the gendisk from the device number and the partition number. The device number bdev->bd_dev was taken from the block device node's inode when the block_device was created, as described above.

struct gendisk *get_gendisk(dev_t devt, int *partno)
{
	struct gendisk *disk = NULL;

	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
		struct kobject *kobj;

		kobj = kobj_lookup(bdev_map, devt, partno);
		if (kobj)
                    disk = dev_to_disk(kobj_to_dev(kobj));
	。。。。。。。。。。。。。。。。。。。。。。
	return disk;
}

Block devices are all registered in bdev_map. Recall how the block device was registered in the previous article:

add_disk is called for every mtd partition:

void add_disk(struct gendisk *disk)
{
	。。。。。。。。。。。。。。。。。。。

	blk_register_region(disk_devt(disk), disk->minors, NULL,
			    exact_match, exact_lock, disk);
	register_disk(disk);
	blk_register_queue(disk);
。。。。。。。。。。。。。。。。。。。。。。。。。。
}

blk_register_region is what registers the gendisk in bdev_map, so it can later be found quickly by device number:

void blk_register_region(dev_t devt, unsigned long range, struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
}

This is similar to character device registration, which was analyzed in this article:

https://blog.csdn.net/oqqYuJi12345678/article/details/103102159

Coming back to kobj_lookup above: it quickly finds the registered kobject from the device number and partition number. That kobject is embedded in a device structure, and the device structure is part0.__dev, embedded in the gendisk; so in the end we get back the registered gendisk.
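
Converting the looked-up kobject back into the gendisk is plain container_of arithmetic; roughly, as defined in genhd.h and device.h of this kernel era:

#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)

static inline struct device *kobj_to_dev(struct kobject *kobj)
{
	return container_of(kobj, struct device, kobj);
}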

(2.2) The gendisk carries a set of fops operations. Recalling the previous article again, every mtd partition to be registered goes through the following function:

int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
	struct mtd_blktrans_ops *tr = new->tr;
	struct mtd_blktrans_dev *d;
	int last_devnum = -1;
	struct gendisk *gd;
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
	gd = alloc_disk(1 << tr->part_bits);

	if (!gd)
		goto error2;

	new->disk = gd;
	gd->private_data = new;
	gd->major = tr->major;
	gd->first_minor = (new->devnum) << tr->part_bits;
	gd->fops = &mtd_block_ops;
。。。。。。。。。。。。。。。。。。。。。。。。。。。。
}
static const struct block_device_operations mtd_block_ops = {
	.owner		= THIS_MODULE,
	.open		= blktrans_open,
	.release	= blktrans_release,
	.ioctl		= blktrans_ioctl,
	.getgeo		= blktrans_getgeo,
};

So the open function invoked above is blktrans_open:

static int blktrans_open(struct block_device *bdev, fmode_t mode)
{
	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
	int ret = 0;

	。。。。。。。。。。。。。。。。。。。

	if (dev->tr->open) {
		ret = dev->tr->open(dev);
		if (ret)
			goto error_put;
	}

。。。。。。。。。。。。。。。。。。。。。。。
}
static struct mtd_blktrans_dev *blktrans_dev_get(struct gendisk *disk)
{
	struct mtd_blktrans_dev *dev;
	dev = disk->private_data;
。。。。。。。。。。。。。。。。。。。。。。
	return dev;
}

The mtd_blktrans_dev is stored in disk->private_data. Which mtd_blktrans_dev is it? As described in the previous article, when the mtd block device is registered, the block device is added through a notifier chain whose callback is blktrans_notify_add:

static void blktrans_notify_add(struct mtd_info *mtd)
{
	struct mtd_blktrans_ops *tr;

	if (mtd->type == MTD_ABSENT)
		return;

	list_for_each_entry(tr, &blktrans_majors, list)
		tr->add_mtd(tr, mtd);
}

Here the mtd_blktrans_ops is mtdblock_tr:

static struct mtd_blktrans_ops mtdblock_tr = {
	.name		= "mtdblock",
	.major		= 31,
	.part_bits	= 0,
	.blksize 	= 512,
	.open		= mtdblock_open,
	.flush		= mtdblock_flush,
	.release	= mtdblock_release,
	.readsect	= mtdblock_readsect,
	.writesect	= mtdblock_writesect,
	.add_mtd	= mtdblock_add_mtd,
	.remove_dev	= mtdblock_remove_dev,
	.owner		= THIS_MODULE,
};

So mtdblock_add_mtd is called next:

static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
	struct mtdblk_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return;

	dev->mbd.mtd = mtd;//associate the mtdblk_dev with this partition's mtd_info
	dev->mbd.devnum = mtd->index;

	dev->mbd.size = mtd->size >> 9;
	dev->mbd.tr = tr;

	if (!(mtd->flags & MTD_WRITEABLE))
		dev->mbd.readonly = 1;

	if (add_mtd_blktrans_dev(&dev->mbd))
		kfree(dev);
}

So dev->tr above is mtdblock_tr and dev->tr->open is mtdblock_open. How, then, does the mtd_blktrans_dev get tied to the gendisk? Keep following add_mtd_blktrans_dev:

int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
	struct mtd_blktrans_ops *tr = new->tr;
	struct mtd_blktrans_dev *d;
	int last_devnum = -1;
	struct gendisk *gd;
	int ret;

	。。。。。。。。。。。。。。。。。
	gd = alloc_disk(1 << tr->part_bits);

	if (!gd)
		goto error2;

	new->disk = gd;
	gd->private_data = new;
	gd->major = tr->major;
	gd->first_minor = (new->devnum) << tr->part_bits;
	gd->fops = &mtd_block_ops;
。。。。。。。。。。。。。。。。。。。。。。。。。。。
//initialize the request_queue; its request_fn is set to mtd_blktrans_request
//and its make_request_fn to blk_queue_bio
    new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);

	if (!new->rq)
		goto error3;

	new->rq->queuedata = new;//store the mtd_blktrans_dev in queuedata; used later for reads/writes
	blk_queue_logical_block_size(new->rq, tr->blksize);

	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);

	if (tr->discard) {
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
		new->rq->limits.max_discard_sectors = UINT_MAX;
	}

	gd->queue = new->rq;//attach the request_queue to the gendisk; used later for reads/writes
/* Create processing workqueue */
	new->wq = alloc_workqueue("%s%d", 0, 0,
				  tr->name, new->mtd->index);
	if (!new->wq)
		goto error4;
	INIT_WORK(&new->work, mtd_blktrans_work);//this work item later processes the read/write requests

	。。。。。。。。。。。。。。。。。。。。。。
}

In the end we can see that the mtd_blktrans_dev is linked in through gd->private_data.
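
To make the chain of links built up during registration explicit, here is a sketch; rq_to_mtd is a hypothetical helper written only for illustration, not a function in the kernel:

#include <linux/mtd/blktrans.h>

/* After registration:
 *   gendisk->private_data    == mtd_blktrans_dev  (set in add_mtd_blktrans_dev)
 *   request_queue->queuedata == mtd_blktrans_dev  (also set there)
 *   mtd_blktrans_dev->mtd    == mtd_info          (set in mtdblock_add_mtd)
 */
static struct mtd_info *rq_to_mtd(struct request_queue *rq)
{
	struct mtd_blktrans_dev *dev = rq->queuedata;

	return dev ? dev->mtd : NULL;	/* walk from the queue back to the MTD partition */
}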

2.2 Allocating the super_block structure

struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;
。。。。。。。。。。。。。。。。。。。。。。。
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, flags);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}
		
	err = set(s, data);
	。。。。。。。。。。。。。。。。。。。。。
	return s;
}

Here set(s, data) is set_bdev_super:

static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;//data is the block_device allocated earlier
	s->s_dev = s->s_bdev->bd_dev;//record the block device number

	/*
	 * We set the bdi here to the queue backing, file systems can
	 * overwrite this in ->fill_super()
	 */
	s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
	return 0;
}

2.3 ext2_fill_super: filling in the superblock

Filling in the ext2 superblock requires reading data from flash to initialize it, so this is where the relationship between the upper filesystem layer and the lower driver layer really shows. The function also performs the other important mount work: creating the dentry and inode for the filesystem's root directory:

static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
    struct buffer_head * bh;
    struct ext2_sb_info * sbi;
    struct ext2_super_block * es;
    struct inode *root;
    unsigned long sb_block = get_sb_block(&data);
    unsigned long logic_sb_block;
    unsigned long offset = 0;
    int blocksize = BLOCK_SIZE;
    int db_count;
 
 
    sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); //allocate the ext2_sb_info structure
    if (!sbi)
        goto failed;
 
 
    sb->s_fs_info = sbi; //the VFS super_block is linked to the ext2_sb_info via sb->s_fs_info
    sbi->s_sb_block = sb_block;
 
 
    blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
    ......
-----------------------------------------------------------------(2.3.1)
    if (!(bh = sb_bread(sb, logic_sb_block))) { //read the raw ext2_super_block from disk
        ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
        goto failed_sbi;
    }
    es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
    sbi->s_es = es;
    ......
    sb->s_magic = le16_to_cpu(es->s_magic);
    blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
 
/* If the filesystem's real block size differs from the assumed one, re-read the superblock:
   the superblock occupies one block and sb_bread reads exactly one block at the given block
   number, so with a different real block size the read must be redone with the correct size. */
    if (sb->s_blocksize != blocksize) { 
        brelse(bh);
 
 
        if (!sb_set_blocksize(sb, blocksize)) {
            ext2_msg(sb, KERN_ERR,
                "error: bad blocksize %d", blocksize);
            goto failed_sbi;
        }
        logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
        offset = (sb_block*BLOCK_SIZE) % blocksize;
        bh = sb_bread(sb, logic_sb_block);
        es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
        sbi->s_es = es;
    }
    ......
    sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
 
 
    sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
    sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
    sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
 
 
    sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
    sbi->s_itb_per_group = sbi->s_inodes_per_group /
                    sbi->s_inodes_per_block;
    sbi->s_desc_per_block = sb->s_blocksize /
                    sizeof (struct ext2_group_desc);
    sbi->s_sbh = bh; //s_sbh points at the raw superblock buffer
    sbi->s_mount_state = le16_to_cpu(es->s_state);
    sbi->s_addr_per_block_bits =
        ilog2 (EXT2_ADDR_PER_BLOCK(sb));
    sbi->s_desc_per_block_bits =
        ilog2 (EXT2_DESC_PER_BLOCK(sb));
    ......
    sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
                le32_to_cpu(es->s_first_data_block) - 1)
                    / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
    db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
           EXT2_DESC_PER_BLOCK(sb);
    sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
    ......
    for (i = 0; i < db_count; i++) { //read all the group descriptors
        block = descriptor_loc(sb, logic_sb_block, i);
        sbi->s_group_desc[i] = sb_bread(sb, block);
        if (!sbi->s_group_desc[i]) {
            for (j = 0; j < i; j++)
                brelse (sbi->s_group_desc[j]);
            ext2_msg(sb, KERN_ERR,
                "error: unable to read group descriptors");
            goto failed_mount_group_desc;
        }
    }
    sbi->s_gdb_count = db_count; //number of blocks occupied by the group descriptors
......
/* initialize the reservation (preallocation) window */
    sbi->s_rsv_window_head.rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
    sbi->s_rsv_window_head.rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
    sbi->s_rsv_window_head.rsv_alloc_hit = 0;
    sbi->s_rsv_window_head.rsv_goal_size = 0;
    ext2_rsv_window_add(sb, &sbi->s_rsv_window_head);
	......
sb->s_op = &ext2_sops; //install the super_operations
	......
-----------------------------------------------------------(2.3.2)
    root = ext2_iget(sb, EXT2_ROOT_INO); 
    if (IS_ERR(root)) {
        ret = PTR_ERR(root);
        goto failed_mount3;
    }
 
 -------------------------------------------------------------(2.3.3)
    sb->s_root = d_make_root(root); //create the dentry for the root directory
 
 
    ......
    ext2_write_super(sb);
    ......

(2.3.1) Read the first block of the ext2 filesystem from the flash partition to initialize the filesystem's super_block. sb_bread is the crucial function here: once it is understood, the whole path of reading data from flash is understood. The buffer_head structure holds the data read from flash; it is what the upper filesystem layer uses to exchange data with the mtd layer.

static inline struct buffer_head *
sb_bread(struct super_block *sb, sector_t block)
{
	return __bread(sb->s_bdev, block, sb->s_blocksize);//block is the block number to read
}
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
------------------------------------------------------(2.3.1.1)
	struct buffer_head *bh = __getblk(bdev, block, size);//first check whether this buffer_head already exists in the cache

	if (likely(bh) && !buffer_uptodate(bh))
--------------------------------------------------------(2.3.1.2)
		bh = __bread_slow(bh);//the buffer_head's data still has to be read from flash
	return bh;
}
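
As a quick usage sketch before diving into the internals: this is all a filesystem normally needs to do to read one on-disk block through the buffer cache (blocknr, dst, offset and len are hypothetical parameters here):

#include <linux/buffer_head.h>
#include <linux/string.h>

static int read_one_block(struct super_block *sb, sector_t blocknr,
			  void *dst, unsigned int offset, unsigned int len)
{
	struct buffer_head *bh;

	bh = sb_bread(sb, blocknr);	/* triggers the whole path analyzed below */
	if (!bh)
		return -EIO;
	memcpy(dst, bh->b_data + offset, len);
	brelse(bh);			/* drop the reference taken by sb_bread */
	return 0;
}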

(2.3.1.1)

struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);//first look in the cache

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size);//if not found, allocate new space
	return bh;
}

__getblk

     ----------->__find_get_block

struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);//first check whether it already exists on the bh_lrus.bhs list; on the first read it will certainly not be there

	if (bh == NULL) {
		bh = __find_get_block_slow(bdev, block);//then look in the address_space radix tree for a page holding this block's buffer_head; if there is none, return NULL
		if (bh)
			bh_lru_install(bh);//install it into the bh_lrus.bhs list
	}
	if (bh)
		touch_buffer(bh);
	return bh;
}

Let's look at __find_get_block_slow:

static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page(bd_mapping, index);//look in address_space->page_tree for a cached page at this index
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);//if the page has buffers, walk them to find the matching buffer_head
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
}

Assume this is the first time through, so no page can be found in address_space->page_tree and the next step is needed:

__getblk

     ----------->__getblk_slow

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);//not found on the first pass
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size);//allocate pages/buffers for the address_space->page_tree radix tree
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
	}
}

__getblk

     ----------->__getblk_slow

          ---------------->grow_dev_page

static int
grow_dev_page(struct block_device *bdev, sector_t block,
		pgoff_t index, int size, int sizebits)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;		/* Will call free_more_memory() */
//first get or allocate a page and insert it into the address_space->page_tree radix tree
	page = find_or_create_page(inode->i_mapping, index,
		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
	if (!page)
		return ret;

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						index << sizebits, size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
//size is the size of each bh: split the page into multiple bh's of that size and chain them together
	bh = alloc_page_buffers(page, size, 0);
	if (!bh)
		goto failed;

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, index << sizebits, size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	page_cache_release(page);
	return ret;
}

After this, __find_get_block can return the buffer_head.

(2.3.1.2) With the buffer_head in hand, __bread_slow can read the data into it:

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	。。。。。。。。。。。。。。。。。。。。。。。。
		bh->b_end_io = end_buffer_read_sync;//cleanup to run once the data has been read
		submit_bh(READ, bh);//submit the read
		wait_on_buffer(bh);//wait for the read to complete
		if (buffer_uptodate(bh))
			return bh;
。。。。。。。。。。。。。。。。。。
}

submit_bh

   -------------->_submit_bh

bio is the data structure the Linux kernel uses to communicate between the filesystem layer and the block layer (somewhat like sk_buff in the network stack).

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_NOIO, 1);//allocate a bio

	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;//store the block_device in the bio
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;//cleanup when the read completes
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bh_eod(rw, bio, bh);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	bio_get(bio);
	submit_bio(rw, bio);//submit the read/write request

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}

submit_bio

     -------------->generic_make_request

How does a bio become a request? If several bios target contiguous regions, they are accumulated into a single request (one request carrying several contiguous bios, which is the usual "bio merging"); if a bio is not contiguous with any other, it creates a new request of its own and hangs itself off it. Merging is bounded: once the combined size of the contiguous bios exceeds a limit (configured in /sys/block/xxx/queue/max_sectors_kb), they can no longer be merged into one request.
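
The contiguity test behind back and front merging is essentially sector arithmetic; a simplified sketch (the real checks in elv_merge() and friends also enforce size and segment limits):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* simplified; the real code also checks max_sectors, segment counts, flags, ... */
static bool bio_can_back_merge(struct request *req, struct bio *bio)
{
	return blk_rq_pos(req) + blk_rq_sectors(req) == bio->bi_sector;
}

static bool bio_can_front_merge(struct request *req, struct bio *bio)
{
	return bio->bi_sector + bio_sectors(bio) == blk_rq_pos(req);
}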

void generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;

	if (current->bio_list) {
		bio_list_add(current->bio_list, bio);
		return;
	}


	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);//the block_device was linked to the partition's gendisk earlier, so this obtains that gendisk's request_queue

		q->make_request_fn(q, bio);//hand the bio to the queue; from the gendisk registration above we know
//that make_request_fn is blk_queue_bio

		bio = bio_list_pop(current->bio_list);
	} while (bio);
	current->bio_list = NULL; /* deactivate */
}

blk_queue_bio contains the bio-merging algorithm. It is not the focus of this article, so we only skim it:

static void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
    const bool sync = !!(bio->bi_rw & REQ_SYNC);
    struct blk_plug *plug;
    int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
    struct request *req;
    unsigned int request_count = 0;

    /*
     * low level driver can indicate that it wants pages above a
     * certain limit bounced to low memory (ie for highmem, or even
     * ISA dma in theory)
     */
/* set up a bounce buffer in case the pages are unsuitable for this I/O (e.g. highmem, ISA DMA) */
    blk_queue_bounce(q, &bio);

    if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {    //data integrity check
        bio_endio(bio, -EIO);
        return;
    }

    if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
        spin_lock_irq(q->queue_lock);
        where = ELEVATOR_INSERT_FLUSH;
        goto get_rq;
    }

    /*
     * Check if we can merge with the plugged list before grabbing
     * any locks.
     */
    if (!blk_queue_nomerges(q) &&                  //merging is allowed on this queue
        blk_attempt_plug_merge(q, bio, &request_count))    //try to merge the bio into the current plugged request list
        return;

    spin_lock_irq(q->queue_lock);

    el_ret = elv_merge(q, &req, bio);              //elv_merge is the core: find a request this bio can be front- or back-merged into
    if (el_ret == ELEVATOR_BACK_MERGE) {            //back merge
        if (bio_attempt_back_merge(q, req, bio)) {
            elv_bio_merged(q, req, bio);
            if (!attempt_back_merge(q, req))
                elv_merged_request(q, req, el_ret);
            goto out_unlock;
        }
    } else if (el_ret == ELEVATOR_FRONT_MERGE) {      // front merge
        if (bio_attempt_front_merge(q, req, bio)) {
            elv_bio_merged(q, req, bio);
            if (!attempt_front_merge(q, req))
                elv_merged_request(q, req, el_ret);
            goto out_unlock;
        }
    }
/* no existing request could be merged with */
get_rq:
    /*
     * This sync check and mask will be re-done in init_request_from_bio(),
     * but we need to set it earlier to expose the sync flag to the
     * rq allocator and io schedulers.
     */
    rw_flags = bio_data_dir(bio);
    if (sync)
        rw_flags |= REQ_SYNC;

    /*
     * Grab a free request. This is might sleep but can not fail.
     * Returns with the queue unlocked.
     */
    req = get_request(q, rw_flags, bio, GFP_NOIO);          //get an empty request
    if (IS_ERR(req)) {
        bio_endio(bio, PTR_ERR(req));    /* @q is dead */
        goto out_unlock;
    }

    /*
     * After dropping the lock and possibly sleeping here, our request
     * may now be mergeable after it had proven unmergeable (above).
     * We don't worry about that case for efficiency. It won't happen
     * often, and the elevators are able to handle it.
     */
    init_request_from_bio(req, bio);                  //initialize the request from the bio

    if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
        req->cpu = raw_smp_processor_id();

    plug = current->plug;
    if (plug) {
        /*
         * If this is the first request added after a plug, fire
         * of a plug trace.
         */
        if (!request_count)
            trace_block_plug(q);
        else {
            if (request_count >= BLK_MAX_REQUEST_COUNT) {
                blk_flush_plug_list(plug, false);            //request count reached the limit, flush (unplug) the list
                trace_block_plug(q);
            }
        }
        list_add_tail(&req->queuelist, &plug->list);          //add the request to the plug list
        blk_account_io_start(req, true);
    } else {
        spin_lock_irq(q->queue_lock);
        add_acct_request(q, req, where);
        __blk_run_queue(q);
out_unlock:
        spin_unlock_irq(q->queue_lock);
    }
}

Each flash read or write corresponds to one request. init_request_from_bio is called first to put the bio into the request; assuming no plug is in use, add_acct_request then puts the request onto the request_queue, and __blk_run_queue is called.
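
As an aside, the plug branch we just skipped batches submissions like this (a sketch; the mount path analyzed in this article does not use a plug):

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

static void submit_batch_example(struct buffer_head **bhs, int n)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);			/* current->plug is now set */
	for (i = 0; i < n; i++)
		submit_bh(READ, bhs[i]);	/* requests collect on the plug list */
	blk_finish_plug(&plug);			/* blk_flush_plug_list() dispatches them */
}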

__blk_run_queue

    --------------->__blk_run_queue_uncond

inline void __blk_run_queue_uncond(struct request_queue *q)
{
	if (unlikely(blk_queue_dead(q)))
		return;

	/*
	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
	 * the queue lock internally. As a result multiple threads may be
	 * running such a request function concurrently. Keep track of the
	 * number of active request_fn invocations such that blk_drain_queue()
	 * can wait until all these request_fn calls have finished.
	 */
	q->request_fn_active++;
	q->request_fn(q);
	q->request_fn_active--;
}

The core call is request_fn, which we know from the gendisk registration above is mtd_blktrans_request:

static void mtd_blktrans_request(struct request_queue *rq)
{
	struct mtd_blktrans_dev *dev;
	struct request *req = NULL;

	dev = rq->queuedata;//set to the mtd_blktrans_dev at registration time

	if (!dev)
		while ((req = blk_fetch_request(rq)) != NULL)
			__blk_end_request_all(req, -ENODEV);
	else
		queue_work(dev->wq, &dev->work);//from the initialization above, the work function is mtd_blktrans_work
}

So the read/write request is ultimately handled by a workqueue; the work function is mtd_blktrans_work:

static void mtd_blktrans_work(struct work_struct *work)
{
	struct mtd_blktrans_dev *dev =
		container_of(work, struct mtd_blktrans_dev, work);
	struct mtd_blktrans_ops *tr = dev->tr;
	struct request_queue *rq = dev->rq;
	struct request *req = NULL;
	int background_done = 0;

	spin_lock_irq(rq->queue_lock);

	while (1) {
		int res;

		dev->bg_stop = false;
//fetch the next request to process from the queue
		if (!req && !(req = blk_fetch_request(rq))) {
			if (tr->background && !background_done) {
				spin_unlock_irq(rq->queue_lock);
				mutex_lock(&dev->lock);
				tr->background(dev);
				mutex_unlock(&dev->lock);
				spin_lock_irq(rq->queue_lock);
				/*
				 * Do background processing just once per idle
				 * period.
				 */
				background_done = !dev->bg_stop;
				continue;
			}
			break;
		}

		spin_unlock_irq(rq->queue_lock);

		mutex_lock(&dev->lock);
//process this request
		res = do_blktrans_request(dev->tr, dev, req);
		mutex_unlock(&dev->lock);

		spin_lock_irq(rq->queue_lock);

		if (!__blk_end_request_cur(req, res))
			req = NULL;

		background_done = 0;
	}

	if (req)
		__blk_end_request_all(req, -EIO);

	spin_unlock_irq(rq->queue_lock);
}

Look at do_blktrans_request:

static int do_blktrans_request(struct mtd_blktrans_ops *tr,
			       struct mtd_blktrans_dev *dev,
			       struct request *req)
{
	unsigned long block, nsect;
	char *buf;

	block = blk_rq_pos(req) << 9 >> tr->blkshift;
	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;

	buf = req->buffer;
。。。。。。。。。。。。。。。。。。。。。。。。。。。。

	switch(rq_data_dir(req)) {
	case READ: //a read calls mtd_blktrans_ops->readsect, which we know from above is mtdblock_readsect
		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
			if (tr->readsect(dev, block, buf))
				return -EIO;
		rq_flush_dcache_pages(req);
		return 0;
	case WRITE:
		if (!tr->writesect)
			return -EIO;

		rq_flush_dcache_pages(req);
		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
			if (tr->writesect(dev, block, buf))
				return -EIO;
		return 0;
。。。。。。。。。。。。。。。。。。。。
}

So mtdblock_readsect is called next:

static int mtdblock_readsect(struct mtd_blktrans_dev *dev,
			      unsigned long block, char *buf)
{
//from registration we know the mtd_blktrans_dev is embedded in a mtdblk_dev, from which all the partition information can be reached
	struct mtdblk_dev *mtdblk = container_of(dev, struct mtdblk_dev, mbd);
	return do_cached_read(mtdblk, block<<9, 512, buf);
}
static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
			   int len, char *buf)
{
	struct mtd_info *mtd = mtdblk->mbd.mtd;//the mtd_info was stored here at registration time
	unsigned int sect_size = mtdblk->cache_size;
	size_t retlen;
	int ret;

	pr_debug("mtdblock: read on \"%s\" at 0x%lx, size 0x%x\n",
			mtd->name, pos, len);

	if (!sect_size)
		return mtd_read(mtd, pos, len, &retlen, buf);//read the data; mtd_read calls mtd->_read

	while (len > 0) {
		unsigned long sect_start = (pos/sect_size)*sect_size;
		unsigned int offset = pos - sect_start;
		unsigned int size = sect_size - offset;
		if (size > len)
			size = len;

		/*
		 * Check if the requested data is already cached
		 * Read the requested amount of data from our internal cache if it
		 * contains what we want, otherwise we read the data directly
		 * from flash.
		 */
		if (mtdblk->cache_state != STATE_EMPTY &&
		    mtdblk->cache_offset == sect_start) {
			memcpy (buf, mtdblk->cache_data + offset, size);//copy the cached data into the caller's buffer
		} else {
			ret = mtd_read(mtd, pos, size, &retlen, buf);
			if (ret)
				return ret;
			if (retlen != size)
				return -EIO;
		}

		buf += size;
		pos += size;
		len -= size;
	}

	return 0;
}
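
mtd_read itself is only a thin bounds-checking wrapper that forwards to the per-partition mtd->_read hook; roughly (a sketch for this kernel era, with ECC-status handling omitted):

int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
	     u_char *buf)
{
	*retlen = 0;
	if (from < 0 || from > mtd->size || len > mtd->size - from)
		return -EINVAL;
	if (!len)
		return 0;
	return mtd->_read(mtd, from, len, retlen, buf);	/* see below: part_read for a partition */
}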

So the key call inside mtd_read is mtd->_read. Which _read is that? When the mtd driver was registered in the previous article, the following is set up for every partition:

s3c2410_nand_add_partition

     ----------------->mtd_device_parse_register

            -------------------->add_mtd_partitions

static struct mtd_part *allocate_partition(struct mtd_info *master,
			const struct mtd_partition *part, int partno,
			uint64_t cur_offset)
{
	struct mtd_part *slave;
	char *name;

	/* allocate the partition structure */
	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
。。。。。。。。。。。。

	slave->mtd._read = part_read;
	slave->mtd._write = part_write;

。。。。。。。。。。。。。。。。。。
}

So that function is part_read:

static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
		size_t *retlen, u_char *buf)
{
	struct mtd_part *part = PART(mtd);
。。。。。。。。。。。。。。。。。。。。。。。。。
	res = part->master->_read(part->master, from + part->offset, len,
				  retlen, buf);
	。。。。。。。。。。。。。。。。。。
}

In the previous article the flash was split into four partitions, each pointing at a common master. The partitions were registered individually, but some shared operations still go through the master. Back to the driver registration from that article:

s3c24xx_nand_probe

     ------------------->nand_scan_tail

nand_scan_tail initializes the master's mtd_info:

int nand_scan_tail(struct mtd_info *mtd)
{
	int i;
	struct nand_chip *chip = mtd->priv;

。。。。。。。。。。。。。。。。。。
	mtd->_read = nand_read;
。。。。。。。。。。。。。。。。。。。
}

So the call then reaches nand_read:

nand_read

    ------------------>nand_do_read_ops

static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
			    struct mtd_oob_ops *ops)
{
	int chipnr, page, realpage, col, bytes, aligned, oob_required;
	struct nand_chip *chip = mtd->priv;//get the actual chip this partition points to; s3c2410_nand_init_chip stored the chip in mtd->priv
	struct mtd_ecc_stats stats;
	int ret = 0;
	uint32_t readlen = ops->len;
	uint32_t oobreadlen = ops->ooblen;
	uint32_t max_oobsize = ops->mode == MTD_OPS_AUTO_OOB ?
		mtd->oobavail : mtd->oobsize;

	uint8_t *bufpoi, *oob, *buf;
	unsigned int max_bitflips = 0;

	stats = mtd->ecc_stats;

	chipnr = (int)(from >> chip->chip_shift);
	chip->select_chip(mtd, chipnr);

	realpage = (int)(from >> chip->page_shift);
	page = realpage & chip->pagemask;

	col = (int)(from & (mtd->writesize - 1));

	buf = ops->datbuf;
	oob = ops->oobbuf;
	oob_required = oob ? 1 : 0;

	while (1) {
		bytes = min(mtd->writesize - col, readlen);
		aligned = (bytes == mtd->writesize);

		/* Is the current page in the buffer? */
		if (realpage != chip->pagebuf || oob) {
			bufpoi = aligned ? buf : chip->buffers->databuf;

			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);

			/*
			 * Now read the page into the buffer.  Absent an error,
			 * the read methods return max bitflips per ecc step.
			 */
			if (unlikely(ops->mode == MTD_OPS_RAW))
				ret = chip->ecc.read_page_raw(mtd, chip, bufpoi,
							      oob_required,
							      page);
			else if (!aligned && NAND_HAS_SUBPAGE_READ(chip) &&
				 !oob)
				ret = chip->ecc.read_subpage(mtd, chip,
							col, bytes, bufpoi);
			else
				ret = chip->ecc.read_page(mtd, chip, bufpoi,
							  oob_required, page);
			if (ret < 0) {
				if (!aligned)
					/* Invalidate page cache */
					chip->pagebuf = -1;
				break;
			}

			max_bitflips = max_t(unsigned int, max_bitflips, ret);

			/* Transfer not aligned data */
			if (!aligned) {
				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
				    !(mtd->ecc_stats.failed - stats.failed) &&
				    (ops->mode != MTD_OPS_RAW)) {
					chip->pagebuf = realpage;
					chip->pagebuf_bitflips = ret;
				} else {
					/* Invalidate page cache */
					chip->pagebuf = -1;
				}
				memcpy(buf, chip->buffers->databuf + col, bytes);
			}

			buf += bytes;

			if (unlikely(oob)) {
				int toread = min(oobreadlen, max_oobsize);

				if (toread) {
					oob = nand_transfer_oob(chip,
						oob, ops, toread);
					oobreadlen -= toread;
				}
			}

			if (chip->options & NAND_NEED_READRDY) {
				/* Apply delay or wait for ready/busy pin */
				if (!chip->dev_ready)
					udelay(chip->chip_delay);
				else
					nand_wait_ready(mtd);
			}
		} else {
			memcpy(buf, chip->buffers->databuf + col, bytes);
			buf += bytes;
			max_bitflips = max_t(unsigned int, max_bitflips,
					     chip->pagebuf_bitflips);
		}

		readlen -= bytes;

		if (!readlen)
			break;

		/* For subsequent reads align to page boundary */
		col = 0;
		/* Increment page address */
		realpage++;

		page = realpage & chip->pagemask;
		/* Check, if we cross a chip boundary */
		if (!page) {
			chipnr++;
			chip->select_chip(mtd, -1);
			chip->select_chip(mtd, chipnr);
		}
	}
	chip->select_chip(mtd, -1);

	ops->retlen = ops->len - (size_t) readlen;
	if (oob)
		ops->oobretlen = ops->ooblen - oobreadlen;

	if (ret < 0)
		return ret;

	if (mtd->ecc_stats.failed - stats.failed)
		return -EBADMSG;

	return max_bitflips;
}

As you can see, through the mtd layer we finally reach the concrete driver layer, where the flash-chip operations provided by nand_chip read the data. Once the data has been read, control returns to the worker mtd_blktrans_work above, which signals that the read is complete:

__blk_end_request_cur eventually calls blk_update_request:

blk_update_request

    ---------------->req_bio_endio

static void req_bio_endio(struct request *rq, struct bio *bio,
              unsigned int nbytes, int error)
{
    ......
    ......
 
 
    bio->bi_size -= nbytes;        // update the bio's remaining length and starting sector
    bio->bi_sector += (nbytes >> 9);
 
 
    if (bio_integrity(bio))        
        bio_integrity_advance(bio, nbytes);
            // I have not looked into the integrity-data functions and structures,
            // but bio_integrity_advance only operates on bio->bi_integrity
            // and does not touch any other important member of the bio
 
 
    /* don't actually finish bio if it's part of flush sequence */
    if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
        bio_endio(bio, error);    // if the bio is fully completed (size == 0), call bio_endio
}

bio_endio mainly just invokes bio->bi_end_io, which here is end_bio_bh_io_sync:

static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;
。。。。。。。。。。。。。
	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
。。。。。。。。。。。。。
}

b_end_io here is end_buffer_read_sync:

end_buffer_read_sync

     -------------->__end_buffer_read_notouch

            -------------------->unlock_buffer
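
For reference, the first two steps of that chain are tiny; roughly (from fs/buffer.c of this era):

static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);		/* see below: this is what wakes the waiter */
}

void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}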

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bh->b_state, BH_Lock);
}

unlock_buffer wakes the process that was sleeping in wait_on_buffer inside __bread_slow above. With that, the flash read/write path is complete.
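
On the waiting side, wait_on_buffer is just a sleep on the BH_Lock bit; roughly:

static inline void wait_on_buffer(struct buffer_head *bh)
{
	might_sleep();
	if (buffer_locked(bh))
		__wait_on_buffer(bh);	/* sleeps until unlock_buffer() clears BH_Lock and wakes us */
}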

(2.3.2) Initialize an inode for the filesystem's root directory.

(2.3.3) Initialize a dentry for the filesystem's root directory.

Finally, the whole flow above can be summed up as: sb_bread → __bread → submit_bh → generic_make_request → blk_queue_bio → mtd_blktrans_request → mtd_blktrans_work → do_blktrans_request → mtdblock_readsect → mtd_read → part_read → nand_read.
