IO的請求隊列何來何往

最近在研究數據與元數據分離的相關主題,隨意對IO請求過程進行了深入研究(針對scsi設備)。尤其是請求隊列在塊層,scsi層中有反反覆覆的操作,所以格外關注了請求隊列。

我們將磁盤添加到系統時,先是分配gendisk,也就是alloc_disk;分配好之後,塊設備驅動就會調用add_disk將其添加到系統,其中很重要的一步是blk_register_queue(disk)。

/*
 * blk_register_queue - register a disk's request queue kobject
 * @disk: gendisk whose ->queue is to be registered
 *
 * Adds the queue kobject under the name "queue" with the disk's device
 * kobject as parent, emits KOBJ_ADD, and, when the queue has a request_fn,
 * registers the elevator as well.
 *
 * Returns 0 on success, -ENXIO when the disk has no queue, or the error
 * reported by blk_trace_init_sysfs(), kobject_add() or elv_register_queue().
 */
int blk_register_queue(struct gendisk *disk)
{
	struct device *dev = disk_to_dev(disk);
	struct request_queue *q = disk->queue;
	int err;

	if (WARN_ON(!q))
		return -ENXIO;

	err = blk_trace_init_sysfs(dev);
	if (err)
		return err;

	/* The parent passed to kobject_add() is the disk's device kobject. */
	err = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
	if (err < 0)
		return err;

	kobject_uevent(&q->kobj, KOBJ_ADD);

	/* A queue without a request_fn gets no elevator registration. */
	if (!q->request_fn)
		return 0;

	err = elv_register_queue(q);
	if (!err)
		return 0;

	/* Elevator registration failed: announce removal and drop the kobject. */
	kobject_uevent(&q->kobj, KOBJ_REMOVE);
	kobject_del(&q->kobj);
	return err;
}


其中會看到很重要的一個是
struct request_queue *q = disk->queue;
可是,我們在alloc_disk中並沒有分配過請求隊列,那麼它究竟是哪裏來的呢??

但是在後面看到請求到scsi子系統那一層時候,才發現原來這裏有伏筆。每每這個時候,總是感慨設計linux的人真是我膜拜的偶像啊。

只要有新的 SCSI 設備附加到系統, SCSI 中間層就會調用 sd_probe 函數.來看:

/**
 * sd_probe - set up a scsi_disk and gendisk for a SCSI device
 * @dev: device to probe; converted with to_scsi_device()
 *
 * Only TYPE_DISK, TYPE_MOD and TYPE_RBC devices are accepted. The function
 * allocates the scsi_disk and the gendisk, reserves a disk index, formats
 * the disk name with the "sd" prefix, links the structures together,
 * registers the scsi_disk's class device and finally schedules
 * sd_probe_async() to complete the setup.
 *
 * Returns 0 on success or an error code; on failure the resources acquired
 * up to that point are released through the unwind labels.
 */
static int sd_probe(struct device *dev)
{
	struct scsi_device *sdp = to_scsi_device(dev);
	struct scsi_disk *sdkp;
	struct gendisk *gd;
	u32 index;
	int error;

	/* Reject every device type this driver does not handle. */
	error = -ENODEV;
	if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
		goto out;

	SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
					"sd_attach\n"));

	error = -ENOMEM;
	sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
	if (!sdkp)
		goto out;

	/* Note: no request queue is allocated for the gendisk here. */
	gd = alloc_disk(SD_MINORS);
	if (!gd)
		goto out_free;

	/* Reserve a disk index, retrying while the IDA asks us to. */
	do {
		if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
			goto out_put;

		spin_lock(&sd_index_lock);
		error = ida_get_new(&sd_index_ida, &index);
		spin_unlock(&sd_index_lock);
	} while (error == -EAGAIN);

	if (error)
		goto out_put;

	error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
	if (error)
		goto out_free_index;

	/* Tie the scsi_disk to its scsi_device and to the gendisk. */
	sdkp->device = sdp;
	sdkp->driver = &sd_template;
	sdkp->disk = gd;
	sdkp->index = index;
	sdkp->openers = 0;
	sdkp->previous_state = 1;

	/* Install a default request timeout only if none has been set yet. */
	if (!sdp->request_queue->rq_timeout) {
		if (sdp->type != TYPE_MOD)
			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
		else
			blk_queue_rq_timeout(sdp->request_queue,
					     SD_MOD_TIMEOUT);
	}

	device_initialize(&sdkp->dev);
	sdkp->dev.parent = &sdp->sdev_gendev;
	sdkp->dev.class = &sd_disk_class;
	/* Pass the name through "%s" so it is never parsed as a format. */
	dev_set_name(&sdkp->dev, "%s", dev_name(&sdp->sdev_gendev));

	/*
	 * Propagate the real failure code: 'error' is 0 at this point, so
	 * jumping to the unwind path without setting it would report
	 * success after everything had been released.
	 */
	error = device_add(&sdkp->dev);
	if (error)
		goto out_free_index;

	get_device(&sdp->sdev_gendev);

	get_device(&sdkp->dev);	/* prevent release before async_schedule */
	async_schedule(sd_probe_async, sdkp);

	return 0;

 out_free_index:
	spin_lock(&sd_index_lock);
	ida_remove(&sd_index_ida, index);
	spin_unlock(&sd_index_lock);
 out_put:
	put_disk(gd);
 out_free:
	kfree(sdkp);
 out:
	return error;
}
這裏邊主要做的工作有:

1)分配scsi_disk

2)分配gendisk,所以現在應該明白了,在系統啓動或者有新的設備加入系統時候,會調用alloc_disk來分配gendisk

3)一些初始化,主要關注的是各層結構之間的聯繫例如 scsi_disk->disk=gd,這樣就將底層的scsi_disk結構與上面塊設備層用的gendisk聯繫在一起了。

sdkp->device = sdp;

sdkp->disk = gd;

4)sd_probe_async,這主要是做一些異步操作。而這裏邊就將scsi_device的request_queue與gendisk的request_queue聯繫在一起了。

/*
 * sd_probe_async - asynchronous part of the sd probe
 * @data: the scsi_disk handed to async_schedule() by sd_probe()
 * @cookie: async cookie; not used in this function
 *
 * Fills in the gendisk belonging to the scsi_disk (numbers, fops, private
 * data and request queue), sets default disk parameters, revalidates the
 * disk and adds it to the system with add_disk().
 */
static void sd_probe_async(void *data, async_cookie_t cookie)
{
	struct scsi_disk *sdkp = data;
	struct scsi_device *sdp;
	struct gendisk *gd;
	u32 index;
	struct device *dev;

	sdp = sdkp->device;
	gd = sdkp->disk;
	index = sdkp->index;
	dev = &sdp->sdev_gendev;

	/* Major/minor numbers are derived from the disk index. */
	if (index < SD_MAX_DISKS) {
		gd->major = sd_major((index & 0xf0) >> 4);
		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
		gd->minors = SD_MINORS;
	}
	gd->fops = &sd_fops;
	gd->private_data = &sdkp->driver;
	/*
	 * The gendisk does not get a queue of its own: it points at the
	 * request queue that already belongs to the scsi_device.
	 */
	gd->queue = sdkp->device->request_queue;

	/* defaults, until the device tells us otherwise */
	sdp->sector_size = 512;
	sdkp->capacity = 0;
	sdkp->media_present = 1;
	sdkp->write_prot = 0;
	sdkp->WCE = 0;
	sdkp->RCD = 0;
	sdkp->ATO = 0;
	sdkp->first_scan = 1;

	sd_revalidate_disk(gd);

	/* Install sd_prep_fn as the prepare-request hook of the queue. */
	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

	gd->driverfs_dev = &sdp->sdev_gendev;
	gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
	if (sdp->removable)
		gd->flags |= GENHD_FL_REMOVABLE;

	dev_set_drvdata(dev, sdkp);
	add_disk(gd);// also registers the queue with the system
	sd_dif_config_host(sdkp);

	/* Second revalidation, now that the disk has been added. */
	sd_revalidate_disk(gd);

	sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
		  sdp->removable ? "removable " : "");
	/* Pairs with the get_device(&sdkp->dev) taken before async_schedule(). */
	put_device(&sdkp->dev);
}

其中,有一句 gd->queue = sdkp->device->request_queue,那麼也就是說gendisk中request_queue不是在分配gendisk時候給分配的(因爲我們在那裏的源碼就是沒找到的)。gendisk的queue的結構直接就指向該scsi_device的request_queue,那麼這個scsi_device的request_queue又在哪裏呢?

找到在scsi設備探測過程中的scsi_alloc_sdev

<span style="color:#333333;">static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
					   unsigned int lun, void *hostdata)
{
	struct scsi_device *sdev;
	int display_failure_msg = 1, ret;
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
	extern void scsi_evt_thread(struct work_struct *work);

	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
		       GFP_ATOMIC);
	if (!sdev)
		goto out;

	sdev->vendor = scsi_null_device_strs;
	sdev->model = scsi_null_device_strs;
	sdev->rev = scsi_null_device_strs;
	sdev->host = shost;
	sdev->id = starget->id;
	sdev->lun = lun;
	sdev->channel = starget->channel;
	sdev->sdev_state = SDEV_CREATED;
	INIT_LIST_HEAD(&sdev->siblings);
	INIT_LIST_HEAD(&sdev->same_target_siblings);
	INIT_LIST_HEAD(&sdev->cmd_list);
	INIT_LIST_HEAD(&sdev->starved_entry);
	INIT_LIST_HEAD(&sdev->event_list);
	spin_lock_init(&sdev->list_lock);
	INIT_WORK(&sdev->event_work, scsi_evt_thread);

	sdev->sdev_gendev.parent = get_device(&starget->dev);
	sdev->sdev_target = starget;

	/* usually NULL and set by ->slave_alloc instead */
	sdev->hostdata = hostdata;

	/* if the device needs this changing, it may do so in the
	 * slave_configure function */
	sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED;

	/*
	 * Some low level driver could use device->type
	 */
	sdev->type = -1;

	/*
	 * Assume that the device will have handshaking problems,
	 * and then fix this field later if it turns out it
	 * doesn't
	 */
	sdev->borken = 1;

	</span><span style="color:#ff0000;">sdev->request_queue = scsi_alloc_queue(sdev);</span><span style="color:#333333;">
	if (!sdev->request_queue) {
		/* release fn is set up in scsi_sysfs_device_initialise, so
		 * have to free and put manually here */
		put_device(&starget->dev);
		kfree(sdev);
		goto out;
	}

	sdev->request_queue->queuedata = sdev;
	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);

	scsi_sysfs_device_initialize(sdev);

	if (shost->hostt->slave_alloc) {
		ret = shost->hostt->slave_alloc(sdev);
		if (ret) {
			/*
			 * if LLDD reports slave not present, don't clutter
			 * console with alloc failure messages
			 */
			if (ret == -ENXIO)
				display_failure_msg = 0;
			goto out_device_destroy;
		}
	}

	return sdev;

out_device_destroy:
	scsi_device_set_state(sdev, SDEV_DEL);
	transport_destroy_device(&sdev->sdev_gendev);
	put_device(&sdev->sdev_gendev);
out:
	if (display_failure_msg)
		printk(ALLOC_FAILURE_MSG, __func__);
	return NULL;
}</span>
該函數用於分配scsi_device,其中有一句sdev->request_queue = scsi_alloc_queue(sdev); 跟進去會看到主要是q = __scsi_alloc_queue(sdev->host, scsi_request_fn);,而這個函數又是幹嘛呢?

/**
 * __scsi_alloc_queue - allocate a request queue configured for a SCSI host
 * @shost: host whose limits are applied to the queue
 * @request_fn: request function handed to blk_init_queue()
 *
 * Creates the queue with blk_init_queue() and applies the host's limits:
 * segment counts, maximum sectors, bounce limit, segment boundary, maximum
 * segment size, clustering and a default DMA alignment.
 *
 * Returns the new queue, or NULL if blk_init_queue() fails.
 */
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
					 request_fn_proc *request_fn)
{
	struct request_queue *q;
	struct device *dev = shost->shost_gendev.parent;

	/* request_fn is stored in the queue and later reached as q->request_fn. */
	q = blk_init_queue(request_fn, NULL);
	if (!q)
		return NULL;

	/*
	 * this limit is imposed by hardware restrictions
	 */
	blk_queue_max_hw_segments(q, shost->sg_tablesize);
	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);

	blk_queue_max_sectors(q, shost->max_sectors);
	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(q, shost->dma_boundary);
	dma_set_seg_boundary(dev, shost->dma_boundary);

	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));

	/* New queue, no concurrency on queue_flags */
	if (!shost->use_clustering)
		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);

	/*
	 * set a reasonable default alignment on word boundaries: the
	 * host and device may alter it using
	 * blk_queue_update_dma_alignment() later.
	 */
	blk_queue_dma_alignment(q, 0x03);

	return q;
}
調用了q = blk_init_queue(request_fn, NULL);(同時要注意這裏的request_fn)我們傳入的是scsi_request_fn 這在請求的處理過程中是很重要的,後面說。

這裏就是分配請求隊列。所以整理的過程就是這樣

1)在系統啓動時候的設備探測中,我們會調用scsi_alloc_sdev分配scsi_device邏輯設備,在這過程中我們同時分配了其請求隊列(request_queue),並且在分配queue時候傳入了request_fn爲scsi_request_fn

2)在探測過程中,同時有調用到sd_probe,這個過程主要做的是:a)分配scsi_disk,b)分配gendisk,c)聯繫起各個結構,將scsi_disk的disk指針指向gendisk,將scsi_disk的device指向scsi_device,同時將gendisk的gd->queue指向該scsi_device的request_queue字段。

所以現在應該就很清楚了IO請求隊列的身世。

爲什麼我去追究了這個過程呢?因爲在看到submit_bio後調用的generic_make_request中,我們看到在將請求插入到隊列中前,他會先去獲取設備的請求隊列,通過q = bdev_get_queue(bio->bi_bdev);,而他是通過return bdev->bd_disk->queue;以及在後面的make_request調度後調用 q->request_fn。總是疑惑在上面處理的東西用的是gendisk的request_queue,他到scsi層使用的request_queue跟上面一樣的嗎?q->request_fn他怎麼知道就是執行到了scsi_request_fn呢?

所以現在就明白了,在系統啓動時候探測到scsi設備時候,會分配好scsi_disk,gendisk,scsi_device,然後聯繫起他們的關係,將scsi_disk的disk指針指向gendisk,將scsi_disk的device指向scsi_device,同時將gendisk的gd->queue指向該scsi_device的request_queue字段。所以在generic_make_request 中通過gendisk獲取queue也就是下面的scsi_device的queue,而最後調用的q->request_fn,因爲這裏的q就是下面scsi_device的q,他在alloc_queue的時候就將request_fn實例化爲scsi_request_fn傳進去了。所以在generic_make_request中當然q->request_fn就是執行scsi_request_fn這個策略例程了。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章