IO的请求队列何来何往

最近在研究数据与元数据分离的相关主题,随意对IO请求过程进行了深入研究(针对scsi设备)。尤其是请求队列在块层,scsi层中有反反复复的操作,所以格外关注了请求队列。

我们看到,在将磁盘添加到系统时,先是分配gendisk,也就是alloc_disk;在分配好之后,块设备驱动就会调用add_disk将其添加到系统,其中很重要的一步是blk_register_queue(disk)。

/*
 * blk_register_queue - register a disk's request queue in sysfs.
 * @disk: gendisk whose ->queue is to be registered.
 *
 * Adds the queue's kobject under the disk's device kobject with the name
 * "queue", sends a KOBJ_ADD uevent for it and, when the queue has a
 * request_fn, registers the elevator as well.
 *
 * Returns 0 on success, -ENXIO when the disk has no queue, or the error
 * reported by blk_trace_init_sysfs(), kobject_add() or
 * elv_register_queue().
 */
int blk_register_queue(struct gendisk *disk)
{
	int ret;
	struct device *dev = disk_to_dev(disk);

	/* The queue is not created here; it is simply read from the gendisk. */
	struct request_queue *q = disk->queue;

	if (WARN_ON(!q))
		return -ENXIO;

	ret = blk_trace_init_sysfs(dev);
	if (ret)
		return ret;

	/*
	 * NOTE(review): on failure the reference taken by kobject_get() and
	 * whatever blk_trace_init_sysfs() set up are not undone on this path;
	 * confirm whether cleanup is required here.
	 */
	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
	if (ret < 0)
		return ret;

	kobject_uevent(&q->kobj, KOBJ_ADD);

	/* Queues without a request_fn skip elevator registration. */
	if (!q->request_fn)
		return 0;

	ret = elv_register_queue(q);
	if (ret) {
		/* Take back the uevent and the kobject_add() done above. */
		kobject_uevent(&q->kobj, KOBJ_REMOVE);
		kobject_del(&q->kobj);
		return ret;
	}

	return 0;
}


其中会看到很重要的一个是
struct request_queue *q = disk->queue;
可是,我们在alloc_disk中并没有分配过请求队列,那么它究竟是哪里来的呢??

但是在后面看到请求到scsi子系统那一层时候,才发现原来这里有伏笔。每每这个时候,总是感慨设计linux的人真是我膜拜的偶像啊。

只要有新的 SCSI 设备附加到系统, SCSI 中间层就会调用 sd_probe 函数.来看:

/*
 * sd_probe - set up a scsi_disk for a newly attached SCSI device.
 * @dev: generic device embedded in the scsi_device being probed.
 *
 * Only TYPE_DISK, TYPE_MOD and TYPE_RBC devices are accepted. For those the
 * function allocates a scsi_disk and a gendisk, reserves a disk index,
 * formats the "sd" disk name, links the structures together, registers
 * sdkp->dev and finally schedules sd_probe_async() to finish the setup.
 *
 * Returns 0 on success or a negative errno on failure: -ENODEV for an
 * unsupported device type, -ENOMEM for allocation failures, otherwise the
 * error reported by the failing helper.
 */
static int sd_probe(struct device *dev)
{
	struct scsi_device *sdp = to_scsi_device(dev);
	struct scsi_disk *sdkp;
	struct gendisk *gd;
	u32 index;
	int error;

	error = -ENODEV;
	if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
		goto out;

	SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
					"sd_attach\n"));

	error = -ENOMEM;
	sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
	if (!sdkp)
		goto out;

	/* The gendisk is allocated here; its request queue is not. */
	gd = alloc_disk(SD_MINORS);
	if (!gd)
		goto out_free;

	/* Reserve a disk index, retrying while the IDA asks us to (-EAGAIN). */
	do {
		if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
			goto out_put;

		spin_lock(&sd_index_lock);
		error = ida_get_new(&sd_index_ida, &index);
		spin_unlock(&sd_index_lock);
	} while (error == -EAGAIN);

	if (error)
		goto out_put;

	error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
	if (error)
		goto out_free_index;

	/* Tie the scsi_disk to its scsi_device and to the gendisk. */
	sdkp->device = sdp;
	sdkp->driver = &sd_template;
	sdkp->disk = gd;
	sdkp->index = index;
	sdkp->openers = 0;
	sdkp->previous_state = 1;

	/* Only install a default timeout when none has been set yet. */
	if (!sdp->request_queue->rq_timeout) {
		if (sdp->type != TYPE_MOD)
			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
		else
			blk_queue_rq_timeout(sdp->request_queue,
					     SD_MOD_TIMEOUT);
	}

	device_initialize(&sdkp->dev);
	sdkp->dev.parent = &sdp->sdev_gendev;
	sdkp->dev.class = &sd_disk_class;
	dev_set_name(&sdkp->dev, dev_name(&sdp->sdev_gendev));

	/*
	 * Keep device_add()'s result in 'error': at this point 'error' is 0
	 * from sd_format_disk_name(), so jumping to the cleanup path without
	 * recording the failure would make sd_probe() report success.
	 */
	error = device_add(&sdkp->dev);
	if (error)
		goto out_free_index;

	get_device(&sdp->sdev_gendev);

	get_device(&sdkp->dev);	/* prevent release before async_schedule */
	async_schedule(sd_probe_async, sdkp);

	return 0;

 out_free_index:
	spin_lock(&sd_index_lock);
	ida_remove(&sd_index_ida, index);
	spin_unlock(&sd_index_lock);
 out_put:
	put_disk(gd);
 out_free:
	kfree(sdkp);
 out:
	return error;
}
这里边主要做的工作有:

1)分配scsi_disk

2)分配gendisk,所以现在应该明白了,在系统启动或者有新的设备加入系统时候,会调用alloc_disk来分配gendisk

3)一些初始化,主要关注的是各层结构之间的联系例如 scsi_disk->disk=gd,这样就将底层的scsi_disk结构与上面块设备层用的gendisk联系在一起了。

sdkp->device = sdp;

sdkp->disk = gd;

4)sd_probe_async,这主要是做一些异步操作。而这里边就将scsi_device的request_queue与gendisk的request_queue联系在一起了。

/*
 * sd_probe_async - asynchronous part of the sd probe.
 * @data:   the scsi_disk handed to async_schedule() by the caller.
 * @cookie: async cookie; not used in this function.
 *
 * Fills in the gendisk (numbers, fops, private data and request queue),
 * sets default disk parameters, revalidates the disk, installs the prep
 * function on the queue, adds the disk to the system and drops one
 * reference on sdkp->dev.
 */
static void sd_probe_async(void *data, async_cookie_t cookie)
{
	struct scsi_disk *sdkp = data;
	struct scsi_device *sdp;
	struct gendisk *gd;
	u32 index;
	struct device *dev;

	sdp = sdkp->device;
	gd = sdkp->disk;
	index = sdkp->index;
	dev = &sdp->sdev_gendev;

	/*
	 * Derive major/minor from the disk index: bits 4-7 select the major,
	 * bits 0-3 (shifted up by 4) and bits 8-19 form the first minor.
	 * For index >= SD_MAX_DISKS these fields are left as they were.
	 */
	if (index < SD_MAX_DISKS) {
		gd->major = sd_major((index & 0xf0) >> 4);
		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
		gd->minors = SD_MINORS;
	}
	gd->fops = &sd_fops;
	gd->private_data = &sdkp->driver;
	/*
	 * The gendisk does not get a queue of its own: it points at the
	 * request queue that already belongs to the scsi_device.
	 */
	gd->queue = sdkp->device->request_queue;

	/* defaults, until the device tells us otherwise */
	sdp->sector_size = 512;
	sdkp->capacity = 0;
	sdkp->media_present = 1;
	sdkp->write_prot = 0;
	sdkp->WCE = 0;
	sdkp->RCD = 0;
	sdkp->ATO = 0;
	sdkp->first_scan = 1;

	sd_revalidate_disk(gd);

	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

	gd->driverfs_dev = &sdp->sdev_gendev;
	gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
	if (sdp->removable)
		gd->flags |= GENHD_FL_REMOVABLE;

	dev_set_drvdata(dev, sdkp);
	add_disk(gd);// also registers the queue with the system
	sd_dif_config_host(sdkp);

	/* Revalidated a second time once the disk has been added. */
	sd_revalidate_disk(gd);

	sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
		  sdp->removable ? "removable " : "");
	/* Presumably pairs with the get_device(&sdkp->dev) taken in sd_probe(). */
	put_device(&sdkp->dev);
}

其中,有一句 gd->queue=sdkp->device->request_queue,那么也就是说gendisk中request_queue不是在分配gendisk时候给分配的(因为我们在那里的源码就是没找到的)。gendisk的queue的结构直接就指向该scsi_device的request_queue,那么这个scsi_device的request_queue又在哪里呢?

找到在scsi设备探测过程中的scsi_alloc_sdev

/*
 * scsi_alloc_sdev - allocate and initialise a scsi_device.
 * @starget:  target the new device belongs to.
 * @lun:      logical unit number stored in the new device.
 * @hostdata: stored in sdev->hostdata.
 *
 * Allocates the scsi_device (plus shost->transportt->device_size extra
 * bytes), fills in its identity and list heads, allocates its request
 * queue via scsi_alloc_queue() and, if the host template provides one,
 * calls ->slave_alloc().
 *
 * Returns the new scsi_device, or NULL on failure. A failure message is
 * printed unless ->slave_alloc() returned -ENXIO.
 */
<span style="color:#333333;">static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
					   unsigned int lun, void *hostdata)
{
	struct scsi_device *sdev;
	int display_failure_msg = 1, ret;
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
	extern void scsi_evt_thread(struct work_struct *work);

	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
		       GFP_ATOMIC);
	if (!sdev)
		goto out;

	sdev->vendor = scsi_null_device_strs;
	sdev->model = scsi_null_device_strs;
	sdev->rev = scsi_null_device_strs;
	sdev->host = shost;
	sdev->id = starget->id;
	sdev->lun = lun;
	sdev->channel = starget->channel;
	sdev->sdev_state = SDEV_CREATED;
	INIT_LIST_HEAD(&sdev->siblings);
	INIT_LIST_HEAD(&sdev->same_target_siblings);
	INIT_LIST_HEAD(&sdev->cmd_list);
	INIT_LIST_HEAD(&sdev->starved_entry);
	INIT_LIST_HEAD(&sdev->event_list);
	spin_lock_init(&sdev->list_lock);
	INIT_WORK(&sdev->event_work, scsi_evt_thread);

	/* Takes a reference on the target's device; dropped below on failure. */
	sdev->sdev_gendev.parent = get_device(&starget->dev);
	sdev->sdev_target = starget;

	/* usually NULL and set by ->slave_alloc instead */
	sdev->hostdata = hostdata;

	/* if the device needs this changing, it may do so in the
	 * slave_configure function */
	sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED;

	/*
	 * Some low level driver could use device->type
	 */
	sdev->type = -1;

	/*
	 * Assume that the device will have handshaking problems,
	 * and then fix this field later if it turns out it
	 * doesn't
	 */
	sdev->borken = 1;

	/* The request queue of the device is created here. */
	</span><span style="color:#ff0000;">sdev->request_queue = scsi_alloc_queue(sdev);</span><span style="color:#333333;">
	if (!sdev->request_queue) {
		/* release fn is set up in scsi_sysfs_device_initialise, so
		 * have to free and put manually here */
		put_device(&starget->dev);
		kfree(sdev);
		goto out;
	}

	/* Let the queue point back at its owning scsi_device. */
	sdev->request_queue->queuedata = sdev;
	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);

	scsi_sysfs_device_initialize(sdev);

	if (shost->hostt->slave_alloc) {
		ret = shost->hostt->slave_alloc(sdev);
		if (ret) {
			/*
			 * if LLDD reports slave not present, don't clutter
			 * console with alloc failure messages
			 */
			if (ret == -ENXIO)
				display_failure_msg = 0;
			goto out_device_destroy;
		}
	}

	return sdev;

out_device_destroy:
	scsi_device_set_state(sdev, SDEV_DEL);
	transport_destroy_device(&sdev->sdev_gendev);
	put_device(&sdev->sdev_gendev);
out:
	if (display_failure_msg)
		printk(ALLOC_FAILURE_MSG, __func__);
	return NULL;
}</span>
该函数用于分配scsi_device,其中有一句sdev->request_queue = scsi_alloc_queue(sdev); 跟进去会看到主要是q = __scsi_alloc_queue(sdev->host, scsi_request_fn);,而这个函数又是干嘛呢?

/*
 * __scsi_alloc_queue - create a request queue configured for a SCSI host.
 * @shost:      host whose limits are applied to the new queue.
 * @request_fn: request function passed to blk_init_queue().
 *
 * Creates the queue with blk_init_queue(request_fn, NULL) and then applies
 * the host's segment, sector, bounce, boundary and clustering settings.
 *
 * Returns the new queue, or NULL when blk_init_queue() fails.
 */
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
					 request_fn_proc *request_fn)
{
	struct request_queue *q;
	struct device *dev = shost->shost_gendev.parent;

	q = blk_init_queue(request_fn, NULL);
	if (!q)
		return NULL;

	/*
	 * this limit is imposed by hardware restrictions
	 */
	blk_queue_max_hw_segments(q, shost->sg_tablesize);
	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);

	blk_queue_max_sectors(q, shost->max_sectors);
	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(q, shost->dma_boundary);
	dma_set_seg_boundary(dev, shost->dma_boundary);

	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));

	/* New queue, no concurrency on queue_flags */
	if (!shost->use_clustering)
		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);

	/*
	 * set a reasonable default alignment on word boundaries: the
	 * host and device may alter it using
	 * blk_queue_update_dma_alignment() later.
	 */
	blk_queue_dma_alignment(q, 0x03);

	return q;
}
调用了q = blk_init_queue(request_fn, NULL);(同时要注意这里的request_fn)我们传入的是scsi_request_fn 这在请求的处理过程中是很重要的,后面说。

这里就是分配请求队列。所以整理的过程就是这样

1)在系统启动时候的设备探测中,我们会调用scsi_alloc_sdev分配scsi_device逻辑设备,在这过程中我们同时分配了其请求队列(request_queue),并且在分配queue时候传入了request_fn为scsi_request_fn

2)在探测过程中,同时有调用到sd_probe ,这个过程主要做的是,a)分配scsi_disk,b)分配gendisk,c)联系起各个结构,将scsi_disk的disk指针指向gendisk,将scsi_disk的device指向scsi_device,同时将gendisk的gd->queue指向该scsi_device 的queue字段。

所以现在应该就很清楚了IO请求队列的身世。

为什么我去追究了这个过程呢?因为在看到submit_bio后调用的generic_make_request中,我们看到在将请求插入到队列中前,他会先去获取设备的请求队列,通过q = bdev_get_queue(bio->bi_bdev);,而他是通过return bdev->bd_disk->queue;以及在后面的make_request调度后调用 q->request_fn。总是疑惑在上面处理的东西用的是gendisk的request_queue,他到scsi层使用的request_queue跟上面一样的吗?q->request_fn他怎么知道就是执行到了scsi_request_fn呢?

所以现在就明白了,在系统启动时候探测到scsi设备时候,会分配好scsi_disk,gendisk,scsi_device,然后联系起他们的关系,将scsi_disk的disk指针指向gendisk,将scsi_disk的device指向scsi_device,同时将gendisk的gd->queue指向该scsi_device 的queue字段。所以在generic_make_request 中通过gendisk获取queue也就是下面的scsi_device的queue,而最后调用的q->request_fn,因为这里的q就是下面scsi_device的q,他在alloc_queue的时候就将request_fn实例化为scsi_request_fn传进去了。所以在generic_make_request中当然q->request_fn就是执行scsi_request_fn这个策略例程了。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章