字符設備與塊設備I/O操作有一下不同:
1:塊設備只能以塊爲單位接受輸入和返回輸出,而字符設備以字節爲單位。大多數設備是字符設備,因爲他們不需要緩衝而且不以固定塊大小進行操作。
2:塊設備對應I/O操作有對應的緩衝區,因此他們可以選擇以什麼順序進行訪問,字符設備無緩衝並且直接進行讀寫
3:字符設備只能順序的讀寫,而塊設備能夠隨機的訪問。
弄懂Linux塊設備驅動程序,必須理解塊設備驅動相關的幾個結構體
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
int (*release) (struct gendisk *, fmode_t);
int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
void **, unsigned long *);
int (*media_changed) (struct gendisk *);
unsigned long long (*set_capacity) (struct gendisk *,
unsigned long long);
int (*revalidate_disk) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
struct module *owner;
};
和字符設備驅動一樣,塊設備也有對應的operatios結構體,與字符設備不同的是,該結構體指針不是註冊block_device的時候傳入的,註冊block_device函數式register_blkdev(unsigned int major, const char * name),而block_device_operations 是在給gdisk結構體裏的變量初始化的時候賦值調用的,disk->fops = &xd_fops;
struct request_queue
{
/*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
struct request *last_merge;
struct elevator_queue *elevator;
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq;
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
prepare_flush_fn *prepare_flush_fn;
softirq_done_fn *softirq_done_fn;
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
/*
* Dispatch queue sorting
*/
sector_t end_sector;
struct request *boundary_rq;
/*
* Auto-unplugging state
*/
struct timer_list unplug_timer;
int unplug_thresh; /* After this many requests */
unsigned long unplug_delay; /* After this many jiffies */
struct work_struct unplug_work;
struct backing_dev_info backing_dev_info;
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
*/
void *queuedata;
/*
* queue needs bounce pages for pages above this limit
*/
gfp_t bounce_gfp;
/*
* various queue flags, see QUEUE_* below
*/
unsigned long queue_flags;
/*
* protects queue structures from reentrancy. ->__queue_lock should
* _never_ be used directly, it is queue private. always use
* ->queue_lock.
*/
spinlock_t __queue_lock; //保護隊列的自旋鎖
spinlock_t *queue_lock;
/*
* queue kobject
*/
struct kobject kobj;
/*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
unsigned int nr_congestion_on;
unsigned int nr_congestion_off;
unsigned int nr_batching;
void *dma_drain_buffer;
unsigned int dma_drain_size;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
struct blk_queue_tag *queue_tags;
struct list_head tag_busy_list;
unsigned int nr_sorted;
unsigned int in_flight[2];
unsigned int rq_timeout;
struct timer_list timeout;
struct list_head timeout_list;
struct queue_limits limits;
/*
* sg stuff
*/
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
#endif
/*
* reserved for flush operations
*/
unsigned int ordered, next_ordered, ordseq;
int orderr, ordcolor;
struct request pre_flush_rq, bar_rq, post_flush_rq;
struct request *orig_bar_rq;
struct mutex sysfs_lock;
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
};
一個塊請求隊列是一個塊I/O請求的隊列。請求隊列跟蹤等候的塊I/O請求,它存儲用於描敘這個設備能夠支持的請求的類型信息,他們的最大大小,多少不同的段可以進入一個請求,硬件扇區的大小,對齊要求等參數,其結果是:如果請求隊列被配置正確了,它不會交個該設備一個不能處理的請求。請求隊列還實習了一個插入接口,這個接口允許使用多個I/O調度器(也稱作電梯)的工作方式是以最優的方式向驅動提交I/O請求,在linux中提供了四種電梯調度算法,在noop-iosched.c, as-iosched.c, deadline-iosched.c, cfq-iosched.c分別實現了上述四種調度算法。
struct request {
struct list_head queuelist; //鏈表結構體
struct call_single_data csd;
int cpu;
struct request_queue *q;
unsigned int cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
/* the following two fields are internal, NEVER access directly */
sector_t __sector; /* sector cursor */
unsigned int __data_len; /* total data len */
struct bio *bio;
struct bio *biotail;
struct hlist_node hash; /* merge hash */
/*
* The rb_node is only used inside the io scheduler, requests
* are pruned when moved to the dispatch queue. So let the
* completion_data share space with the rb_node.
*/
union {
struct rb_node rb_node; /* sort/lookup */
void *completion_data;
};
/*
* two pointers are available for the IO schedulers, if they need
* more they have to dynamically allocate it.
*/
void *elevator_private;
void *elevator_private2;
struct gendisk *rq_disk;
unsigned long start_time;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
unsigned short ioprio;
void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */
int tag;
int errors;
int ref_count;
/*
* when request is used as a packet command carrier
*/
unsigned short cmd_len;
unsigned char __cmd[BLK_MAX_CDB];
unsigned char *cmd;
unsigned int extra_len; /* length of alignment and padding */
unsigned int sense_len;
unsigned int resid_len; /* residual count */
void *sense;
unsigned long deadline;
struct list_head timeout_list;
unsigned int timeout;
int retries;
/*
* completion callback.
*/
rq_end_io_fn *end_io;
void *end_io_data;
/* for bidi */
struct request *next_rq;
};
在Linux扇區中,都是以512Byte爲一個扇區,如果硬件的扇區不是以512字節爲一個扇區,則需要進行調整,
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
*/
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
char *(*devnode)(struct gendisk *gd, mode_t *mode);
/* Array of pointers to partitions indexed by partno.
* Protected with matching bdev lock but stat and other
* non-critical accesses use RCU. Always access through
* helpers.
*/
struct disk_part_tbl *part_tbl;
struct hd_struct part0;
const struct block_device_operations *fops;
struct request_queue *queue;
void *private_data;
int flags;
struct device *driverfs_dev; // FIXME: remove
struct kobject *slave_dir;
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct work_struct async_notify;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *integrity;
#endif
int node_id;
};
在Linux內核中使用gendisk表示一個獨立的磁盤設備,其中minors表示該磁盤中的分區數,用struct gendisk* allock_gendisk(int minors)來進行設置磁盤的分區數量。
struct bio {
sector_t bi_sector; /* device address in 512 byte
sectors */
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
unsigned long bi_flags; /* status, command, etc */
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
*/
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
/* Number of segments in this BIO after
* physical address coalescing is performed.
*/
unsigned int bi_phys_segments;
unsigned int bi_size; /* residual I/O count */
/*
* To keep track of the max segment size, we account for the
* sizes of the first and last mergeable segments in this bio.
*/
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
unsigned int bi_comp_cpu; /* completion CPU */
atomic_t bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
void *bi_private;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
bio_destructor_t *bi_destructor; /* destructor */
/*
* We can inline a number of vecs at the end of the bio, to avoid
* double allocations for a small number of bio_vecs. This member
* MUST obviously be kept at the very end of the bio.
*/
struct bio_vec bi_inline_vecs[0];
};
通常一個bio對應一個I/O請求,IO調度器可以將連續的BIO進行合併成一個請求,所以一個請求可以包含多個bio,其中request 和 io的關係如下圖
</pre><p></p><p></p><p><span style="color:#ff0000;">塊設備驅動程序的例子參考linux代碼中的 drivers\block\xd.c和 drivers\block\z2ram.c; 閱讀分析原始代碼是王道</span></p><p><span style="color:#000000;">一下這個例子是用內存來模擬磁盤</span></p><p><pre class="cpp" name="code">#include <linux/module.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/dma.h>
static struct gendisk *ramblock_disk;
static request_queue_t *ramblock_queue;
static int major;
static DEFINE_SPINLOCK(ramblock_lock);
#define RAMBLOCK_SIZE (1024*1024)
static unsigned char *ramblock_buf;
static int ramblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
/* 容量=heads*cylinders*sectors*512 */
geo->heads = 2;
geo->cylinders = 32;
geo->sectors = RAMBLOCK_SIZE/2/32/512;
return 0;
}
static struct block_device_operations ramblock_fops = {
.owner = THIS_MODULE,
.getgeo = ramblock_getgeo,
};
static void do_ramblock_request(request_queue_t * q)
{
static int r_cnt = 0;
static int w_cnt = 0;
struct request *req;
//printk("do_ramblock_request %d\n", ++cnt);
while ((req = elv_next_request(q)) != NULL) {
/* 數據傳輸三要素: 源,目的,長度 */
/* 源/目的: */
unsigned long offset = req->sector * 512;
/* 目的/源: */
// req->buffer
/* 長度: */
unsigned long len = req->current_nr_sectors * 512;
if (rq_data_dir(req) == READ)
{
//printk("do_ramblock_request read %d\n", ++r_cnt);
memcpy(req->buffer, ramblock_buf+offset, len); //磁盤中的讀操作與此不相同
}
else
{
//printk("do_ramblock_request write %d\n", ++w_cnt);
memcpy(ramblock_buf+offset, req->buffer, len); //磁盤中的寫操作與此不相同
}
end_request(req, 1);
}
}
static int ramblock_init(void)
{
/* 1. 分配一個gendisk結構體 */
ramblock_disk = alloc_disk(16); /* 次設備號個數: 分區個數+1 */
/* 2. 設置 */
/* 2.1 分配/設置隊列: 提供讀寫能力 */
ramblock_queue = blk_init_queue(do_ramblock_request, &ramblock_lock);
ramblock_disk->queue = ramblock_queue;
/* 2.2 設置其他屬性: 比如容量 */
major = register_blkdev(0, "ramblock"); /* cat /proc/devices */
ramblock_disk->major = major;
ramblock_disk->first_minor = 0;
sprintf(ramblock_disk->disk_name, "ramblock");
ramblock_disk->fops = &ramblock_fops;
set_capacity(ramblock_disk, RAMBLOCK_SIZE / 512);
/* 3. 硬件相關操作 */
ramblock_buf = kzalloc(RAMBLOCK_SIZE, GFP_KERNEL);
/* 4. 註冊 */
add_disk(ramblock_disk);
return 0;
}
static void ramblock_exit(void)
{
unregister_blkdev(major, "ramblock");
del_gendisk(ramblock_disk);
put_disk(ramblock_disk);
blk_cleanup_queue(ramblock_queue);
kfree(ramblock_buf);
}
module_init(ramblock_init);
module_exit(ramblock_exit);
MODULE_LICENSE("GPL");
上述代碼編譯加載後,運行mount /dev/ramblock /mnt,然後對mnt目錄下進行文件的拷貝操作即是在虛擬磁盤上的操作。