Linux 块设备驱动 实例
任务:用一片虚拟地址连续的内存空间模拟一个块设备,并为其写一个驱动
/*
* Sample disk driver, from the beginning.
*/
#include <linux/autoconf.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/fs.h> /* everything... */
#include <linux/errno.h> /* error codes */
#include <linux/timer.h>
#include <linux/types.h> /* size_t */
#include <linux/fcntl.h> /* O_ACCMODE */
#include <linux/hdreg.h> /* HDIO_GETGEO */
#include <linux/kdev_t.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* invalidate_bdev */
#include <linux/bio.h>
#include <linux/uaccess.h> /* copy_to_user() */
/* License declaration for the module. */
MODULE_LICENSE("Dual BSD/GPL");
/* Major number handed to register_blkdev() in sbull_init(); defaults to 0. */
static int sbull_major = 0;
module_param(sbull_major, int, 0);
/* Sector size in bytes passed to blk_queue_hardsect_size() in setup_device(). */
static int hardsect_size = 512;
module_param(hardsect_size, int, 0);
/* Number of hardsect_size sectors per device; nsectors * hardsect_size bytes are allocated. */
static int nsectors = 25600; /* How big the drive is */
module_param(nsectors, int, 0);
/* Number of entries in the Devices array set up by sbull_init(). */
static int ndevices = 1;
module_param(ndevices, int, 0);
/*
 * The different "request modes" we can use. setup_device() switches on
 * request_mode to decide how the request queue is created.
 */
enum {
RM_SIMPLE = 0, /* The extra-simple request function */
RM_FULL = 1, /* The full-blown version */
RM_NOQUEUE = 2, /* Use make_request */
};
/* Mode used for every device; defaults to RM_SIMPLE, settable at load time. */
static int request_mode = RM_SIMPLE;
module_param(request_mode, int, 0);
/*
 * Minor number and partition management.
 * Each device owns SBULL_MINORS minors, i.e. 1 << MINOR_SHIFT.
 */
#define SBULL_MINORS 16
#define MINOR_SHIFT 4
/* Device index for a device number: its minor divided by SBULL_MINORS. */
#define DEVNUM(kdevnum) (MINOR(kdev_t_to_nr(kdevnum)) >> MINOR_SHIFT)
/*
 * We can tweak our hardware sector size, but the kernel talks to us
 * in terms of small sectors, always.
 */
#define KERNEL_SECTOR_SIZE 512
/*
 * After this much idle time, the driver will simulate a media change.
 * Parenthesized so the macro stays a single operand inside larger
 * expressions.
 */
#define INVALIDATE_DELAY (60*HZ)
/*
 * The internal representation of our device.
 */
struct sbull_dev {
int size; /* Device size in bytes (nsectors * hardsect_size) */
/*
 * data is the block device being simulated: a virtually contiguous
 * memory area allocated with vmalloc() in setup_device().
 */
u8 *data; /* The data array */
short users; /* How many users */
short media_change; /* Flag a media change? */
spinlock_t lock; /* For mutual exclusion */
struct request_queue *queue; /* The device request queue */
struct gendisk *gd; /* The gendisk structure */
struct timer_list timer; /* For simulated media changes */
};
/* Array of ndevices device structures; allocated in sbull_init(), freed in sbull_exit(). */
static struct sbull_dev *Devices = NULL;
/*
 * Handle an I/O request: copy nsect kernel sectors between the caller's
 * buffer and the in-memory device image.
 *
 * dev:    device whose data array is read or written
 * sector: first sector of the transfer, in KERNEL_SECTOR_SIZE units
 * nsect:  number of KERNEL_SECTOR_SIZE sectors to move
 * buffer: source (write) or destination (read) of the data
 * write:  non-zero copies buffer -> device, zero copies device -> buffer
 *
 * A transfer that would run past the end of the device is logged and
 * dropped without touching any memory.
 */
static void sbull_transfer(struct sbull_dev *dev, unsigned long sector,
		unsigned long nsect, char *buffer, int write)
{
	/* Byte offset of the first sector and byte length of the transfer. */
	unsigned long offset = sector*KERNEL_SECTOR_SIZE;
	unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE;
	/* dev->size is in bytes; the range check is done in sectors so it
	 * cannot be defeated by the byte products above wrapping around. */
	unsigned long dev_sectors = dev->size / KERNEL_SECTOR_SIZE;

	if (sector > dev_sectors || nsect > dev_sectors - sector) {
		printk (KERN_NOTICE "Beyond-end write (%lu %lu)\n", offset, nbytes);
		return;
	}
	/*
	 * The "hardware" is just memory, so the actual I/O is a memcpy.
	 * A driver for a real device would call its own access routines here.
	 */
	if (write)
		memcpy(dev->data + offset, buffer, nbytes);
	else
		memcpy(buffer, dev->data + offset, nbytes);
}
/*
 * The simple form of the request function: serve every request waiting on
 * the queue, one current segment at a time.
 */
static void sbull_request(struct request_queue *q)
{
	struct request *rq;

	/* Keep pulling requests until elv_next_request() returns NULL. */
	for (rq = elv_next_request(q); rq != NULL; rq = elv_next_request(q)) {
		struct sbull_dev *sdev = rq->rq_disk->private_data;

		if (blk_fs_request(rq)) {
			sbull_transfer(sdev, rq->sector, rq->current_nr_sectors,
					rq->buffer, rq_data_dir(rq));
			end_request(rq, 1);
		} else {
			/* Not a filesystem request: fail it and move on. */
			printk (KERN_NOTICE "Skip non-fs request\n");
			end_request(rq, 0);
		}
	}
}
/*
 * Transfer a single BIO.
 *
 * Walks every segment of the bio, maps its page and hands the data to
 * sbull_transfer(). The sector count of each transfer is taken from the
 * segment being iterated (bvec->bv_len), so segments of different lengths
 * are copied and accounted correctly.
 *
 * Returns 0 unconditionally.
 */
static int sbull_xfer_bio(struct sbull_dev *dev, struct bio *bio)
{
	int i;
	struct bio_vec *bvec;
	sector_t sector = bio->bi_sector;

	/* Do each segment independently. */
	bio_for_each_segment(bvec, bio, i) {
		unsigned long nsect = bvec->bv_len / KERNEL_SECTOR_SIZE;
		char *buffer = __bio_kmap_atomic(bio, i, KM_USER0);

		sbull_transfer(dev, sector, nsect,
				buffer, bio_data_dir(bio) == WRITE);
		sector += nsect;
		/* Unmap using the address returned by the map call above. */
		__bio_kunmap_atomic(buffer, KM_USER0);
	}
	return 0; /* Always "succeed" */
}
/*
 * Transfer a full request.
 *
 * Hands every bio that makes up req to sbull_xfer_bio() and returns the
 * total number of KERNEL_SECTOR_SIZE sectors those bios describe.
 */
static int sbull_xfer_request(struct sbull_dev *dev, struct request *req)
{
	int total_sectors = 0;
	struct bio *cur;

	/* Step through each bio of the request in turn. */
	__rq_for_each_bio(cur, req) {
		sbull_xfer_bio(dev, cur);
		total_sectors = total_sectors + cur->bi_size/KERNEL_SECTOR_SIZE;
	}

	return total_sectors;
}
/*
* Smarter request function that "handles clustering".
*/
static void sbull_full_request(struct request_queue *q)
{
struct request *req;
int sectors_xferred;
struct sbull_dev *dev = q->queuedata;
printk("<0>""in %s\n",__FUNCTION__);
while ((req = elv_next_request(q)) != NULL) {
if (! blk_fs_request(req)) {
printk (KERN_NOTICE "Skip non-fs request\n");
end_request(req, 0);
continue;
}
sectors_xferred = sbull_xfer_request(dev, req);
__blk_end_request(req,0,sectors_xferred<<9);//add by lht for 2.6.27
}
}
/*
 * The direct make_request version: transfer the bio immediately and
 * complete it with the status reported by sbull_xfer_bio().
 * Always returns 0.
 */
static int sbull_make_request(struct request_queue *q, struct bio *bio)
{
	struct sbull_dev *sdev = q->queuedata;

	bio_endio(bio, sbull_xfer_bio(sdev, bio));
	return 0;
}
/*
 * Open and close.
 */
/*
 * Open the device: cancel a pending invalidate timer, store the device in
 * filp->private_data and count the new user. When there was no user
 * before, check_disk_change() is called for the block device.
 * Always returns 0.
 *
 * NOTE(review): check_disk_change() is called with dev->lock (a spinlock)
 * held; confirm that it cannot sleep on the target kernel.
 */
static int sbull_open(struct inode *inode, struct file *filp)
{
struct sbull_dev *dev = inode->i_bdev->bd_disk->private_data;
/* Stop the timer that sbull_release() arms when the last user leaves. */
del_timer_sync(&dev->timer);
filp->private_data = dev;
spin_lock(&dev->lock);
/* First opener: let the block layer look for a media change. */
if (! dev->users)
check_disk_change(inode->i_bdev);
dev->users++;
spin_unlock(&dev->lock);
return 0;
}
/*
 * Close the device: drop one user and, when the last one is gone, arm the
 * timer that fires INVALIDATE_DELAY jiffies from now.
 * Always returns 0.
 */
static int sbull_release(struct inode *inode, struct file *filp)
{
	struct sbull_dev *sdev = inode->i_bdev->bd_disk->private_data;

	spin_lock(&sdev->lock);
	if (--sdev->users == 0) {
		/* Nobody left: start the idle countdown. */
		sdev->timer.expires = jiffies + INVALIDATE_DELAY;
		add_timer(&sdev->timer);
	}
	spin_unlock(&sdev->lock);

	return 0;
}
/*
* Look for a (simulated) media change.
*/
int sbull_media_changed(struct gendisk *gd)
{
struct sbull_dev *dev = gd->private_data;
return dev->media_change;
}
/*
 * Revalidate. WE DO NOT TAKE THE LOCK HERE, for fear of deadlocking
 * with open. That needs to be reevaluated.
 *
 * When a media change is flagged, clear the flag and zero the whole data
 * area. Always returns 0.
 */
int sbull_revalidate(struct gendisk *gd)
{
	struct sbull_dev *sdev = gd->private_data;

	/* Nothing to do unless a media change is pending. */
	if (!sdev->media_change)
		return 0;

	sdev->media_change = 0;
	memset (sdev->data, 0, sdev->size);
	return 0;
}
/*
 * The "invalidate" function runs out of the device timer; it sets
 * a flag to simulate the removal of the media.
 *
 * ldev is the device pointer stored in timer.data, cast to unsigned long.
 */
void sbull_invalidate(unsigned long ldev)
{
	struct sbull_dev *sdev = (struct sbull_dev *) ldev;

	spin_lock(&sdev->lock);
	/* Only flag the change when the device is idle and has its data. */
	if (!sdev->users && sdev->data)
		sdev->media_change = 1;
	else
		printk (KERN_WARNING "sbull: timer sanity check failed\n");
	spin_unlock(&sdev->lock);
}
/*
* The ioctl() implementation
*/
int sbull_ioctl (struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
long size;
struct hd_geometry geo;
struct sbull_dev *dev = filp->private_data;
switch(cmd) {
case HDIO_GETGEO:
/*
* Get geometry: since we are a virtual device, we have to make
* up something plausible. So we claim 16 sectors, four heads,
* and calculate the corresponding number of cylinders. We set the
* start of data at sector four.
*/
//printk("<0>""-------------size=%d\n",size);
/****************for early version************/
//size = dev->size*(hardsect_size/KERNEL_SECTOR_SIZE);
//printk("<0>""-------------size=%d\n",size);
//geo.cylinders = (size & ~0x3f) >> 6;
//geo.cylinders=2000;
//geo.heads = 4;
//geo.sectors = 16;
//geo.sectors=2560;
//geo.start = 0;
//if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
// return -EFAULT;
return 0;
}
return -ENOTTY; /* unknown command */
}
static int sbull_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
unsigned long size;
struct sbull_dev *pdev = bdev->bd_disk->private_data;
size = pdev->size;
geo->cylinders = (size & ~0x3f) >> 6;
geo->heads = 4;
geo->sectors = 16;
geo->start = 0;
return 0;
}
/*
 * The device operations structure; setup_device() installs it as the
 * fops of every gendisk it creates.
 */
static struct block_device_operations sbull_ops = {
.owner = THIS_MODULE,
.open = sbull_open,
.release = sbull_release,
.media_changed = sbull_media_changed,
.revalidate_disk = sbull_revalidate,
.ioctl = sbull_ioctl,
.getgeo = sbull_getgeo,
};
/*
 * Set up our internal device: initialize one entry of the Devices array.
 *
 * dev:   the structure to fill in (it is zeroed first)
 * which: index of the device; selects the first minor and the name suffix
 *
 * On failure the function logs or returns silently with dev->gd == NULL;
 * dev->data is then NULL as well. A queue that was already created stays
 * in dev->queue and is released by sbull_exit().
 */
static void setup_device(struct sbull_dev *dev, int which)
{
	memset (dev, 0, sizeof (struct sbull_dev));
	/*
	 * Initialize the lock and the timer which "invalidates" the device
	 * before anything can fail: sbull_exit() calls del_timer_sync() on
	 * every device, including ones whose setup did not complete.
	 */
	spin_lock_init(&dev->lock);
	init_timer(&dev->timer);
	dev->timer.data = (unsigned long) dev;
	dev->timer.function = sbull_invalidate;
	/*
	 * Get some memory: a virtually contiguous area that plays the role
	 * of the block device.
	 */
	dev->size = nsectors*hardsect_size;
	dev->data = vmalloc(dev->size);
	if (dev->data == NULL) {
		printk (KERN_NOTICE "vmalloc failure.\n");
		return;
	}
	/*
	 * The I/O queue, depending on whether we are using our own
	 * make_request function or not.
	 */
	switch (request_mode) {
	case RM_NOQUEUE:
		dev->queue = blk_alloc_queue(GFP_KERNEL);
		if (dev->queue == NULL)
			goto out_vfree;
		blk_queue_make_request(dev->queue, sbull_make_request);
		break;
	case RM_FULL:
		dev->queue = blk_init_queue(sbull_full_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;
	default:
		printk(KERN_NOTICE "Bad request mode %d, using simple\n", request_mode);
		/* fall into.. */
	case RM_SIMPLE:
		dev->queue = blk_init_queue(sbull_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;
	}
	blk_queue_hardsect_size(dev->queue, hardsect_size);
	dev->queue->queuedata = dev;
	/*
	 * And the gendisk structure.
	 */
	dev->gd = alloc_disk(SBULL_MINORS);
	if (! dev->gd) {
		printk (KERN_NOTICE "alloc_disk failure\n");
		goto out_vfree;
	}
	dev->gd->major = sbull_major;
	dev->gd->first_minor = which*SBULL_MINORS;
	dev->gd->fops = &sbull_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf (dev->gd->disk_name, sizeof(dev->gd->disk_name),
			"sbull%c", which + 'a');
	/* Capacity is expressed in KERNEL_SECTOR_SIZE sectors. */
	set_capacity(dev->gd, nsectors*(hardsect_size/KERNEL_SECTOR_SIZE));
	add_disk(dev->gd);
	return;
out_vfree:
	/* Clear the pointer so sbull_exit() does not free the area again. */
	vfree(dev->data);
	dev->data = NULL;
}
/*
 * Module entry point: register the block major, allocate the Devices
 * array and set up each device.
 *
 * Returns 0 on success, -EBUSY when no major number could be registered
 * and -ENOMEM when the device array cannot be allocated.
 */
static int __init sbull_init(void)
{
	int i;
	int ret;
	/*
	 * Get registered. register_blkdev() returns the allocated major only
	 * when asked for a dynamic one (major 0); for an explicit major it
	 * returns 0 on success, so only a negative value means failure and
	 * sbull_major must not be overwritten in that case.
	 */
	ret = register_blkdev(sbull_major, "sbull");
	if (ret < 0) {
		printk(KERN_WARNING "sbull: unable to get major number\n");
		return -EBUSY;
	}
	if (sbull_major == 0)
		sbull_major = ret;
	/*
	 * Allocate the device array, and initialize each one.
	 */
	Devices = kmalloc(ndevices*sizeof (struct sbull_dev), GFP_KERNEL);
	if (Devices == NULL)
		goto out_unregister;
	for (i = 0; i < ndevices; i++)
		setup_device(Devices + i, i);
	return 0;
out_unregister:
	/* Must use the same name that was registered above. */
	unregister_blkdev(sbull_major, "sbull");
	return -ENOMEM;
}
static void sbull_exit(void)
{
int i;
for (i = 0; i < ndevices; i++) {
struct sbull_dev *dev = Devices + i;
del_timer_sync(&dev->timer);
if (dev->gd) {
del_gendisk(dev->gd);
put_disk(dev->gd);
}
if (dev->queue) {
if (request_mode == RM_NOQUEUE)
// blk_put_queue(dev->queue);
kobject_put(&(dev->queue)->kobj);
else
blk_cleanup_queue(dev->queue);
}
if (dev->data)
vfree(dev->data);
}
unregister_blkdev(sbull_major, "sbull");
kfree(Devices);
}
/* Register sbull_init() and sbull_exit() as the module's init and exit handlers. */
module_init(sbull_init);
module_exit(sbull_exit);
测试方法:
# Makefile
# Invoked directly (KERNELRELEASE empty) it re-runs make inside the kernel
# build tree; invoked from the kernel build system it only names the module.
ifeq ($(KERNELRELEASE),)
#KERNELDIR ?= /home/lht/kernel2.6/linux-2.6.14
# Path of the kernel build tree only; M= and the target go on the recipe lines.
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)
modules:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
modules_install:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install
clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions
.PHONY: modules modules_install clean
else
obj-m := sbull.o
endif
将模块插入内核(2.6.27)
root@linuxidc:/source/workplace/test/sbull_linuxidc# insmod sbull.ko
用lsmod查看模块是否成功插入内核
root@linuxidc:/source/workplace/test/sbull_linuxidc# lsmod | grep sbu
sbull 13452 0
出现上面结果,说明成功了
用ls查看/dev下是否有sbull设备
root@linuxidc:/source/workplace/test/sbull_linuxidc# ls /dev | grep sbu
sbulla
出现上面结果,说明有了;如果没有,先用 cat /proc/devices 查出 sbull 的主设备号(主设备号是动态分配的,本例为 254),再用命令
mknod /dev/sbulla b 254 0
手动创建
至此,已经有一个块设备了
下面用fdisk对虚拟块设备分区
root@linuxidc:/source/workplace/test/sbull_linuxidc# fdisk /dev/sbulla
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel with disk identifier 0x14d0973f.
Changes will remain in memory only, until you decide to write them.
After that, of course, the previous content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n 这里选择n,新建
Command action
e extended
p primary partition (1-4) 这里选p,主分区
p
Partition number (1-4): 1 这里选1,第一个分区
First cylinder (1-400, default 1): 1
Last cylinder, +cylinders or +size{K,M,G} (1-400, default 400):
Using default value 400
Command (m for help): w 这里选w,保存并退出
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
接着将其格式化为ext2
root@linuxidc:/source/workplace/test/sbull_linuxidc# mkfs.ext2 /dev/sbulla1
mke2fs 1.41.3 (12-Oct-2008)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
3200 inodes, 12792 blocks
639 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=13107200
2 block groups
8192 blocks per group, 8192 fragments per group
1600 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 29 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
新建一个文件夹,作为此处模拟的块设备的挂载点
root@linuxidc:/source/workplace/test/sbull_linuxidc# ls /mnt/
hgfs initrd
root@linuxidc:/source/workplace/test/sbull_linuxidc# mkdir /mnt/sbulla1
挂载
root@linuxidc:/source/workplace/test/sbull_linuxidc# mount /dev/sbulla1 /mnt/sbulla1
进入目录,新建一个文件,测试一下
root@linuxidc:/source/workplace/test/sbull_linuxidc# cd /mnt/sbulla1/
root@linuxidc:/mnt/sbulla1# ls
lost+found
root@linuxidc:/mnt/sbulla1# echo hi > hello.c
root@linuxidc:/mnt/sbulla1# ls
hello.c lost+found
root@linuxidc:/mnt/sbulla1# cat hello.c
hi
root@linuxidc:/mnt/sbulla1#