/*
 * Copyright (C) 2011 Citrix Systems Inc.
 *
 * This file is part of Blktap2.
 *
 * Blktap2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * Blktap2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License version 2 for more details.
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with Blktap2.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <linux/device.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/blkdev.h>
#include <linux/mman.h>

#include "blktap.h"

int blktap_ring_major;
static struct cdev blktap_ring_cdev;

/*
 * BLKTAP - immediately before the mmap area,
 * we have a bunch of pages reserved for shared memory rings.
 */
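
/*
 * Sketch of the resulting process address space, assuming a single
 * shared-ring page as set up in blktap_ring_mmap_sring() below:
 *
 *   ring_vstart           user_vstart
 *   | sring page | req 0: BLKTAP_SEGMENT_MAX pages | req 1: ... |
 *
 * Each pending request owns a fixed slot of BLKTAP_SEGMENT_MAX pages;
 * MMAP_VADDR() below turns a (request, segment) pair into an address.
 */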

#define BLKTAP_INFO_SIZE_AT(_memb)				\
	(offsetof(struct blktap_device_info, _memb) +		\
	 sizeof(((struct blktap_device_info*)0)->_memb))
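
/*
 * BLKTAP_INFO_SIZE_AT(_memb) yields the size of struct
 * blktap_device_info up to and including member _memb. The
 * BLKTAP_IOCTL_CREATE_DEVICE handler below uses it to accept
 * variable-length structs from userspace: it first copies
 * BLKTAP_INFO_SIZE_AT(flags) bytes, then, depending on the flags,
 * re-copies the larger sizes ending at phys_block_offset or
 * trim_block_offset.
 */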

static void
blktap_ring_read_response(struct blktap *tap,
			  const struct blktap_ring_response *rsp)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_request *request = NULL;
	int usr_idx, err;

	usr_idx = rsp->id;
	if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) {
		err = -ERANGE;
		goto invalid;
	}

	request = ring->pending[usr_idx];
	if (!request) {
		err = -ESRCH;
		goto invalid;
	}

	if (rsp->operation != request->operation) {
		err = -EINVAL;
		goto invalid;
	}

	dev_dbg(ring->dev,
		"request %d [%p] response: %d\n",
		request->usr_idx, request, rsp->status);

	err = rsp->status == BLKTAP_RSP_OKAY ? 0 : -EIO;

	blktap_device_end_request(tap, request, err);
	return;

invalid:
	dev_warn(ring->dev,
		 "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
		 usr_idx, rsp->status,
		 rsp->operation, request ? request->operation : -1,
		 err);
}

static void
blktap_read_ring(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_ring_response rsp;
	RING_IDX rc, rp;

	down_read(&current->mm->mmap_sem);
	if (!ring->vma) {
		up_read(&current->mm->mmap_sem);
		return;
	}

	/* for each outstanding message on the ring */
	rp = ring->ring.sring->rsp_prod;
	rmb(); /* ensure we see responses queued up to rp */

	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
		memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
		blktap_ring_read_response(tap, &rsp);
	}

	ring->ring.rsp_cons = rc;

	up_read(&current->mm->mmap_sem);
}

#define MMAP_VADDR(_start, _req, _seg)					\
	(((unsigned long)(_start)) +					\
	 ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) +		\
	 ((_seg) * BLKTAP_PAGE_SIZE))
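
/*
 * Example (illustrative values only, assuming 4 KiB pages and
 * BLKTAP_SEGMENT_MAX == 11; the real constants come from blktap.h):
 * segment 2 of request slot 3 maps at
 * MMAP_VADDR(user_vstart, 3, 2) == user_vstart + (3 * 11 + 2) * 4096.
 */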

static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

static void
blktap_ring_fail_pending(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_request *request;
	int usr_idx;

	for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
		request = ring->pending[usr_idx];
		if (!request)
			continue;

		blktap_device_end_request(tap, request, -EIO);
	}
}

static void
blktap_ring_vm_close_sring(struct blktap *tap,
			   struct vm_area_struct *vma)
{
	struct blktap_ring *ring = &tap->ring;
	struct page *page = virt_to_page(ring->ring.sring);

	blktap_ring_fail_pending(tap);

	ClearPageReserved(page);
	__free_page(page);

	ring->vma = NULL;

	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_control_destroy_tap(tap);
}

static void
blktap_ring_vm_close(struct vm_area_struct *vma)
{
	struct blktap *tap = vma->vm_private_data;

	dev_dbg(tap->ring.dev,
		"vm_close %lx-%lx (%lu) pgoff %lu\n",
		vma->vm_start, vma->vm_end, vma_pages(vma),
		vma->vm_pgoff);

	/* pgoff 0 is the shared ring; request mappings need no teardown */
	if (!vma->vm_pgoff)
		blktap_ring_vm_close_sring(tap, vma);
}

static struct vm_operations_struct blktap_ring_vm_operations = {
	.close = blktap_ring_vm_close,
	.fault = blktap_ring_fault,
};

static int
blktap_ring_map_request(struct blktap *tap, struct file *filp,
			struct blktap_request *request)
{
	struct blktap_ring *ring = &tap->ring;
	unsigned long addr, len, pgoff;
	int read, write, prot, flags;

	write = request->operation == BLKTAP_OP_WRITE;
	read  = request->operation == BLKTAP_OP_READ;

	if (write)
		blktap_request_bounce(tap, request, write);

	prot  = PROT_READ;
	prot |= read ? PROT_WRITE : 0;

	flags = MAP_FIXED|MAP_SHARED;

	addr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
	len  = request->nr_pages << PAGE_SHIFT;

	pgoff = 1 + request->usr_idx * BLKTAP_SEGMENT_MAX;

	addr = do_mmap_pgoff(filp, addr, len, prot, flags, pgoff);

	return IS_ERR_VALUE(addr) ? addr : 0;
}
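
/*
 * The pgoff computed above encodes which request a VMA backs: pgoff 0
 * is the shared ring, and pgoff 1 + usr_idx * BLKTAP_SEGMENT_MAX is
 * the first segment of request slot usr_idx.
 * blktap_ring_mmap_request() below inverts this encoding to locate
 * the request and the starting segment to map.
 */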

static void
blktap_ring_unmap_request(struct blktap *tap,
			  struct blktap_request *request)
{
	struct blktap_ring *ring = &tap->ring;
	unsigned long addr, len;
	int read, err;

	read = request->operation == BLKTAP_OP_READ;
	if (read)
		blktap_request_bounce(tap, request, !read);

	addr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
	len  = request->nr_pages << PAGE_SHIFT;

	err = do_munmap(current->mm, addr, len);
	WARN_ON_ONCE(err);
}

static void
blktap_ring_free_request(struct blktap *tap,
			 struct blktap_request *request)
{
	struct blktap_ring *ring = &tap->ring;

	ring->pending[request->usr_idx] = NULL;
	ring->n_pending--;

	blktap_request_free(tap, request);
}

struct blktap_request*
blktap_ring_make_request(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_request *request;
	int usr_idx;

	if (RING_FULL(&ring->ring))
		return ERR_PTR(-ENOSPC);

	request = blktap_request_alloc(tap);
	if (!request)
		return ERR_PTR(-ENOMEM);

	/* find a free slot in the pending table */
	for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++)
		if (!ring->pending[usr_idx])
			break;

	/* the ring was not full, so a slot must be free */
	BUG_ON(usr_idx >= BLKTAP_RING_SIZE);

	request->usr_idx = usr_idx;

	ring->pending[usr_idx] = request;
	ring->n_pending++;

	return request;
}

static int
blktap_ring_make_rw_request(struct blktap *tap,
			    struct blktap_request *request,
			    struct blktap_ring_request *breq)
{
	struct scatterlist *sg;
	unsigned int i, nsecs = 0;

	blktap_for_each_sg(sg, request, i) {
		struct blktap_segment *seg = &breq->u.rw.seg[i];
		int first, count;

		count = sg->length >> 9;
		first = sg->offset >> 9;

		seg->first_sect = first;
		seg->last_sect  = first + count - 1;

		nsecs += count;
	}

	breq->u.rw.sector_number = blk_rq_pos(request->rq);

	return nsecs;
}

static int
blktap_ring_make_tr_request(struct blktap *tap,
			    struct blktap_request *request,
			    struct blktap_ring_request *breq)
{
	struct bio *bio = request->rq->bio;
	unsigned int nsecs;

	breq->u.tr.nr_sectors    = nsecs = bio_sectors(bio);
	breq->u.tr.sector_number = bio->bi_sector;

	return nsecs;
}

void
blktap_ring_submit_request(struct blktap *tap,
			   struct blktap_request *request)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_ring_request *breq;
	int nsecs;

	dev_dbg(ring->dev,
		"request %d [%p] submit\n", request->usr_idx, request);

	breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);

	breq->id          = request->usr_idx;
	breq->operation   = request->operation;
	breq->nr_segments = request->nr_pages;

	switch (breq->operation) {
	case BLKTAP_OP_READ:
		nsecs = blktap_ring_make_rw_request(tap, request, breq);

		tap->stats.st_rd_sect += nsecs;
		tap->stats.st_rd_req++;
		break;

	case BLKTAP_OP_WRITE:
		nsecs = blktap_ring_make_rw_request(tap, request, breq);

		tap->stats.st_wr_sect += nsecs;
		tap->stats.st_wr_req++;
		break;

	case BLKTAP_OP_FLUSH:
		breq->u.rw.sector_number = 0;
		tap->stats.st_fl_req++;
		break;

	case BLKTAP_OP_TRIM:
		nsecs = blktap_ring_make_tr_request(tap, request, breq);

		tap->stats.st_tr_sect += nsecs;
		tap->stats.st_tr_req++;
		break;

	default:
		BUG();
	}

	ring->ring.req_prod_pvt++;
}

static int
blktap_ring_open(struct inode *inode, struct file *filp)
{
	struct blktap *tap = NULL;
	int minor;

	minor = iminor(inode);

	if (minor < blktap_max_minor)
		tap = blktaps[minor];

	if (!tap)
		return -ENXIO;

	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		return -ENXIO;

	if (tap->ring.task)
		return -EBUSY;

	filp->private_data = tap;
	tap->ring.task = current;

	return 0;
}

static int
blktap_ring_release(struct inode *inode, struct file *filp)
{
	struct blktap *tap = filp->private_data;

	blktap_device_destroy_sync(tap);

	tap->ring.task = NULL;

	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_control_destroy_tap(tap);

	return 0;
}

static int
blktap_ring_mmap_request(struct blktap *tap,
			 struct vm_area_struct *vma)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_request *request;
	int usr_idx, seg, err;
	unsigned long addr, n_segs;

	/* invert the pgoff encoding chosen in blktap_ring_map_request() */
	usr_idx  = vma->vm_pgoff - 1;
	seg      = usr_idx % BLKTAP_SEGMENT_MAX;
	usr_idx /= BLKTAP_SEGMENT_MAX;

	request = ring->pending[usr_idx];
	if (!request)
		return -EINVAL;

	n_segs = request->nr_pages - seg;
	n_segs = min(n_segs, vma_pages(vma));

	for (addr = vma->vm_start;
	     n_segs-- > 0;
	     seg++, addr += PAGE_SIZE) {
		struct page *page = request->pages[seg];

		dev_dbg(tap->ring.dev,
			"mmap request %d seg %d addr %lx\n",
			usr_idx, seg, addr);

		err = vm_insert_page(vma, addr, page);
		if (err)
			return err;
	}

	vma->vm_flags |= VM_DONTCOPY;
	vma->vm_flags |= VM_RESERVED;

	return 0;
}

static int
blktap_ring_mmap_sring(struct blktap *tap, struct vm_area_struct *vma)
{
	struct blktap_ring *ring = &tap->ring;
	struct blktap_sring *sring;
	struct page *page = NULL;
	int err;

	if (ring->vma)
		return -EBUSY;

	page = alloc_page(GFP_KERNEL|__GFP_ZERO);
	if (!page)
		return -ENOMEM;

	SetPageReserved(page);

	err = vm_insert_page(vma, vma->vm_start, page);
	if (err)
		goto fail;

	sring = page_address(page);
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);

	ring->ring_vstart = vma->vm_start;
	ring->user_vstart = ring->ring_vstart + PAGE_SIZE;

	vma->vm_private_data = tap;

	vma->vm_flags |= VM_DONTCOPY;
	vma->vm_flags |= VM_RESERVED;

	vma->vm_ops = &blktap_ring_vm_operations;

	ring->vma = vma;
	return 0;

fail:
	ClearPageReserved(page);
	__free_page(page);

	return err;
}

static int
blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct blktap *tap = filp->private_data;

	dev_dbg(tap->ring.dev,
		"mmap %lx-%lx (%lu) pgoff %lu\n",
		vma->vm_start, vma->vm_end, vma_pages(vma),
		vma->vm_pgoff);

	if (!vma->vm_pgoff)
		return blktap_ring_mmap_sring(tap, vma);

	return blktap_ring_mmap_request(tap, vma);
}

static long
blktap_ring_ioctl(struct file *filp,
		  unsigned int cmd, unsigned long arg)
{
	struct blktap *tap = filp->private_data;
	struct blktap_ring *ring = &tap->ring;
	void __user *ptr = (void __user *)arg;
	int err;

	BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);

	if (!ring->vma || ring->vma->vm_mm != current->mm)
		return -EACCES;

	switch (cmd) {
	case BLKTAP_IOCTL_RESPOND:

		blktap_read_ring(tap);
		return 0;

	case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: {
		struct blktap_device_info info;
		struct blktap2_params params;

		if (copy_from_user(&params, ptr, sizeof(params)))
			return -EFAULT;

		info.capacity    = params.capacity;
		info.sector_size = params.sector_size;
		info.flags       = 0;

		err = blktap_device_create(tap, &info);
		if (err)
			return err;

		if (params.name[0]) {
			strncpy(tap->name, params.name, sizeof(params.name));
			tap->name[sizeof(tap->name)-1] = 0;
		}

		return 0;
	}

	case BLKTAP_IOCTL_CREATE_DEVICE: {
		struct blktap_device_info __user *ptr = (void __user *)arg;
		struct blktap_device_info info;
		unsigned long mask;
		size_t base_sz, sz;

		mask  = BLKTAP_DEVICE_FLAG_RO;
		mask |= BLKTAP_DEVICE_FLAG_PSZ;
		mask |= BLKTAP_DEVICE_FLAG_FLUSH;
		mask |= BLKTAP_DEVICE_FLAG_TRIM;
		mask |= BLKTAP_DEVICE_FLAG_TRIM_RZ;

		memset(&info, 0, sizeof(info));
		sz = base_sz = BLKTAP_INFO_SIZE_AT(flags);

		if (copy_from_user(&info, ptr, sz))
			return -EFAULT;

		if ((info.flags & BLKTAP_DEVICE_FLAG_PSZ) != 0)
			sz = BLKTAP_INFO_SIZE_AT(phys_block_offset);

		if (info.flags & BLKTAP_DEVICE_FLAG_TRIM)
			sz = BLKTAP_INFO_SIZE_AT(trim_block_offset);

		if (copy_from_user(&info, ptr, sz))
			return -EFAULT;

		/* tell userspace which flags the kernel understood */
		if (put_user(info.flags & mask, &ptr->flags))
			return -EFAULT;

		return blktap_device_create(tap, &info);
	}

	case BLKTAP_IOCTL_REMOVE_DEVICE:

		return blktap_device_destroy(tap);
	}

	return -ENOIOCTLCMD;
}

static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
{
	struct blktap *tap = filp->private_data;
	struct blktap_ring *ring = &tap->ring;
	int work;

	poll_wait(filp, &tap->pool->wait, wait);
	poll_wait(filp, &ring->poll_wait, wait);

	down_read(&current->mm->mmap_sem);
	if (ring->vma && tap->device.gd)
		blktap_device_run_queue(tap, filp);
	up_read(&current->mm->mmap_sem);

	work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
	RING_PUSH_REQUESTS(&ring->ring);

	if (work ||
	    *BLKTAP_RING_MESSAGE(ring->ring.sring) ||
	    test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
		return POLLIN | POLLRDNORM;

	return 0;
}

static struct file_operations blktap_ring_file_operations = {
	.owner          = THIS_MODULE,
	.open           = blktap_ring_open,
	.release        = blktap_ring_release,
	.unlocked_ioctl = blktap_ring_ioctl,
	.mmap           = blktap_ring_mmap,
	.poll           = blktap_ring_poll,
};
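
/*
 * Rough userspace lifecycle against this interface (a sketch, not
 * lifted from tapdisk): open the per-tap ring device, mmap pgoff 0
 * to obtain the shared ring, then poll() the fd. POLLIN indicates
 * freshly pushed requests or a pending ring message; completed
 * responses are handed back with BLKTAP_IOCTL_RESPOND.
 */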

void
blktap_ring_kick_user(struct blktap *tap)
{
	wake_up(&tap->ring.poll_wait);
}

int
blktap_ring_destroy(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;

	if (ring->task || ring->vma)
		return -EBUSY;

	return 0;
}

int
blktap_ring_create(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;

	init_waitqueue_head(&ring->poll_wait);
	ring->devno = MKDEV(blktap_ring_major, tap->minor);

	return 0;
}

size_t
blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
{
	struct blktap_ring *ring = &tap->ring;
	char *s = buf, *end = buf + size;
	int usr_idx;

	s += snprintf(s, end - s,
		      "begin pending:%d\n", ring->n_pending);

	for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
		struct blktap_request *request;
		struct timeval t;

		request = ring->pending[usr_idx];
		if (!request)
			continue;

		jiffies_to_timeval(jiffies - request->rq->start_time, &t);

		s += snprintf(s, end - s,
			      "%02d: usr_idx:%02d "
			      "op:%x nr_pages:%02d time:%lu.%06lu\n",
			      usr_idx, request->usr_idx,
			      request->operation, request->nr_pages,
			      t.tv_sec, t.tv_usec);
	}

	s += snprintf(s, end - s, "end pending\n");

	return s - buf;
}

int __init
blktap_ring_init(void)
{
	dev_t dev = 0;
	int err;

	cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
	blktap_ring_cdev.owner = THIS_MODULE;

	err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
	if (err < 0) {
		BTERR("error registering ring devices: %d\n", err);
		return err;
	}

	err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
	if (err) {
		BTERR("error adding ring device: %d\n", err);
		unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
		return err;
	}

	blktap_ring_major = MAJOR(dev);
	BTINFO("blktap ring major: %d\n", blktap_ring_major);

	return 0;
}

void
blktap_ring_exit(void)
{
	if (!blktap_ring_major)
		return;

	cdev_del(&blktap_ring_cdev);
	unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
				 MAX_BLKTAP_DEVICE);

	blktap_ring_major = 0;
}