/* tapdisk.c
 *
 * separate disk process, spawned by blktapctrl. Inherits code from driver
 * plugins.
 *
 * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/select.h>
#include <sys/statvfs.h>
#include <sys/ioctl.h>

#include "blktaplib.h"
#include "tapdisk.h"

#if 1
#define ASSERT(_p) \
        if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
        __LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p) ((void)0)
#endif

static int maxfds, fds[2], run = 1;

static pid_t process;
int connected_disks = 0;
fd_list_entry_t *fd_start = NULL;

int do_cow_read(struct disk_driver *dd, blkif_request_t *req,
                int sidx, uint64_t sector, int nr_secs);

#define td_for_each_disk(tds, drv) \
        for (drv = tds->disks; drv != NULL; drv = drv->next)

static void usage(void)
{
        fprintf(stderr, "blktap-utils: v1.0.0\n");
        fprintf(stderr, "usage: tapdisk <READ fifo> <WRITE fifo>\n");
        exit(-1);
}

static void daemonize(void)
{
        int i;

        if (getppid()==1) return; /* already a daemon */
        if (fork() != 0) exit(0);

        /*Set new program session ID and close all descriptors*/
        setsid();
        for (i = getdtablesize(); i >= 0; --i) close(i);

        /*Send all I/O to /dev/null */
        i = open("/dev/null",O_RDWR);
        dup(i);
        dup(i);
}

static void free_driver(struct disk_driver *d)
{
        free(d->name);
        free(d->private);
        free(d);
}

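/*
 * unmap_disk: tear down one virtual disk - close every driver in its
 * chain, unmap the shared ring, and unlink the state from the global
 * fd list.
 */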
static void unmap_disk(struct td_state *s)
{
        tapdev_info_t *info = s->ring_info;
        struct disk_driver *dd, *tmp;
        fd_list_entry_t *entry;

        dd = s->disks;
        while (dd) {
                tmp = dd->next;
                dd->drv->td_close(dd);
                free_driver(dd);
                dd = tmp;
        }

        if (info != NULL && info->mem > 0)
                munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);

        entry = s->fd_entry;
        *entry->pprev = entry->next;
        if (entry->next)
                entry->next->pprev = entry->pprev;

        close(info->fd);
        free(s->fd_entry);
        free(s->blkif);
        free(s->ring_info);
        free(s);
}

static void sig_handler(int sig)
{
        /*Received signal to close. If no disks are active, we close app.*/
        if (connected_disks < 1) run = 0;
}

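/*
 * LOCAL_FD_SET: add each connected tap device fd, and every chained
 * driver's completion fd, to the select() read set, raising maxfds as
 * needed.
 */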
static inline int LOCAL_FD_SET(fd_set *readfds)
{
        fd_list_entry_t *ptr;
        struct disk_driver *dd;

        ptr = fd_start;
        while (ptr != NULL) {
                if (ptr->tap_fd) {
                        FD_SET(ptr->tap_fd, readfds);
                        td_for_each_disk(ptr->s, dd) {
                                if (dd->io_fd[READ])
                                        FD_SET(dd->io_fd[READ], readfds);
                                maxfds = (dd->io_fd[READ] > maxfds ?
                                          dd->io_fd[READ] : maxfds);
                        }
                        maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds);
                }
                ptr = ptr->next;
        }

        return 0;
}

static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
{
        fd_list_entry_t **pprev, *entry;

        DPRINTF("Adding fd_list_entry\n");

        /*Add to linked list*/
        s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));
        entry->tap_fd = tap_fd;
        entry->s      = s;
        entry->next   = NULL;

        pprev = &fd_start;
        while (*pprev != NULL)
                pprev = &(*pprev)->next;

        *pprev = entry;
        entry->pprev = pprev;

        return entry;
}

static inline struct td_state *get_state(int cookie)
{
        fd_list_entry_t *ptr;

        ptr = fd_start;
        while (ptr != NULL) {
                if (ptr->cookie == cookie) return ptr->s;
                ptr = ptr->next;
        }
        return NULL;
}

static struct tap_disk *get_driver(int drivertype)
{
        /* blktapctrl has passed us the driver type */
        return dtypes[drivertype]->drv;
}

static struct td_state *state_init(void)
{
        int i;
        struct td_state *s;
        blkif_t *blkif;

        s = malloc(sizeof(struct td_state));
        blkif = s->blkif = malloc(sizeof(blkif_t));
        s->ring_info = calloc(1, sizeof(tapdev_info_t));

        for (i = 0; i < MAX_REQUESTS; i++) {
                blkif->pending_list[i].secs_pending = 0;
                blkif->pending_list[i].submitting = 0;
        }

        return s;
}

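/*
 * map_new_dev: open the blktap character device for the given minor
 * and mmap the descriptor ring plus data pages shared with the kernel
 * driver.
 */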
static int map_new_dev(struct td_state *s, int minor)
{
        int tap_fd;
        tapdev_info_t *info = s->ring_info;
        char *devname;
        fd_list_entry_t *ptr;
        int page_size;

        if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) == -1)
                return -1;
        tap_fd = open(devname, O_RDWR);
        if (tap_fd == -1) {
                DPRINTF("open failed on dev %s!",devname);
                goto fail;
        }
        info->fd = tap_fd;

        /*Map the shared memory*/
        page_size = getpagesize();
        info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE,
                         PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
        if ((long int)info->mem == -1) {
                DPRINTF("mmap failed on dev %s!\n",devname);
                goto fail;
        }

        /* assign the rings to the mapped memory */
        info->sring = (blkif_sring_t *)((unsigned long)info->mem);
        BACK_RING_INIT(&info->fe_ring, info->sring, page_size);

        info->vstart =
                (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size);

        ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
        ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
        free(devname);

        /*Update the fd entry*/
        ptr = fd_start;
        while (ptr != NULL) {
                if (s == ptr->s) {
                        ptr->tap_fd = tap_fd;
                        break;
                }
                ptr = ptr->next;
        }

        return minor;

 fail:
        free(devname);
        return -1;
}

static struct disk_driver *disk_init(struct td_state *s,
                                     struct tap_disk *drv,
                                     char *name, td_flag_t flags)
{
        struct disk_driver *dd;

        dd = calloc(1, sizeof(struct disk_driver));
        if (!dd)
                return NULL;

        dd->private = malloc(drv->private_data_size);
        if (!dd->private) {
                free(dd);
                return NULL;
        }

        dd->drv      = drv;
        dd->td_state = s;
        dd->name     = name;
        dd->flags    = flags;

        return dd;
}

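/*
 * open_disk: open the top-level image, then follow td_get_parent_id to
 * attach any chain of read-only backing files behind it.
 */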
static int open_disk(struct td_state *s,
                     struct tap_disk *drv, char *path, td_flag_t flags)
{
        int err;
        char *dup;
        td_flag_t pflags;
        struct disk_id id;
        struct disk_driver *d;

        dup = strdup(path);
        if (!dup)
                return -ENOMEM;

        memset(&id, 0, sizeof(struct disk_id));
        s->disks = d = disk_init(s, drv, dup, flags);
        if (!d)
                return -ENOMEM;

        err = drv->td_open(d, path, flags);
        if (err) {
                free_driver(d);
                s->disks = NULL;
                return -ENOMEM;
        }
        pflags = flags | TD_RDONLY;

        /* load backing files as necessary */
        while ((err = d->drv->td_get_parent_id(d, &id)) == 0) {
                struct disk_driver *new;

                if (id.drivertype > MAX_DISK_TYPES ||
                    !get_driver(id.drivertype) || !id.name)
                        goto fail;

                dup = strdup(id.name);
                if (!dup)
                        goto fail;

                new = disk_init(s, get_driver(id.drivertype), dup, pflags);
                if (!new)
                        goto fail;

                err = new->drv->td_open(new, new->name, pflags);
                if (err)
                        goto fail;

                err = d->drv->td_validate_parent(d, new, 0);
                if (err) {
                        d->next = new;
                        goto fail;
                }

                d = d->next = new;
                free(id.name);
        }

        s->info |= ((flags & TD_RDONLY) ? VDISK_READONLY : 0);

        if (err >= 0)
                return 0;

 fail:
        DPRINTF("failed opening disk\n");
        if (id.name)
                free(id.name);
        d = s->disks;
        while (d) {
                struct disk_driver *tmp = d->next;
                d->drv->td_close(d);
                free_driver(d);
                d = tmp;
        }
        s->disks = NULL;
        return -1;
}

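/*
 * read_msg: read one control message from blktapctrl and dispatch on
 * its type: open an image (CTLMSG_PARAMS), attach a device
 * (CTLMSG_NEWDEV), close a disk, or report our pid.
 */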
static int read_msg(char *buf)
{
        int length, len, msglen, tap_fd, *io_fd;
        char *ptr, *path;
        image_t *img;
        msg_hdr_t *msg;
        msg_newdev_t *msg_dev;
        msg_pid_t *msg_pid;
        struct tap_disk *drv;
        int ret = -1;
        struct td_state *s = NULL;
        fd_list_entry_t *entry;

        length = read(fds[READ], buf, MSG_SIZE);

        if (length > 0 && length >= sizeof(msg_hdr_t)) {
                msg = (msg_hdr_t *)buf;
                DPRINTF("Tapdisk: Received msg, len %d, type %d, UID %d\n",
                        length,msg->type,msg->cookie);

                switch (msg->type) {
                case CTLMSG_PARAMS:
                        ptr = buf + sizeof(msg_hdr_t);
                        len = (length - sizeof(msg_hdr_t));
                        path = calloc(1, len);

                        memcpy(path, ptr, len);
                        DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);

                        /*Assign driver*/
                        drv = get_driver(msg->drivertype);
                        if (drv == NULL)
                                goto params_done;

                        DPRINTF("Loaded driver: name [%s], type [%d]\n",
                                drv->disk_type, msg->drivertype);

                        /* Allocate the disk structs */
                        s = state_init();
                        if (s == NULL)
                                goto params_done;

                        /*Open file*/
                        ret = open_disk(s, drv, path,
                                        ((msg->readonly) ? TD_RDONLY : 0));
                        if (ret)
                                goto params_done;

                        entry = add_fd_entry(0, s);
                        entry->cookie = msg->cookie;
                        DPRINTF("Entered cookie %d\n", entry->cookie);

                        memset(buf, 0x00, MSG_SIZE);

                params_done:
                        if (ret == 0) {
                                msglen = sizeof(msg_hdr_t) + sizeof(image_t);
                                msg->type = CTLMSG_IMG;
                                img = (image_t *)(buf + sizeof(msg_hdr_t));
                                img->size = s->size;
                                img->secsize = s->sector_size;
                                img->info = s->info;
                        } else {
                                msglen = sizeof(msg_hdr_t);
                                msg->type = CTLMSG_IMG_FAIL;
                        }
                        len = write(fds[WRITE], buf, msglen);
                        free(path);
                        return 1;

                case CTLMSG_NEWDEV:
                        msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));

                        s = get_state(msg->cookie);
                        DPRINTF("Retrieving state, cookie %d.....[%s]\n",
                                msg->cookie, (s == NULL ? "FAIL":"OK"));
                        if (s != NULL) {
                                ret = ((map_new_dev(s, msg_dev->devnum)
                                        == msg_dev->devnum ? 0: -1));
                                connected_disks++;
                        }

                        memset(buf, 0x00, MSG_SIZE);
                        msglen = sizeof(msg_hdr_t);
                        msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP
                                              : CTLMSG_NEWDEV_FAIL);

                        len = write(fds[WRITE], buf, msglen);
                        return 1;

                case CTLMSG_CLOSE:
                        s = get_state(msg->cookie);
                        if (s) unmap_disk(s);

                        connected_disks--;
                        sig_handler(SIGINT);
                        return 1;

                case CTLMSG_PID:
                        memset(buf, 0x00, MSG_SIZE);
                        msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
                        msg->type = CTLMSG_PID_RSP;

                        msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
                        process = getpid();
                        msg_pid->pid = process;

                        len = write(fds[WRITE], buf, msglen);
                        return 1;

                default:
                        return 0;
                }
        }
        return 0;
}

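/*
 * Response path: write_rsp_to_ring copies a response into the shared
 * ring; kick_responses pushes any outstanding responses and notifies
 * the frontend through the kernel.
 */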
static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
{
        tapdev_info_t *info = s->ring_info;
        blkif_response_t *rsp_d;

        rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
        memcpy(rsp_d, rsp, sizeof(blkif_response_t));
        info->fe_ring.rsp_prod_pvt++;

        return 0;
}

static inline void kick_responses(struct td_state *s)
{
        tapdev_info_t *info = s->ring_info;

        if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) {
                RING_PUSH_RESPONSES(&info->fe_ring);
                ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);
        }
}

static void io_done(struct disk_driver *dd, int sid)
{
        struct tap_disk *drv = dd->drv;

        if (!run) return; /*We have received signal to close*/

        if (sid > MAX_IOFD || drv->td_do_callbacks(dd, sid) > 0)
                kick_responses(dd->td_state);
}

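/*
 * segment_start: absolute sector at which segment sidx of a request
 * begins, i.e. the request's start sector plus the sizes of all
 * preceding segments.
 */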
static inline uint64_t
segment_start(blkif_request_t *req, int sidx)
{
        int i;
        uint64_t start = req->sector_number;

        for (i = 0; i < sidx; i++)
                start += (req->seg[i].last_sect - req->seg[i].first_sect + 1);

        return start;
}

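/*
 * send_responses: completion callback passed to the driver queue
 * routines. Accounts finished sectors against the pending request,
 * redirects unallocated reads through do_cow_read, and queues the
 * blkif response once all segments of the request have completed.
 */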
uint64_t sends, responds;
static int send_responses(struct disk_driver *dd, int res,
                uint64_t sector, int nr_secs, int idx, void *private)
{
        pending_req_t *preq;
        blkif_request_t *req;
        int responses_queued = 0;
        struct td_state *s = dd->td_state;
        blkif_t *blkif = s->blkif;
        int sidx = (int)(long)private, secs_done = nr_secs;

        if ( (idx > MAX_REQUESTS-1) ) {
                DPRINTF("invalid index returned(%d)!\n", idx);
                return 0;
        }
        preq = &blkif->pending_list[idx];
        req  = &preq->req;

        if (res == BLK_NOT_ALLOCATED) {
                res = do_cow_read(dd, req, sidx, sector, nr_secs);
                if (res >= 0) {
                        secs_done = res;
                        res = 0;
                } else
                        secs_done = 0;
        }

        preq->secs_pending -= secs_done;

        if (res == -EBUSY && preq->submitting)
                return -EBUSY; /* propagate -EBUSY back to higher layers */
        if (res)
                preq->status = BLKIF_RSP_ERROR;

        if (!preq->submitting && preq->secs_pending == 0) {
                blkif_request_t tmp;
                blkif_response_t *rsp;

                tmp = *req;
                rsp = (blkif_response_t *)req;

                rsp->id = tmp.id;
                rsp->operation = tmp.operation;
                rsp->status = preq->status;

                write_rsp_to_ring(s, rsp);
                responses_queued++;
        }
        return responses_queued;
}

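/*
 * do_cow_read: a read hit sectors the top image has not allocated.
 * With no parent the buffer is zero-filled; otherwise the read is
 * reissued against the next disk in the backing chain.
 */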
int do_cow_read(struct disk_driver *dd, blkif_request_t *req,
                int sidx, uint64_t sector, int nr_secs)
{
        char *page;
        int ret;
        uint64_t seg_start, seg_end;
        struct td_state *s = dd->td_state;
        tapdev_info_t *info = s->ring_info;
        struct disk_driver *parent = dd->next;

        seg_start = segment_start(req, sidx);
        seg_end   = seg_start + req->seg[sidx].last_sect + 1;

        ASSERT(sector >= seg_start && sector + nr_secs <= seg_end);

        page  = (char *)MMAP_VADDR(info->vstart,
                                   (unsigned long)req->id, sidx);
        page += (req->seg[sidx].first_sect << SECTOR_SHIFT);
        page += ((sector - seg_start) << SECTOR_SHIFT);

        if (!parent) {
                memset(page, 0, nr_secs << SECTOR_SHIFT);
                return nr_secs;
        }

        /* reissue request to backing file */
        ret = parent->drv->td_queue_read(parent, sector, nr_secs,
                                         page, send_responses,
                                         req->id, (void *)(long)sidx);
        if (ret > 0)
                parent->early += ret;

        return ((ret >= 0) ? 0 : ret);
}

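/*
 * get_io_request: consume new requests from the frontend ring, map
 * each segment into our address space, and queue the I/O with the
 * driver. A driver returning -EBUSY parks the request in info->busy
 * so the next pass resumes at the failed segment.
 */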
static void get_io_request(struct td_state *s)
{
        RING_IDX rp, rc, j, i;
        blkif_request_t *req;
        int idx, nsects, ret;
        uint64_t sector_nr;
        char *page;
        int early = 0; /* count early completions */
        struct disk_driver *dd = s->disks;
        struct tap_disk *drv = dd->drv;
        blkif_t *blkif = s->blkif;
        tapdev_info_t *info = s->ring_info;
        int page_size = getpagesize();

        if (!run) return; /*We have received signal to close*/

        rp = info->fe_ring.sring->req_prod;
        rmb();
        for (j = info->fe_ring.req_cons; j != rp; j++) {
                int done = 0, start_seg = 0;

                req = RING_GET_REQUEST(&info->fe_ring, j);
                ++info->fe_ring.req_cons;

                if (req == NULL) continue;

                idx = req->id;

                if (info->busy.req) {
                        /* continue where we left off last time */
                        ASSERT(info->busy.req == req);
                        start_seg = info->busy.seg_idx;
                        sector_nr = segment_start(req, start_seg);
                        info->busy.seg_idx = 0;
                        info->busy.req = NULL;
                } else {
                        ASSERT(blkif->pending_list[idx].secs_pending == 0);
                        memcpy(&blkif->pending_list[idx].req,
                               req, sizeof(*req));
                        blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
                        blkif->pending_list[idx].submitting = 1;
                        sector_nr = req->sector_number;
                }

                if ((dd->flags & TD_RDONLY) &&
                    (req->operation == BLKIF_OP_WRITE)) {
                        blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
                        goto send_response;
                }

                for (i = start_seg; i < req->nr_segments; i++) {
                        nsects = req->seg[i].last_sect -
                                 req->seg[i].first_sect + 1;

                        if ((req->seg[i].last_sect >= page_size >> 9) ||
                            (nsects <= 0))
                                continue;

                        page  = (char *)MMAP_VADDR(info->vstart,
                                                   (unsigned long)req->id, i);
                        page += (req->seg[i].first_sect << SECTOR_SHIFT);

                        if (sector_nr >= s->size) {
                                DPRINTF("Sector request failed:\n");
                                DPRINTF("%s request, idx [%d,%d] size [%llu], "
                                        "sector [%llu,%llu]\n",
                                        (req->operation == BLKIF_OP_WRITE ?
                                         "WRITE" : "READ"),
                                        idx, i,
                                        (long long unsigned)
                                                nsects<<SECTOR_SHIFT,
                                        (long long unsigned)
                                                sector_nr<<SECTOR_SHIFT,
                                        (long long unsigned) sector_nr);
                                continue;
                        }

                        blkif->pending_list[idx].secs_pending += nsects;

                        switch (req->operation) {
                        case BLKIF_OP_WRITE:
                                ret = drv->td_queue_write(dd, sector_nr,
                                                nsects, page, send_responses,
                                                idx, (void *)(long)i);
                                if (ret > 0) dd->early += ret;
                                else if (ret == -EBUSY) {
                                        /* put req back on queue */
                                        --info->fe_ring.req_cons;
                                        info->busy.req = req;
                                        info->busy.seg_idx = i;
                                        goto out;
                                }
                                break;
                        case BLKIF_OP_READ:
                                ret = drv->td_queue_read(dd, sector_nr,
                                                nsects, page, send_responses,
                                                idx, (void *)(long)i);
                                if (ret > 0) dd->early += ret;
                                else if (ret == -EBUSY) {
                                        /* put req back on queue */
                                        --info->fe_ring.req_cons;
                                        info->busy.req = req;
                                        info->busy.seg_idx = i;
                                        goto out;
                                }
                                break;
                        default:
                                DPRINTF("Unknown block operation\n");
                                break;
                        }
                        sector_nr += nsects;
                }

        send_response:
                blkif->pending_list[idx].submitting = 0;
                /* force write_rsp_to_ring for synchronous case */
                if (blkif->pending_list[idx].secs_pending == 0)
                        dd->early += send_responses(dd, 0, 0, 0, idx,
                                                    (void *)(long)0);
        }

 out:
        /*Batch done*/
        td_for_each_disk(s, dd) {
                dd->early += dd->drv->td_submit(dd);
                if (dd->early > 0) {
                        io_done(dd, MAX_IOFD + 1);
                        dd->early = 0;
                }
        }
}

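/*
 * main: daemonize, open the control fifos passed by blktapctrl, then
 * select() over the control channel, the tap device fds, and the
 * drivers' completion fds until signalled to shut down.
 */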
int main(int argc, char *argv[])
{
        int len, msglen, ret;
        char *buf;
        fd_set readfds, writefds;
        fd_list_entry_t *ptr;
        struct td_state *s;
        char openlogbuf[128];

        if (argc != 3) usage();

        daemonize();

        snprintf(openlogbuf, sizeof(openlogbuf), "TAPDISK[%d]", getpid());
        openlog(openlogbuf, LOG_CONS|LOG_ODELAY, LOG_DAEMON);

        /*Setup signal handlers*/
        signal (SIGBUS, sig_handler);
        signal (SIGINT, sig_handler);

        /*Open the control channel*/
        fds[READ]  = open(argv[1],O_RDWR|O_NONBLOCK);
        fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);

        if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) {
                DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]);
                exit(-1);
        }

        buf = calloc(MSG_SIZE, 1);
        if (buf == NULL) {
                DPRINTF("ERROR: allocating memory.\n");
                exit(-1);
        }

        while (run) {
                ret = 0;
                FD_ZERO(&readfds);
                FD_SET(fds[READ], &readfds);
                maxfds = fds[READ];

                /*Set all tap fds*/
                LOCAL_FD_SET(&readfds);

                /*Wait for incoming messages*/
                ret = select(maxfds + 1, &readfds, (fd_set *) 0,
                             (fd_set *) 0, NULL);

                if (ret > 0) {
                        ptr = fd_start;
                        while (ptr != NULL) {
                                int progress_made = 0;
                                struct disk_driver *dd;
                                tapdev_info_t *info = ptr->s->ring_info;

                                td_for_each_disk(ptr->s, dd) {
                                        if (dd->io_fd[READ] &&
                                            FD_ISSET(dd->io_fd[READ],
                                                     &readfds)) {
                                                io_done(dd, READ);
                                                progress_made = 1;
                                        }
                                }

                                /* completed io from above may have
                                 * queued new requests on chained disks */
                                if (progress_made)
                                        td_for_each_disk(ptr->s, dd) {
                                                dd->early +=
                                                    dd->drv->td_submit(dd);
                                                if (dd->early > 0) {
                                                        io_done(dd,
                                                                MAX_IOFD + 1);
                                                        dd->early = 0;
                                                }
                                        }

                                if (FD_ISSET(ptr->tap_fd, &readfds) ||
                                    (info->busy.req && progress_made))
                                        get_io_request(ptr->s);

                                ptr = ptr->next;
                        }

                        if (FD_ISSET(fds[READ], &readfds))
                                read_msg(buf);
                }
        }

        free(buf);
        close(fds[READ]);
        close(fds[WRITE]);

        ptr = fd_start;
        while (ptr != NULL) {
                s = ptr->s;
                unmap_disk(s);
                ptr = ptr->next;
        }
        closelog();

        return 0;
}