2
* Block driver for the VMDK format
4
* Copyright (c) 2004 Fabrice Bellard
5
* Copyright (c) 2005 Filip Navara
7
* Permission is hereby granted, free of charge, to any person obtaining a copy
8
* of this software and associated documentation files (the "Software"), to deal
9
* in the Software without restriction, including without limitation the rights
10
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
* copies of the Software, and to permit persons to whom the Software is
12
* furnished to do so, subject to the following conditions:
14
* The above copyright notice and this permission notice shall be included in
15
* all copies or substantial portions of the Software.
17
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26
#include "qemu-common.h"
27
#include "block_int.h"
29
#include "migration.h"
32
/* Image magic numbers. vmdk_probe() decodes the first four bytes of the image
 * with be32_to_cpu() and compares the result against these values.
 * VMDK3_MAGIC is the ASCII bytes 'C','O','W','D' packed most-significant
 * byte first. */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
33
/* 'K','D','M','V' packed the same way. */
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
34
/* Value of VMDK4Header.compressAlgorithm that vmdk_open_vmdk4() tests for and
 * that vmdk_create_extent() writes when compression is requested. */
#define VMDK4_COMPRESSION_DEFLATE 1
35
/* Bits of the VMDK4 header "flags" field. */
/* When set, vmdk_open_vmdk4() takes the L1 backup table offset from
 * header.rgd_offset. */
#define VMDK4_FLAG_RGD (1 << 1)
36
/* Set by vmdk_create_extent(), together with VMDK4_FLAG_MARKER, when a
 * compressed extent is created. */
#define VMDK4_FLAG_COMPRESS (1 << 16)
37
/* Stored in extent->has_marker by vmdk_open_vmdk4(); when it is set,
 * vmdk_read_extent() interprets the start of the cluster buffer as a
 * VmdkGrainMarker. */
#define VMDK4_FLAG_MARKER (1 << 17)
42
uint32_t disk_sectors;
44
uint32_t l1dir_offset;
46
uint32_t file_sectors;
49
uint32_t sectors_per_track;
59
int32_t num_gtes_per_gte;
65
uint16_t compressAlgorithm;
66
} QEMU_PACKED VMDK4Header;
68
#define L2_CACHE_SIZE 16
70
typedef struct VmdkExtent {
71
BlockDriverState *file;
77
int64_t flat_start_offset;
78
int64_t l1_table_offset;
79
int64_t l1_backup_table_offset;
81
uint32_t *l1_backup_table;
83
uint32_t l1_entry_sectors;
87
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
88
uint32_t l2_cache_counts[L2_CACHE_SIZE];
90
unsigned int cluster_sectors;
93
typedef struct BDRVVmdkState {
99
/* Extent array with num_extents entries, in ascending order of address */
101
Error *migration_blocker;
104
typedef struct VmdkMetaData {
106
unsigned int l1_index;
107
unsigned int l2_index;
108
unsigned int l2_offset;
112
typedef struct VmdkGrainMarker {
118
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
125
magic = be32_to_cpu(*(uint32_t *)buf);
126
if (magic == VMDK3_MAGIC ||
127
magic == VMDK4_MAGIC) {
130
const char *p = (const char *)buf;
131
const char *end = p + buf_size;
134
/* skip comment line */
135
while (p < end && *p != '\n') {
142
while (p < end && *p == ' ') {
145
/* skip '\r' if Windows line endings are used. */
146
if (p < end && *p == '\r') {
149
/* only accept blank lines before 'version=' line */
150
if (p == end || *p != '\n') {
156
if (end - p >= strlen("version=X\n")) {
157
if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
158
strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
162
if (end - p >= strlen("version=X\r\n")) {
163
if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
164
strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
176
/* Size constants used by the descriptor handling code in this file. */
/* Bytes per sector (the unit the DESC_SIZE comment below counts in). */
#define SECTOR_SIZE 512
177
#define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */
178
/* Size of the text buffers vmdk_create() uses for the descriptor and for the
 * individual descriptor lines it formats. */
#define BUF_SIZE 4096
179
#define HEADER_SIZE 512 /* first sector of 512 bytes */
181
static void vmdk_free_extents(BlockDriverState *bs)
184
BDRVVmdkState *s = bs->opaque;
187
for (i = 0; i < s->num_extents; i++) {
191
g_free(e->l1_backup_table);
192
if (e->file != bs->file) {
193
bdrv_delete(e->file);
199
static void vmdk_free_last_extent(BlockDriverState *bs)
201
BDRVVmdkState *s = bs->opaque;
203
if (s->num_extents == 0) {
207
s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent));
210
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
212
char desc[DESC_SIZE];
213
uint32_t cid = 0xffffffff;
214
const char *p_name, *cid_str;
216
BDRVVmdkState *s = bs->opaque;
219
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
225
cid_str = "parentCID";
226
cid_str_size = sizeof("parentCID");
229
cid_str_size = sizeof("CID");
232
desc[DESC_SIZE - 1] = '\0';
233
p_name = strstr(desc, cid_str);
234
if (p_name != NULL) {
235
p_name += cid_str_size;
236
sscanf(p_name, "%x", &cid);
242
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
244
char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
245
char *p_name, *tmp_str;
246
BDRVVmdkState *s = bs->opaque;
249
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
254
desc[DESC_SIZE - 1] = '\0';
255
tmp_str = strstr(desc, "parentCID");
256
if (tmp_str == NULL) {
260
pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
261
p_name = strstr(desc, "CID");
262
if (p_name != NULL) {
263
p_name += sizeof("CID");
264
snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
265
pstrcat(desc, sizeof(desc), tmp_desc);
268
ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
276
static int vmdk_is_cid_valid(BlockDriverState *bs)
279
BDRVVmdkState *s = bs->opaque;
280
BlockDriverState *p_bs = bs->backing_hd;
284
cur_pcid = vmdk_read_cid(p_bs, 0);
285
if (s->parent_cid != cur_pcid) {
295
static int vmdk_parent_open(BlockDriverState *bs)
298
char desc[DESC_SIZE + 1];
299
BDRVVmdkState *s = bs->opaque;
302
desc[DESC_SIZE] = '\0';
303
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
308
p_name = strstr(desc, "parentFileNameHint");
309
if (p_name != NULL) {
312
p_name += sizeof("parentFileNameHint") + 1;
313
end_name = strchr(p_name, '\"');
314
if (end_name == NULL) {
317
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
321
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
327
/* Create and append extent to the extent array. Return the added VmdkExtent
328
* address. Return NULL if allocation failed. */
329
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
330
BlockDriverState *file, bool flat, int64_t sectors,
331
int64_t l1_offset, int64_t l1_backup_offset,
333
int l2_size, unsigned int cluster_sectors)
336
BDRVVmdkState *s = bs->opaque;
338
s->extents = g_realloc(s->extents,
339
(s->num_extents + 1) * sizeof(VmdkExtent));
340
extent = &s->extents[s->num_extents];
343
memset(extent, 0, sizeof(VmdkExtent));
346
extent->sectors = sectors;
347
extent->l1_table_offset = l1_offset;
348
extent->l1_backup_table_offset = l1_backup_offset;
349
extent->l1_size = l1_size;
350
extent->l1_entry_sectors = l2_size * cluster_sectors;
351
extent->l2_size = l2_size;
352
extent->cluster_sectors = cluster_sectors;
354
if (s->num_extents > 1) {
355
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
357
extent->end_sector = extent->sectors;
359
bs->total_sectors = extent->end_sector;
363
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
368
/* read the L1 table */
369
l1_size = extent->l1_size * sizeof(uint32_t);
370
extent->l1_table = g_malloc(l1_size);
371
ret = bdrv_pread(extent->file,
372
extent->l1_table_offset,
378
for (i = 0; i < extent->l1_size; i++) {
379
le32_to_cpus(&extent->l1_table[i]);
382
if (extent->l1_backup_table_offset) {
383
extent->l1_backup_table = g_malloc(l1_size);
384
ret = bdrv_pread(extent->file,
385
extent->l1_backup_table_offset,
386
extent->l1_backup_table,
391
for (i = 0; i < extent->l1_size; i++) {
392
le32_to_cpus(&extent->l1_backup_table[i]);
397
g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
400
g_free(extent->l1_backup_table);
402
g_free(extent->l1_table);
406
static int vmdk_open_vmdk3(BlockDriverState *bs,
407
BlockDriverState *file,
415
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
419
extent = vmdk_add_extent(bs,
421
le32_to_cpu(header.disk_sectors),
422
le32_to_cpu(header.l1dir_offset) << 9,
424
le32_to_cpu(header.granularity));
425
ret = vmdk_init_tables(bs, extent);
427
/* free extent allocated by vmdk_add_extent */
428
vmdk_free_last_extent(bs);
433
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
434
int64_t desc_offset);
436
static int vmdk_open_vmdk4(BlockDriverState *bs,
437
BlockDriverState *file,
442
uint32_t l1_size, l1_entry_sectors;
445
int64_t l1_backup_offset = 0;
447
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
451
if (header.capacity == 0 && header.desc_offset) {
452
return vmdk_open_desc_file(bs, flags, header.desc_offset << 9);
454
l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
455
* le64_to_cpu(header.granularity);
456
if (l1_entry_sectors <= 0) {
459
l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
461
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
462
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
464
extent = vmdk_add_extent(bs, file, false,
465
le64_to_cpu(header.capacity),
466
le64_to_cpu(header.gd_offset) << 9,
469
le32_to_cpu(header.num_gtes_per_gte),
470
le64_to_cpu(header.granularity));
472
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
473
extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
474
ret = vmdk_init_tables(bs, extent);
476
/* free extent allocated by vmdk_add_extent */
477
vmdk_free_last_extent(bs);
482
/* Find an option value in the descriptor file */
483
static int vmdk_parse_description(const char *desc, const char *opt_name,
484
char *buf, int buf_size)
486
char *opt_pos, *opt_end;
487
const char *end = desc + strlen(desc);
489
opt_pos = strstr(desc, opt_name);
493
/* Skip "=\"" following opt_name */
494
opt_pos += strlen(opt_name) + 2;
495
if (opt_pos >= end) {
499
while (opt_end < end && *opt_end != '"') {
502
if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
505
pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
509
/* Open an extent file and append to bs array */
510
static int vmdk_open_sparse(BlockDriverState *bs,
511
BlockDriverState *file,
516
if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
520
magic = be32_to_cpu(magic);
523
return vmdk_open_vmdk3(bs, file, flags);
526
return vmdk_open_vmdk4(bs, file, flags);
534
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
535
const char *desc_file_path)
541
const char *p = desc;
544
char extent_path[PATH_MAX];
545
BlockDriverState *extent_file;
548
/* parse extent line:
549
* RW [size in sectors] FLAT "file-name.vmdk" OFFSET
551
* RW [size in sectors] SPARSE "file-name.vmdk"
554
ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
555
access, &sectors, type, fname, &flat_offset);
556
if (ret < 4 || strcmp(access, "RW")) {
558
} else if (!strcmp(type, "FLAT")) {
559
if (ret != 5 || flat_offset < 0) {
562
} else if (ret != 4) {
566
/* trim the surrounding quotation marks */
567
if (fname[0] == '"') {
568
memmove(fname, fname + 1, strlen(fname));
569
if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
572
fname[strlen(fname) - 1] = '\0';
575
(strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
576
(strcmp(access, "RW"))) {
580
path_combine(extent_path, sizeof(extent_path),
581
desc_file_path, fname);
582
ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
587
/* save to extents array */
588
if (!strcmp(type, "FLAT")) {
592
extent = vmdk_add_extent(bs, extent_file, true, sectors,
593
0, 0, 0, 0, sectors);
594
extent->flat_start_offset = flat_offset << 9;
595
} else if (!strcmp(type, "SPARSE")) {
597
ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
599
bdrv_delete(extent_file);
604
"VMDK: Not supported extent type \"%s\""".\n", type);
608
/* move to next line */
609
while (*p && *p != '\n') {
617
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
623
BDRVVmdkState *s = bs->opaque;
625
ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
630
if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
633
if (strcmp(ct, "monolithicFlat") &&
634
strcmp(ct, "twoGbMaxExtentSparse") &&
635
strcmp(ct, "twoGbMaxExtentFlat")) {
637
"VMDK: Not supported image type \"%s\""".\n", ct);
641
return vmdk_parse_extents(buf, bs, bs->file->filename);
644
static int vmdk_open(BlockDriverState *bs, int flags)
647
BDRVVmdkState *s = bs->opaque;
649
if (vmdk_open_sparse(bs, bs->file, flags) == 0) {
650
s->desc_offset = 0x200;
652
ret = vmdk_open_desc_file(bs, flags, 0);
657
/* try to open parent images, if any exist */
658
ret = vmdk_parent_open(bs);
662
s->parent_cid = vmdk_read_cid(bs, 1);
663
qemu_co_mutex_init(&s->lock);
665
/* Disable migration when VMDK images are used */
666
error_set(&s->migration_blocker,
667
QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
668
"vmdk", bs->device_name, "live migration");
669
migrate_add_blocker(s->migration_blocker);
674
vmdk_free_extents(bs);
678
static int get_whole_cluster(BlockDriverState *bs,
680
uint64_t cluster_offset,
684
/* 128 sectors * 512 bytes each = grain size 64KB */
685
uint8_t whole_grain[extent->cluster_sectors * 512];
687
/* We get here on the first write to a grain (cluster) that does not exist yet.
688
* Try to read from the parent image, if one exists. */
689
if (bs->backing_hd) {
692
if (!vmdk_is_cid_valid(bs)) {
696
/* floor offset to cluster */
697
offset -= offset % (extent->cluster_sectors * 512);
698
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
699
extent->cluster_sectors);
704
/* Write grain only into the active image */
705
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
706
extent->cluster_sectors);
714
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
716
/* update L2 table */
717
if (bdrv_pwrite_sync(
719
((int64_t)m_data->l2_offset * 512)
720
+ (m_data->l2_index * sizeof(m_data->offset)),
722
sizeof(m_data->offset)
726
/* update backup L2 table */
727
if (extent->l1_backup_table_offset != 0) {
728
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
729
if (bdrv_pwrite_sync(
731
((int64_t)m_data->l2_offset * 512)
732
+ (m_data->l2_index * sizeof(m_data->offset)),
733
&(m_data->offset), sizeof(m_data->offset)
742
static int get_cluster_offset(BlockDriverState *bs,
744
VmdkMetaData *m_data,
747
uint64_t *cluster_offset)
749
unsigned int l1_index, l2_offset, l2_index;
751
uint32_t min_count, *l2_table, tmp = 0;
757
*cluster_offset = extent->flat_start_offset;
761
offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
762
l1_index = (offset >> 9) / extent->l1_entry_sectors;
763
if (l1_index >= extent->l1_size) {
766
l2_offset = extent->l1_table[l1_index];
770
for (i = 0; i < L2_CACHE_SIZE; i++) {
771
if (l2_offset == extent->l2_cache_offsets[i]) {
772
/* increment the hit count */
773
if (++extent->l2_cache_counts[i] == 0xffffffff) {
774
for (j = 0; j < L2_CACHE_SIZE; j++) {
775
extent->l2_cache_counts[j] >>= 1;
778
l2_table = extent->l2_cache + (i * extent->l2_size);
782
/* not found: load a new entry in the least used one */
784
min_count = 0xffffffff;
785
for (i = 0; i < L2_CACHE_SIZE; i++) {
786
if (extent->l2_cache_counts[i] < min_count) {
787
min_count = extent->l2_cache_counts[i];
791
l2_table = extent->l2_cache + (min_index * extent->l2_size);
794
(int64_t)l2_offset * 512,
796
extent->l2_size * sizeof(uint32_t)
797
) != extent->l2_size * sizeof(uint32_t)) {
801
extent->l2_cache_offsets[min_index] = l2_offset;
802
extent->l2_cache_counts[min_index] = 1;
804
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
805
*cluster_offset = le32_to_cpu(l2_table[l2_index]);
807
if (!*cluster_offset) {
812
/* Avoid the L2 tables update for the images that have snapshots. */
813
*cluster_offset = bdrv_getlength(extent->file);
814
if (!extent->compressed) {
817
*cluster_offset + (extent->cluster_sectors << 9)
821
*cluster_offset >>= 9;
822
tmp = cpu_to_le32(*cluster_offset);
823
l2_table[l2_index] = tmp;
825
/* First of all we write the grain itself, to avoid a race condition
826
* that may corrupt the image.
827
* This problem may occur because of insufficient space on host disk
828
* or inappropriate VM shutdown.
830
if (get_whole_cluster(
831
bs, extent, *cluster_offset, offset, allocate) == -1) {
836
m_data->offset = tmp;
837
m_data->l1_index = l1_index;
838
m_data->l2_index = l2_index;
839
m_data->l2_offset = l2_offset;
843
*cluster_offset <<= 9;
847
static VmdkExtent *find_extent(BDRVVmdkState *s,
848
int64_t sector_num, VmdkExtent *start_hint)
850
VmdkExtent *extent = start_hint;
853
extent = &s->extents[0];
855
while (extent < &s->extents[s->num_extents]) {
856
if (sector_num < extent->end_sector) {
864
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
865
int nb_sectors, int *pnum)
867
BDRVVmdkState *s = bs->opaque;
868
int64_t index_in_cluster, n, ret;
872
extent = find_extent(s, sector_num, NULL);
876
ret = get_cluster_offset(bs, extent, NULL,
877
sector_num * 512, 0, &offset);
878
/* get_cluster_offset returning 0 means success */
881
index_in_cluster = sector_num % extent->cluster_sectors;
882
n = extent->cluster_sectors - index_in_cluster;
883
if (n > nb_sectors) {
890
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
891
int64_t offset_in_cluster, const uint8_t *buf,
892
int nb_sectors, int64_t sector_num)
895
VmdkGrainMarker *data = NULL;
897
const uint8_t *write_buf = buf;
898
int write_len = nb_sectors * 512;
900
if (extent->compressed) {
901
if (!extent->has_marker) {
905
buf_len = (extent->cluster_sectors << 9) * 2;
906
data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
907
if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
912
data->lba = sector_num;
913
data->size = buf_len;
914
write_buf = (uint8_t *)data;
915
write_len = buf_len + sizeof(VmdkGrainMarker);
917
ret = bdrv_pwrite(extent->file,
918
cluster_offset + offset_in_cluster,
921
if (ret != write_len) {
922
ret = ret < 0 ? ret : -EIO;
931
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
932
int64_t offset_in_cluster, uint8_t *buf,
936
int cluster_bytes, buf_bytes;
937
uint8_t *cluster_buf, *compressed_data;
940
VmdkGrainMarker *marker;
944
if (!extent->compressed) {
945
ret = bdrv_pread(extent->file,
946
cluster_offset + offset_in_cluster,
947
buf, nb_sectors * 512);
948
if (ret == nb_sectors * 512) {
954
cluster_bytes = extent->cluster_sectors * 512;
955
/* Read two clusters in case GrainMarker + compressed data > one cluster */
956
buf_bytes = cluster_bytes * 2;
957
cluster_buf = g_malloc(buf_bytes);
958
uncomp_buf = g_malloc(cluster_bytes);
959
ret = bdrv_pread(extent->file,
961
cluster_buf, buf_bytes);
965
compressed_data = cluster_buf;
966
buf_len = cluster_bytes;
967
data_len = cluster_bytes;
968
if (extent->has_marker) {
969
marker = (VmdkGrainMarker *)cluster_buf;
970
compressed_data = marker->data;
971
data_len = le32_to_cpu(marker->size);
973
if (!data_len || data_len > buf_bytes) {
977
ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
983
if (offset_in_cluster < 0 ||
984
offset_in_cluster + nb_sectors * 512 > buf_len) {
988
memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
997
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
998
uint8_t *buf, int nb_sectors)
1000
BDRVVmdkState *s = bs->opaque;
1002
uint64_t n, index_in_cluster;
1003
VmdkExtent *extent = NULL;
1004
uint64_t cluster_offset;
1006
while (nb_sectors > 0) {
1007
extent = find_extent(s, sector_num, extent);
1011
ret = get_cluster_offset(
1013
sector_num << 9, 0, &cluster_offset);
1014
index_in_cluster = sector_num % extent->cluster_sectors;
1015
n = extent->cluster_sectors - index_in_cluster;
1016
if (n > nb_sectors) {
1020
/* if not allocated, try to read from the parent image, if one exists */
1021
if (bs->backing_hd) {
1022
if (!vmdk_is_cid_valid(bs)) {
1025
ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
1030
memset(buf, 0, 512 * n);
1033
ret = vmdk_read_extent(extent,
1034
cluster_offset, index_in_cluster * 512,
1047
/*
 * Coroutine read entry point, installed as .bdrv_read in bdrv_vmdk.
 * Calls vmdk_read() with the same arguments while holding s->lock, the
 * coroutine mutex that vmdk_open() initialises.
 */
static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
1048
uint8_t *buf, int nb_sectors)
1051
BDRVVmdkState *s = bs->opaque;
1052
qemu_co_mutex_lock(&s->lock);
1053
ret = vmdk_read(bs, sector_num, buf, nb_sectors);
1054
qemu_co_mutex_unlock(&s->lock);
1058
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
1059
const uint8_t *buf, int nb_sectors)
1061
BDRVVmdkState *s = bs->opaque;
1062
VmdkExtent *extent = NULL;
1064
int64_t index_in_cluster;
1065
uint64_t cluster_offset;
1066
VmdkMetaData m_data;
1068
if (sector_num > bs->total_sectors) {
1070
"(VMDK) Wrong offset: sector_num=0x%" PRIx64
1071
" total_sectors=0x%" PRIx64 "\n",
1072
sector_num, bs->total_sectors);
1076
while (nb_sectors > 0) {
1077
extent = find_extent(s, sector_num, extent);
1081
ret = get_cluster_offset(
1085
sector_num << 9, !extent->compressed,
1087
if (extent->compressed) {
1089
/* Refuse write to allocated cluster for streamOptimized */
1091
"VMDK: can't write to allocated cluster"
1092
" for streamOptimized\n");
1096
ret = get_cluster_offset(
1107
index_in_cluster = sector_num % extent->cluster_sectors;
1108
n = extent->cluster_sectors - index_in_cluster;
1109
if (n > nb_sectors) {
1113
ret = vmdk_write_extent(extent,
1114
cluster_offset, index_in_cluster * 512,
1115
buf, n, sector_num);
1120
/* update L2 tables */
1121
if (vmdk_L2update(extent, &m_data) == -1) {
1129
/* update CID on the first write every time the virtual disk is
1131
if (!s->cid_updated) {
1132
ret = vmdk_write_cid(bs, time(NULL));
1136
s->cid_updated = true;
1142
/*
 * Coroutine write entry point, installed as .bdrv_write in bdrv_vmdk.
 * Calls vmdk_write() with the same arguments while holding s->lock, the
 * coroutine mutex that vmdk_open() initialises.
 */
static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
1143
const uint8_t *buf, int nb_sectors)
1146
BDRVVmdkState *s = bs->opaque;
1147
qemu_co_mutex_lock(&s->lock);
1148
ret = vmdk_write(bs, sector_num, buf, nb_sectors);
1149
qemu_co_mutex_unlock(&s->lock);
1154
static int vmdk_create_extent(const char *filename, int64_t filesize,
1155
bool flat, bool compress)
1160
uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
1164
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1170
ret = ftruncate(fd, filesize);
1176
magic = cpu_to_be32(VMDK4_MAGIC);
1177
memset(&header, 0, sizeof(header));
1180
3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
1181
header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
1182
header.capacity = filesize / 512;
1183
header.granularity = 128;
1184
header.num_gtes_per_gte = 512;
1186
grains = (filesize / 512 + header.granularity - 1) / header.granularity;
1187
gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
1189
(grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
1190
gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
1192
header.desc_offset = 1;
1193
header.desc_size = 20;
1194
header.rgd_offset = header.desc_offset + header.desc_size;
1195
header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
1196
header.grain_offset =
1197
((header.gd_offset + gd_size + (gt_size * gt_count) +
1198
header.granularity - 1) / header.granularity) *
1200
/* swap endianness for all header fields */
1201
header.version = cpu_to_le32(header.version);
1202
header.flags = cpu_to_le32(header.flags);
1203
header.capacity = cpu_to_le64(header.capacity);
1204
header.granularity = cpu_to_le64(header.granularity);
1205
header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
1206
header.desc_offset = cpu_to_le64(header.desc_offset);
1207
header.desc_size = cpu_to_le64(header.desc_size);
1208
header.rgd_offset = cpu_to_le64(header.rgd_offset);
1209
header.gd_offset = cpu_to_le64(header.gd_offset);
1210
header.grain_offset = cpu_to_le64(header.grain_offset);
1211
header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
1213
header.check_bytes[0] = 0xa;
1214
header.check_bytes[1] = 0x20;
1215
header.check_bytes[2] = 0xd;
1216
header.check_bytes[3] = 0xa;
1218
/* write all the data */
1219
ret = qemu_write_full(fd, &magic, sizeof(magic));
1220
if (ret != sizeof(magic)) {
1224
ret = qemu_write_full(fd, &header, sizeof(header));
1225
if (ret != sizeof(header)) {
1230
ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
1236
/* write grain directory */
1237
lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
1238
for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
1239
i < gt_count; i++, tmp += gt_size) {
1240
ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1241
if (ret != sizeof(tmp)) {
1247
/* write backup grain directory */
1248
lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
1249
for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
1250
i < gt_count; i++, tmp += gt_size) {
1251
ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1252
if (ret != sizeof(tmp)) {
1264
static int filename_decompose(const char *filename, char *path, char *prefix,
1265
char *postfix, size_t buf_len)
1269
if (filename == NULL || !strlen(filename)) {
1270
fprintf(stderr, "Vmdk: no filename provided.\n");
1273
p = strrchr(filename, '/');
1275
p = strrchr(filename, '\\');
1278
p = strrchr(filename, ':');
1282
if (p - filename >= buf_len) {
1285
pstrcpy(path, p - filename + 1, filename);
1290
q = strrchr(p, '.');
1292
pstrcpy(prefix, buf_len, p);
1295
if (q - p >= buf_len) {
1298
pstrcpy(prefix, q - p + 1, p);
1299
pstrcpy(postfix, buf_len, q);
1304
static int relative_path(char *dest, int dest_size,
1305
const char *base, const char *target)
1311
const char *sep = "\\";
1313
const char *sep = "/";
1316
if (!(dest && base && target)) {
1319
if (path_is_absolute(target)) {
1320
dest[dest_size - 1] = '\0';
1321
strncpy(dest, target, dest_size - 1);
1324
while (base[i] == target[i]) {
1337
pstrcat(dest, dest_size, "..");
1338
pstrcat(dest, dest_size, sep);
1340
pstrcat(dest, dest_size, q);
1344
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
1347
char desc[BUF_SIZE];
1348
int64_t total_size = 0, filesize;
1349
const char *backing_file = NULL;
1350
const char *fmt = NULL;
1353
bool flat, split, compress;
1354
char ext_desc_lines[BUF_SIZE] = "";
1355
char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
1356
const int64_t split_size = 0x80000000; /* VMDK has constant split size */
1357
const char *desc_extent_line;
1358
char parent_desc_line[BUF_SIZE] = "";
1359
uint32_t parent_cid = 0xffffffff;
1360
const char desc_template[] =
1361
"# Disk DescriptorFile\n"
1365
"createType=\"%s\"\n"
1368
"# Extent description\n"
1371
"# The Disk Data Base\n"
1374
"ddb.virtualHWVersion = \"%d\"\n"
1375
"ddb.geometry.cylinders = \"%" PRId64 "\"\n"
1376
"ddb.geometry.heads = \"16\"\n"
1377
"ddb.geometry.sectors = \"63\"\n"
1378
"ddb.adapterType = \"ide\"\n";
1380
if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
1383
/* Read out options */
1384
while (options && options->name) {
1385
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1386
total_size = options->value.n;
1387
} else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1388
backing_file = options->value.s;
1389
} else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
1390
flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
1391
} else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
1392
fmt = options->value.s;
1397
/* Default format to monolithicSparse */
1398
fmt = "monolithicSparse";
1399
} else if (strcmp(fmt, "monolithicFlat") &&
1400
strcmp(fmt, "monolithicSparse") &&
1401
strcmp(fmt, "twoGbMaxExtentSparse") &&
1402
strcmp(fmt, "twoGbMaxExtentFlat") &&
1403
strcmp(fmt, "streamOptimized")) {
1404
fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
1407
split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
1408
strcmp(fmt, "twoGbMaxExtentSparse"));
1409
flat = !(strcmp(fmt, "monolithicFlat") &&
1410
strcmp(fmt, "twoGbMaxExtentFlat"));
1411
compress = !strcmp(fmt, "streamOptimized");
1413
desc_extent_line = "RW %lld FLAT \"%s\" 0\n";
1415
desc_extent_line = "RW %lld SPARSE \"%s\"\n";
1417
if (flat && backing_file) {
1418
/* not supporting backing file for flat image */
1422
char parent_filename[PATH_MAX];
1423
BlockDriverState *bs = bdrv_new("");
1424
ret = bdrv_open(bs, backing_file, 0, NULL);
1429
if (strcmp(bs->drv->format_name, "vmdk")) {
1433
parent_cid = vmdk_read_cid(bs, 0);
1435
relative_path(parent_filename, sizeof(parent_filename),
1436
filename, backing_file);
1437
snprintf(parent_desc_line, sizeof(parent_desc_line),
1438
"parentFileNameHint=\"%s\"", parent_filename);
1441
/* Create extents */
1442
filesize = total_size;
1443
while (filesize > 0) {
1444
char desc_line[BUF_SIZE];
1445
char ext_filename[PATH_MAX];
1446
char desc_filename[PATH_MAX];
1447
int64_t size = filesize;
1449
if (split && size > split_size) {
1453
snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
1454
prefix, flat ? 'f' : 's', ++idx, postfix);
1456
snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
1459
snprintf(desc_filename, sizeof(desc_filename), "%s%s",
1462
snprintf(ext_filename, sizeof(ext_filename), "%s%s",
1463
path, desc_filename);
1465
if (vmdk_create_extent(ext_filename, size, flat, compress)) {
1470
/* Format description line */
1471
snprintf(desc_line, sizeof(desc_line),
1472
desc_extent_line, size / 512, desc_filename);
1473
pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
1475
/* generate descriptor file */
1476
snprintf(desc, sizeof(desc), desc_template,
1477
(unsigned int)time(NULL),
1482
(flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
1483
total_size / (int64_t)(63 * 16 * 512));
1484
if (split || flat) {
1487
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1492
O_WRONLY | O_BINARY | O_LARGEFILE,
1498
/* the descriptor offset = 0x200 */
1499
if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
1503
ret = qemu_write_full(fd, desc, strlen(desc));
1504
if (ret != strlen(desc)) {
1514
static void vmdk_close(BlockDriverState *bs)
1516
BDRVVmdkState *s = bs->opaque;
1518
vmdk_free_extents(bs);
1520
migrate_del_blocker(s->migration_blocker);
1521
error_free(s->migration_blocker);
1524
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
1527
BDRVVmdkState *s = bs->opaque;
1529
ret = bdrv_co_flush(bs->file);
1530
for (i = 0; i < s->num_extents; i++) {
1531
err = bdrv_co_flush(s->extents[i].file);
1539
static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
1544
BDRVVmdkState *s = bs->opaque;
1546
ret = bdrv_get_allocated_file_size(bs->file);
1550
for (i = 0; i < s->num_extents; i++) {
1551
if (s->extents[i].file == bs->file) {
1554
r = bdrv_get_allocated_file_size(s->extents[i].file);
1563
static QEMUOptionParameter vmdk_create_options[] = {
1565
.name = BLOCK_OPT_SIZE,
1567
.help = "Virtual disk size"
1570
.name = BLOCK_OPT_BACKING_FILE,
1572
.help = "File name of a base image"
1575
.name = BLOCK_OPT_COMPAT6,
1577
.help = "VMDK version 6 image"
1580
.name = BLOCK_OPT_SUBFMT,
1583
"VMDK flat extent format, can be one of "
1584
"{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
1589
static BlockDriver bdrv_vmdk = {
1590
.format_name = "vmdk",
1591
.instance_size = sizeof(BDRVVmdkState),
1592
.bdrv_probe = vmdk_probe,
1593
.bdrv_open = vmdk_open,
1594
.bdrv_read = vmdk_co_read,
1595
.bdrv_write = vmdk_co_write,
1596
.bdrv_close = vmdk_close,
1597
.bdrv_create = vmdk_create,
1598
.bdrv_co_flush_to_disk = vmdk_co_flush,
1599
.bdrv_is_allocated = vmdk_is_allocated,
1600
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
1602
.create_options = vmdk_create_options,
1605
/* Hands the VMDK driver description (bdrv_vmdk) to bdrv_register(). */
static void bdrv_vmdk_init(void)
1607
bdrv_register(&bdrv_vmdk);
1610
/* NOTE(review): block_init() presumably arranges for bdrv_vmdk_init() to be
 * called during block-layer start-up — confirm against its definition. */
block_init(bdrv_vmdk_init);