vmdk.c 25.6 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Block driver for the VMDK format
 *
 * Copyright (c) 2004 Fabrice Bellard
 * Copyright (c) 2005 Filip Navara
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25

P
pbrook 已提交
26
#include "qemu-common.h"
B
bellard 已提交
27
#include "block_int.h"
28
#include "module.h"
B
bellard 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58

/*
 * Image magic numbers, expressed as the value obtained by loading the first
 * four bytes of the file as a big-endian 32-bit integer.
 */
#define VMDK3_MAGIC 0x434f5744  /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4b444d56  /* 'K' 'D' 'M' 'V' */

/*
 * VMDK3 header as stored in the file right after the 4-byte magic.
 * vmdk_open() converts the fields it uses from little-endian.
 */
typedef struct {
    uint32_t version, flags;
    uint32_t disk_sectors;      /* becomes bs->total_sectors */
    uint32_t granularity;       /* sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in 512-byte sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders, heads, sectors_per_track;
} VMDK3Header;

/*
 * VMDK4 header as stored in the file right after the 4-byte magic.
 * The structure is packed so that it can be read and written directly;
 * all offsets and sizes below are counted in 512-byte sectors.
 */
typedef struct {
    uint32_t version, flags;
    int64_t capacity;           /* virtual disk size */
    int64_t granularity;        /* sectors per grain (cluster) */
    int64_t desc_offset, desc_size;     /* embedded text descriptor */
    int32_t num_gtes_per_gte;   /* entries per grain table (L2 table) */
    int64_t rgd_offset;         /* loaded as the primary L1 table */
    int64_t gd_offset;          /* loaded as the backup L1 table */
    int64_t grain_offset;       /* new images are truncated to this size */
    char filler[1];
    char check_bytes[4];        /* vmdk_create() stores 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
B
bellard 已提交
60 61 62 63

/* Number of L2 tables held in the in-memory L2 cache of an open image. */
#define L2_CACHE_SIZE 16

typedef struct BDRVVmdkState {
64
    BlockDriverState *hd;
B
bellard 已提交
65
    int64_t l1_table_offset;
66
    int64_t l1_backup_table_offset;
B
bellard 已提交
67
    uint32_t *l1_table;
68
    uint32_t *l1_backup_table;
B
bellard 已提交
69 70 71 72 73 74 75 76 77
    unsigned int l1_size;
    uint32_t l1_entry_sectors;

    unsigned int l2_size;
    uint32_t *l2_cache;
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];

    unsigned int cluster_sectors;
78
    uint32_t parent_cid;
B
bellard 已提交
79 80
} BDRVVmdkState;

81 82 83 84 85 86 87 88
/*
 * Filled in by get_cluster_offset() when it allocates a new cluster, so that
 * vmdk_L2update() can later store the matching L2 entry on disk.
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new L2 entry, already little-endian */
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;     /* sector of the L2 table to patch */
    int valid;                  /* non-zero once the fields above are set */
} VmdkMetaData;

B
bellard 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102
/*
 * Probe callback: decide whether buf holds the beginning of a VMDK image.
 *
 * buf, buf_size: the first bytes of the candidate file.
 * filename: not used.
 *
 * Returns 100 when the first four bytes are the VMDK3 or VMDK4 magic and
 * 0 otherwise (including when fewer than four bytes are available).
 */
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    uint32_t magic;

    if (buf_size < 4) {
        return 0;
    }

    /*
     * Assemble the big-endian value byte by byte: buf carries no alignment
     * guarantee and is const, so it must not be read through a uint32_t *.
     */
    magic = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
            ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];

    if (magic == VMDK3_MAGIC || magic == VMDK4_MAGIC) {
        return 100;
    }
    return 0;
}

103 104
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the backing image's CID. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so that the macro expands safely inside larger expressions. */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
108 109

/*
 * Read a content ID from the text descriptor stored at byte offset 0x200.
 *
 * parent: non-zero to read the "parentCID" value, zero to read "CID".
 *
 * Returns the parsed hexadecimal value, or 0 when the descriptor cannot be
 * read, the key is absent, or its value is not a hexadecimal number.
 */
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE + 1];
    unsigned int value = 0;
    const char *p_name, *cid_str;
    size_t cid_str_size;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return 0;
    /* The on-disk text need not be NUL-terminated; strstr() requires it. */
    desc[DESC_SIZE] = '\0';

    /* sizeof() counts the terminating NUL, which stands in for the '='. */
    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    p_name = strstr(desc, cid_str);
    if (p_name == NULL)
        return 0;
    /* The key may be the very last text in the buffer. */
    if (strlen(p_name) < cid_str_size)
        return 0;

    /* %x expects an unsigned int, hence the intermediate variable. */
    if (sscanf(p_name + cid_str_size, "%x", &value) != 1)
        return 0;

    return value;
}

/*
 * Replace the value of "CID" in the text descriptor at byte offset 0x200
 * with cid and write the descriptor back.
 *
 * The text starting at "parentCID" is saved first and re-appended after the
 * new CID line, because writing the new value truncates the string there.
 *
 * Returns 0 on success, -1 when the descriptor cannot be read or written or
 * when it contains no "parentCID" entry.
 */
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    BDRVVmdkState *s = bs->opaque;
    char desc[DESC_SIZE + 1], tmp_desc[DESC_SIZE + 1];
    char *p_name, *tmp_str;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    /* The on-disk text need not be NUL-terminated; strstr() requires it. */
    desc[DESC_SIZE] = '\0';

    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL) {
        /* Nothing to preserve and no anchor to rebuild from: malformed. */
        return -1;
    }
    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);

    p_name = strstr(desc, "CID");
    if (p_name != NULL) {
        /* sizeof("CID") also skips the '=' that follows the key. */
        p_name += sizeof("CID");
        snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tmp_desc);
    }

    if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    return 0;
}

/*
 * Check that the parent CID remembered for this image still matches the CID
 * currently stored in its backing image.
 *
 * Returns 1 when they match, when there is no backing image, or when
 * CHECK_CID is not defined; returns 0 on a mismatch.
 */
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL) {
        BDRVVmdkState *state = bs->opaque;

        /* a mismatch means the CID is not valid */
        return state->parent_cid == vmdk_read_cid(backing, 0);
    }
#endif
    /* CID valid */
    return 1;
}

static int vmdk_snapshot_create(const char *filename, const char *backing_file)
{
    int snp_fd, p_fd;
181
    int ret;
182
    uint32_t p_cid;
183
    char *p_name, *gd_buf, *rgd_buf;
184 185 186 187 188
    const char *real_filename, *temp_str;
    VMDK4Header header;
    uint32_t gde_entries, gd_size;
    int64_t gd_offset, rgd_offset, capacity, gt_size;
    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
189
    static const char desc_template[] =
190 191 192 193 194 195 196 197
    "# Disk DescriptorFile\n"
    "version=1\n"
    "CID=%x\n"
    "parentCID=%x\n"
    "createType=\"monolithicSparse\"\n"
    "parentFileNameHint=\"%s\"\n"
    "\n"
    "# Extent description\n"
198
    "RW %u SPARSE \"%s\"\n"
199 200 201 202 203 204 205
    "\n"
    "# The Disk Data Base \n"
    "#DDB\n"
    "\n";

    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
    if (snp_fd < 0)
206
        return -errno;
207 208 209
    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
    if (p_fd < 0) {
        close(snp_fd);
210
        return -errno;
211 212 213
    }

    /* read the header */
214 215
    if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
        ret = -errno;
216
        goto fail;
217 218 219
    }
    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
        ret = -errno;
220
        goto fail;
221
    }
222 223

    /* write the header */
224 225
    if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
        ret = -errno;
226
        goto fail;
227 228 229
    }
    if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
        ret = -errno;
230
        goto fail;
231
    }
232 233 234 235

    memset(&header, 0, sizeof(header));
    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC

236 237
    if (ftruncate(snp_fd, header.grain_offset << 9)) {
        ret = -errno;
238
        goto fail;
239
    }
240
    /* the descriptor offset = 0x200 */
241 242
    if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
        ret = -errno;
243
        goto fail;
244 245 246
    }
    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
        ret = -errno;
247
        goto fail;
248
    }
249

250
    if ((p_name = strstr(p_desc,"CID")) != NULL) {
251 252 253 254 255 256 257 258 259 260 261 262
        p_name += sizeof("CID");
        sscanf(p_name,"%x",&p_cid);
    }

    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
        real_filename = temp_str + 1;

B
blueswir1 已提交
263 264
    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
             (uint32_t)header.capacity, real_filename);
265 266

    /* write the descriptor */
267 268
    if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
        ret = -errno;
269
        goto fail;
270 271 272
    }
    if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
        ret = -errno;
273
        goto fail;
274
    }
B
bellard 已提交
275

276 277 278 279 280 281 282 283
    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
    capacity = header.capacity * SECTOR_SIZE;       // Extent size
    /*
     * Each GDE span 32M disk, means:
     * 512 GTE per GT, each GTE points to grain
     */
    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
284 285
    if (!gt_size) {
        ret = -EINVAL;
286
        goto fail;
287
    }
288
    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
289 290 291 292
    gd_size = gde_entries * sizeof(uint32_t);

    /* write RGD */
    rgd_buf = qemu_malloc(gd_size);
293 294
    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
        ret = -errno;
295
        goto fail_rgd;
296 297 298
    }
    if (read(p_fd, rgd_buf, gd_size) != gd_size) {
        ret = -errno;
299
        goto fail_rgd;
300 301 302
    }
    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
        ret = -errno;
303
        goto fail_rgd;
304 305 306
    }
    if (write(snp_fd, rgd_buf, gd_size) == -1) {
        ret = -errno;
307
        goto fail_rgd;
308
    }
309 310 311

    /* write GD */
    gd_buf = qemu_malloc(gd_size);
312 313
    if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
        ret = -errno;
314
        goto fail_gd;
315 316 317
    }
    if (read(p_fd, gd_buf, gd_size) != gd_size) {
        ret = -errno;
318
        goto fail_gd;
319 320 321
    }
    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
        ret = -errno;
322
        goto fail_gd;
323 324 325
    }
    if (write(snp_fd, gd_buf, gd_size) == -1) {
        ret = -errno;
326
        goto fail_gd;
327
    }
J
Juan Quintela 已提交
328
    ret = 0;
329

J
Juan Quintela 已提交
330
fail_gd:
331
    qemu_free(gd_buf);
J
Juan Quintela 已提交
332
fail_rgd:
333
    qemu_free(rgd_buf);
J
Juan Quintela 已提交
334
fail:
335 336
    close(p_fd);
    close(snp_fd);
337
    return ret;
338 339
}

340
/*
 * Look for a parentFileNameHint="..." entry in the text descriptor at byte
 * offset 0x200 and, if present, copy the quoted name to bs->backing_file.
 *
 * Returns 0 on success (also when no hint is present) and -1 when the
 * descriptor cannot be read, the closing quote is missing, or the name does
 * not fit into bs->backing_file.
 */
static int vmdk_parent_open(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    char *p_name;
    char desc[DESC_SIZE + 1];

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;
    /* The on-disk text need not be NUL-terminated; strstr() requires it. */
    desc[DESC_SIZE] = '\0';

    if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
        char *end_name;
        /* key length plus the two characters of =" */
        const size_t skip = sizeof("parentFileNameHint") + 1;

        /* Do not step past the terminator when the key ends the buffer. */
        if (strlen(p_name) < skip)
            return -1;
        p_name += skip;
        if ((end_name = strchr(p_name,'\"')) == NULL)
            return -1;
        if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
            return -1;

        /* pstrcpy() copies at most (size - 1) characters and terminates. */
        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}

/*
 * Open a VMDK3 or VMDK4 image: parse the header, record the geometry of the
 * L1/L2 tables, load the L1 table (and the backup L1 table when the image
 * has one) and allocate the L2 cache.
 *
 * Returns 0 on success, the error from bdrv_file_open() when the file cannot
 * be opened, and -1 for any other failure.
 */
static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
    int l1_size, i, ret;

    ret = bdrv_file_open(&s->hd, filename, flags);
    if (ret < 0)
        return ret;
    if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
        goto fail;

    magic = be32_to_cpu(magic);
    if (magic == VMDK3_MAGIC) {
        VMDK3Header header;

        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
            goto fail;
        s->cluster_sectors = le32_to_cpu(header.granularity);
        s->l2_size = 1 << 9;
        s->l1_size = 1 << 6;
        bs->total_sectors = le32_to_cpu(header.disk_sectors);
        /* widen before shifting so large sector offsets are not truncated */
        s->l1_table_offset = (int64_t)le32_to_cpu(header.l1dir_offset) << 9;
        s->l1_backup_table_offset = 0;
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
        /* a zero value would later be used as a divisor */
        if (s->l1_entry_sectors == 0)
            goto fail;
    } else if (magic == VMDK4_MAGIC) {
        VMDK4Header header;

        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
            goto fail;
        bs->total_sectors = le64_to_cpu(header.capacity);
        s->cluster_sectors = le64_to_cpu(header.granularity);
        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
        /* a zero value would be used as a divisor just below and later on */
        if (s->l1_entry_sectors == 0)
            goto fail;
        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
            / s->l1_entry_sectors;
        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;

        /* record the parent image name, if the descriptor gives one */
        if (vmdk_parent_open(bs) != 0)
            goto fail;
        /* remember the parent CID for later validity checks */
        s->parent_cid = vmdk_read_cid(bs,1);
    } else {
        goto fail;
    }

    /* read the L1 table */
    l1_size = s->l1_size * sizeof(uint32_t);
    s->l1_table = qemu_malloc(l1_size);
    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
        goto fail;
    for(i = 0; i < s->l1_size; i++) {
        le32_to_cpus(&s->l1_table[i]);
    }

    /* read the backup L1 table, when the format has one */
    if (s->l1_backup_table_offset) {
        s->l1_backup_table = qemu_malloc(l1_size);
        if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
            goto fail;
        for(i = 0; i < s->l1_size; i++) {
            le32_to_cpus(&s->l1_backup_table[i]);
        }
    }

    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail:
    qemu_free(s->l1_backup_table);
    qemu_free(s->l1_table);
    qemu_free(s->l2_cache);
    bdrv_delete(s->hd);
    return -1;
}

443 444
/* Forward declaration; the function is defined further down in this file. */
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
                                   uint64_t offset, int allocate);
445 446 447 448 449 450 451 452 453

/*
 * Fill a newly allocated cluster (grain) with the matching data from the
 * backing image, so that the parts not covered by the pending write keep
 * the parent's contents.
 *
 * cluster_offset: sector number of the new cluster inside the image file.
 * offset: guest byte offset of the write that triggered the allocation.
 * allocate: not used.
 *
 * Returns 0 on success or when there is no backing image, -1 when the CID
 * check fails or the copy cannot be completed.
 */
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
                             uint64_t offset, int allocate)
{
    BDRVVmdkState *s = bs->opaque;
    uint64_t grain_bytes;
    uint8_t *whole_grain;
    int ret;

    /* we will be here if it's first write on non-exist grain(cluster). */
    if (!bs->backing_hd)
        return 0;

    if (!vmdk_is_cid_valid(bs))
        return -1;

    grain_bytes = (uint64_t)s->cluster_sectors * 512;
    if (grain_bytes == 0)
        return -1;

    /*
     * The write may start in the middle of the grain; the copy has to start
     * at the grain boundary, so floor the guest offset to it.
     */
    offset -= offset % grain_bytes;

    /* The grain size comes from the image header: keep it off the stack. */
    whole_grain = qemu_malloc(grain_bytes);

    ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                    s->cluster_sectors);
    if (ret >= 0) {
        /* Write grain only into the active image */
        ret = bdrv_write(s->hd, cluster_offset, whole_grain,
                         s->cluster_sectors);
    }

    qemu_free(whole_grain);
    return ret < 0 ? -1 : 0;
}

/*
 * Store the L2 entry described by m_data on disk: first in the L2 table at
 * m_data->l2_offset and then, when the image has a backup L1 table, in the
 * L2 table that the backup L1 table names for the same L1 index.
 *
 * m_data->l2_offset is overwritten with the backup table's sector in the
 * second case.  Returns 0 on success, -1 when a write fails.
 */
static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
{
    BDRVVmdkState *s = bs->opaque;
    int written;

    /* update L2 table */
    written = bdrv_pwrite(s->hd,
                          ((int64_t)m_data->l2_offset * 512) +
                              (m_data->l2_index * sizeof(m_data->offset)),
                          &(m_data->offset), sizeof(m_data->offset));
    if (written != sizeof(m_data->offset)) {
        return -1;
    }

    if (s->l1_backup_table_offset == 0) {
        return 0;
    }

    /* update backup L2 table */
    m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
    written = bdrv_pwrite(s->hd,
                          ((int64_t)m_data->l2_offset * 512) +
                              (m_data->l2_index * sizeof(m_data->offset)),
                          &(m_data->offset), sizeof(m_data->offset));
    if (written != sizeof(m_data->offset)) {
        return -1;
    }

    return 0;
}

495
/*
 * Translate a guest byte offset into the byte offset of its cluster inside
 * the image file.
 *
 * m_data: may be NULL; otherwise m_data->valid is cleared on entry and the
 *         structure is filled in (valid = 1) when a cluster is allocated, so
 *         that the caller can update the on-disk L2 tables afterwards.
 * offset: guest byte offset.
 * allocate: non-zero to allocate a new cluster at the end of the file when
 *           the offset is not mapped yet.
 *
 * Returns the byte offset of the cluster, or 0 when the offset is unmapped
 * (and allocate is zero) or when an error occurs.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
                                   uint64_t offset, int allocate)
{
    BDRVVmdkState *s = bs->opaque;
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table, tmp = 0;
    uint64_t cluster_offset;

    if (m_data)
        m_data->valid = 0;

    l1_index = (offset >> 9) / s->l1_entry_sectors;
    if (l1_index >= s->l1_size)
        return 0;
    l2_offset = s->l1_table[l1_index];
    if (!l2_offset)
        return 0;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                /* halve every counter so the saturated one can keep counting */
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i * s->l2_size);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index * s->l2_size);
    if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
                                                                        s->l2_size * sizeof(uint32_t))
        return 0;

    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
 found:
    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
    cluster_offset = le32_to_cpu(l2_table[l2_index]);

    if (!cluster_offset) {
        int64_t file_length;

        if (!allocate)
            return 0;

        /* the new cluster is appended at the current end of the file */
        file_length = bdrv_getlength(s->hd);
        if (file_length < 0) {
            /* a negative length is an error code, not a usable offset */
            return 0;
        }
        cluster_offset = file_length;
        bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));

        cluster_offset >>= 9;
        tmp = cpu_to_le32(cluster_offset);

        /* First of all we write grain itself, to avoid race condition
         * that may to corrupt the image.
         * This problem may occur because of insufficient space on host disk
         * or inappropriate VM shutdown.
         */
        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
            return 0;

        /*
         * Publish the mapping in the cached L2 table only once the grain has
         * been filled, so a failed copy does not leave a cache entry that
         * the on-disk tables never receive.
         */
        l2_table[l2_index] = tmp;

        if (m_data) {
            m_data->offset = tmp;
            m_data->l1_index = l1_index;
            m_data->l2_index = l2_index;
            m_data->l2_offset = l2_offset;
            m_data->valid = 1;
        }
    }
    cluster_offset <<= 9;
    return cluster_offset;
}

577
/*
 * Report whether the cluster containing sector_num is mapped in this image.
 *
 * *pnum receives the number of sectors, starting at sector_num and capped
 * at nb_sectors, that lie in that same cluster.
 * Returns non-zero when the cluster is mapped, 0 otherwise.
 */
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;
    uint64_t mapped_at = get_cluster_offset(bs, NULL, sector_num << 9, 0);
    int first = sector_num % s->cluster_sectors;
    int in_cluster = s->cluster_sectors - first;

    *pnum = (in_cluster > nb_sectors) ? nb_sectors : in_cluster;
    return (mapped_at != 0);
}

593
/*
 * Read nb_sectors sectors starting at sector_num into buf, one cluster at a
 * time.  Mapped clusters are read from the image file; unmapped ones are
 * read from the backing image when there is one (after a CID check) and
 * are zero-filled otherwise.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;

    while (nb_sectors > 0) {
        uint64_t cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
        int index_in_cluster = sector_num % s->cluster_sectors;
        int n = s->cluster_sectors - index_in_cluster;

        /* never cross a cluster boundary in a single step */
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (cluster_offset) {
            if (bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) {
                return -1;
            }
        } else if (!bs->backing_hd) {
            memset(buf, 0, 512 * n);
        } else {
            /* try to read from parent image, if exist */
            if (!vmdk_is_cid_valid(bs)) {
                return -1;
            }
            if (bdrv_read(bs->backing_hd, sector_num, buf, n) < 0) {
                return -1;
            }
        }

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}

628
/*
 * Write nb_sectors sectors from buf starting at sector_num, one cluster at
 * a time, allocating clusters as needed and updating the on-disk L2 tables
 * for every newly allocated cluster.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkMetaData m_data;
    int index_in_cluster, n;
    uint64_t cluster_offset;
    /*
     * NOTE(review): this flag is static, so it is shared by every image
     * opened in the process rather than being tracked per image.
     */
    static int cid_update = 0;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -1;
    }

    while (nb_sectors > 0) {
        /*
         * Use the same modulo as the read and lookup paths; masking with
         * (cluster_sectors - 1) is only equivalent for powers of two.
         */
        index_in_cluster = sector_num % s->cluster_sectors;
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors)
            n = nb_sectors;
        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
        if (!cluster_offset)
            return -1;

        if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
            return -1;
        if (m_data.valid) {
            /* update L2 tables */
            if (vmdk_L2update(bs, &m_data) == -1)
                return -1;
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;

        /* update the CID on the first write */
        if (!cid_update) {
            vmdk_write_cid(bs, time(NULL));
            cid_update++;
        }
    }
    return 0;
}

674
/*
 * Create a new, empty VMDK4 image.
 *
 * Recognized options: BLOCK_OPT_SIZE (size in bytes), BLOCK_OPT_BACKING_FILE
 * (delegates to vmdk_snapshot_create()) and BLOCK_OPT_COMPAT6 (selects the
 * virtualHWVersion written to the descriptor).
 *
 * File layout, in 512-byte sectors: header at 0, descriptor at 1 (20
 * sectors), then the first grain directory with its grain tables, then the
 * second grain directory with its grain tables; the file is sized up to
 * grain_offset.
 *
 * Returns 0 on success or a negative errno value.
 */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, i;
    VMDK4Header header;
    uint32_t tmp, le_tmp, magic, grains, gd_size, gt_size, gt_count;
    /* layout values are kept in host byte order and converted once */
    const int64_t granularity = 128;        /* sectors per grain */
    const uint32_t gtes_per_gt = 512;       /* entries per grain table */
    int64_t desc_offset, desc_size, rgd_offset, gd_offset, grain_offset;
    static const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=ffffffff\n"
        "createType=\"monolithicSparse\"\n"
        "\n"
        "# Extent description\n"
        "RW %" PRId64 " SPARSE \"%s\"\n"
        "\n"
        "# The Disk Data Base \n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";
    char desc[1024];
    const char *real_filename, *temp_str;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;

    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
        }
        options++;
    }

    /* images with a backing file are created as snapshots */
    if (backing_file) {
        return vmdk_snapshot_create(filename, backing_file);
    }

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0)
        return -errno;

    /* sizes of the metadata areas, in sectors */
    grains = (total_size + granularity - 1) / granularity;
    gt_size = ((gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
    gt_count = (grains + gtes_per_gt - 1) / gtes_per_gt;
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

    desc_offset = 1;
    desc_size = 20;
    rgd_offset = desc_offset + desc_size;
    gd_offset = rgd_offset + gd_size + (gt_size * gt_count);
    grain_offset =
       ((gd_offset + gd_size + (gt_size * gt_count) +
         granularity - 1) / granularity) *
        granularity;

    /* build the on-disk (little-endian) header */
    magic = cpu_to_be32(VMDK4_MAGIC);
    memset(&header, 0, sizeof(header));
    header.version = cpu_to_le32(1);
    header.flags = cpu_to_le32(3); /* ?? */
    header.capacity = cpu_to_le64(total_size);
    header.granularity = cpu_to_le64(granularity);
    header.num_gtes_per_gte = cpu_to_le32(gtes_per_gt);
    header.desc_offset = cpu_to_le64(desc_offset);
    header.desc_size = cpu_to_le64(desc_size);
    header.rgd_offset = cpu_to_le64(rgd_offset);
    header.gd_offset = cpu_to_le64(gd_offset);
    header.grain_offset = cpu_to_le64(grain_offset);

    header.check_bytes[0] = 0xa;
    header.check_bytes[1] = 0x20;
    header.check_bytes[2] = 0xd;
    header.check_bytes[3] = 0xa;

    /* write all the data */
    ret = qemu_write_full(fd, &magic, sizeof(magic));
    if (ret != sizeof(magic)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, &header, sizeof(header));
    if (ret != sizeof(header)) {
        ret = -errno;
        goto exit;
    }

    ret = ftruncate(fd, grain_offset << 9);
    if (ret < 0) {
        ret = -errno;
        goto exit;
    }

    /* write grain directory */
    if (lseek(fd, rgd_offset << 9, SEEK_SET) == -1) {
        ret = -errno;
        goto exit;
    }
    for (i = 0, tmp = rgd_offset + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        /* directory entries are read back as little-endian */
        le_tmp = cpu_to_le32(tmp);
        ret = qemu_write_full(fd, &le_tmp, sizeof(le_tmp));
        if (ret != sizeof(le_tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* write backup grain directory */
    if (lseek(fd, gd_offset << 9, SEEK_SET) == -1) {
        ret = -errno;
        goto exit;
    }
    for (i = 0, tmp = gd_offset + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        le_tmp = cpu_to_le32(tmp);
        ret = qemu_write_full(fd, &le_tmp, sizeof(le_tmp));
        if (ret != sizeof(le_tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* compose the descriptor; it names the image by its last path component */
    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
        real_filename = temp_str + 1;
    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
             total_size, real_filename,
             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
             total_size / (int64_t)(63 * 16));

    /* write the descriptor */
    if (lseek(fd, desc_offset << 9, SEEK_SET) == -1) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }

    ret = 0;
exit:
    close(fd);
    return ret;
}

B
bellard 已提交
825
/*
 * Release everything vmdk_open() allocated for this image and close the
 * underlying file.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    qemu_free(s->l1_table);
    /* allocated by vmdk_open() when the image has a backup L1 table */
    qemu_free(s->l1_backup_table);
    qemu_free(s->l2_cache);
    bdrv_delete(s->hd);
}

P
pbrook 已提交
834 835 836
/* Flush callback: forwards the request to the underlying image file. */
static void vmdk_flush(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;
    BlockDriverState *image_file = state->hd;

    bdrv_flush(image_file);
}

840 841

static QEMUOptionParameter vmdk_create_options[] = {
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_COMPAT6,
        .type = OPT_FLAG,
        .help = "VMDK version 6 image"
    },
857 858 859
    { NULL }
};

860
static BlockDriver bdrv_vmdk = {
861 862 863
    .format_name	= "vmdk",
    .instance_size	= sizeof(BDRVVmdkState),
    .bdrv_probe		= vmdk_probe,
864
    .bdrv_file_open	= vmdk_open,
865 866 867 868 869 870
    .bdrv_read		= vmdk_read,
    .bdrv_write		= vmdk_write,
    .bdrv_close		= vmdk_close,
    .bdrv_create	= vmdk_create,
    .bdrv_flush		= vmdk_flush,
    .bdrv_is_allocated	= vmdk_is_allocated,
871 872

    .create_options = vmdk_create_options,
B
bellard 已提交
873
};
874 875 876 877 878 879 880

static void bdrv_vmdk_init(void)
{
    bdrv_register(&bdrv_vmdk);
}

block_init(bdrv_vmdk_init);