qcow.c 27.2 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Block driver for the QCOW format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
P
pbrook 已提交
24
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include <limits.h>
#include <zlib.h>
#include "qemu/aes.h"
#include "migration/migration.h"
B
bellard 已提交
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

/* Magic number expected at the start of an image: 'Q', 'F', 'I', 0xfb. */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
/* The only format version this driver accepts. */
#define QCOW_VERSION 1

/* Values of the header's crypt_method field. */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

/* Top bit of an L2 entry: set when the entry describes a compressed cluster.
 * Unsigned constant: shifting a signed 1 into bit 63 is undefined behaviour,
 * and the flag is only ever combined with uint64_t values. */
#define QCOW_OFLAG_COMPRESSED (1ULL << 63)

/*
 * Image header as it is read from / written to offset 0 of the image file.
 *
 * Multi-byte fields are stored big-endian in the file; the open path converts
 * them with be32_to_cpus()/be64_to_cpus() and the create path writes them with
 * cpu_to_be32()/cpu_to_be64().
 */
typedef struct QCowHeader {
    uint32_t magic;               /* must equal QCOW_MAGIC */
    uint32_t version;             /* must equal QCOW_VERSION */
    uint64_t backing_file_offset; /* file offset of the backing file name,
                                     0 if there is none */
    uint32_t backing_file_size;   /* length in bytes of the backing file name */
    uint32_t mtime;
    uint64_t size; /* in bytes */
    uint8_t cluster_bits;         /* log2 of the cluster size */
    uint8_t l2_bits;              /* log2 of the number of entries per L2 table */
    uint16_t padding;             /* explicit filler so the layout no longer
                                     depends on compiler-inserted padding before
                                     the 4-byte aligned crypt_method */
    uint32_t crypt_method;        /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
    uint64_t l1_table_offset;     /* file offset of the L1 table */
} QCowHeader;

#define L2_CACHE_SIZE 16

typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
    int cluster_sectors;
    int l2_bits;
    int l2_size;
    int l1_size;
    uint64_t cluster_offset_mask;
    uint64_t l1_table_offset;
    uint64_t *l1_table;
    uint64_t *l2_cache;
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;
    AES_KEY aes_encrypt_key;
    AES_KEY aes_decrypt_key;
K
Kevin Wolf 已提交
77
    CoMutex lock;
K
Kevin Wolf 已提交
78
    Error *migration_blocker;
B
bellard 已提交
79 80
} BDRVQcowState;

81
static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
B
bellard 已提交
82 83 84 85

static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const QCowHeader *cow_header = (const void *)buf;
86

B
bellard 已提交
87 88
    if (buf_size >= sizeof(QCowHeader) &&
        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
89
        be32_to_cpu(cow_header->version) == QCOW_VERSION)
B
bellard 已提交
90 91 92 93 94
        return 100;
    else
        return 0;
}

95
/*
 * Open callback: read and validate the image header, load the L1 table,
 * allocate the caches and read the backing file name.
 *
 * The header comes from the image file and is treated as untrusted: the
 * geometry fields are range-checked before they are used as shift counts
 * or allocation sizes.
 *
 * Returns 0 on success or a negative errno value; on failure every buffer
 * allocated here is released again.
 */
static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
{
    BDRVQcowState *s = bs->opaque;
    int len, i, shift, ret;
    uint64_t l1_entries;
    QCowHeader header;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    /* the header is stored big-endian */
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be32_to_cpus(&header.mtime);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
        ret = -EMEDIUMTYPE;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
        snprintf(version, sizeof(version), "QCOW version %d", header.version);
        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }

    if (header.size <= 1) {
        ret = -EINVAL;
        goto fail;
    }
    /* Cluster size must be between 512 bytes and 64 KB: larger values
     * overflow the int-sized cluster_size and the shifts below. */
    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
        ret = -EINVAL;
        goto fail;
    }
    /* l2_bits counts 8-byte entries, so this bounds an L2 table to
     * between 512 bytes and 64 KB as well. */
    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
        ret = -EINVAL;
        goto fail;
    }
    if (header.crypt_method > QCOW_CRYPT_AES) {
        ret = -EINVAL;
        goto fail;
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        bs->encrypted = 1;
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - 9);
    s->l2_bits = header.l2_bits;
    s->l2_size = 1 << s->l2_bits;
    bs->total_sectors = header.size / 512;
    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;

    /* read the level 1 table */
    shift = s->cluster_bits + s->l2_bits;
    if (header.size > UINT64_MAX - (1ULL << shift)) {
        /* rounding up below would wrap around */
        ret = -EINVAL;
        goto fail;
    }
    l1_entries = (header.size + (1ULL << shift) - 1) >> shift;
    if (l1_entries > INT_MAX / sizeof(uint64_t)) {
        /* the table size in bytes must fit the int-sized fields */
        ret = -EINVAL;
        goto fail;
    }
    s->l1_size = l1_entries;

    s->l1_table_offset = header.l1_table_offset;
    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));

    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }

    for (i = 0; i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }
    /* alloc L2 cache */
    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    s->cluster_cache = g_malloc(s->cluster_size);
    s->cluster_data = g_malloc(s->cluster_size);
    s->cluster_cache_offset = -1;

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        /* Clamp while the value is still unsigned: assigning a size above
         * INT_MAX to 'len' first would make it negative, skip the clamp and
         * index bs->backing_file out of bounds. */
        uint32_t name_len = header.backing_file_size;

        if (name_len > 1023) {
            name_len = 1023;
        }
        len = name_len;
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                   bs->backing_file, len);
        if (ret < 0) {
            goto fail;
        }
        bs->backing_file[len] = '\0';
    }

    /* Disable migration when qcow images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "qcow", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
    return 0;

fail:
    g_free(s->l1_table);
    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    return ret;
}

J
Jeff Cody 已提交
200 201 202 203 204 205 206 207 208

/* We have nothing to do for QCOW reopen, stubs just return
 * success.
 *
 * None of the arguments (state, queue, errp) is examined; the function
 * unconditionally returns 0. */
static int qcow_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
{
    return 0;
}

B
bellard 已提交
209 210 211 212 213
/*
 * Install the encryption key for the image.
 *
 * The first 16 bytes of the NUL-terminated string 'key' are used as a
 * 128-bit AES key; shorter keys are padded with zero bytes.
 *
 * Returns 0 on success, -1 if either AES key schedule cannot be set up.
 * s->crypt_method is only switched to the header's method once both key
 * schedules are in place, so it keeps meaning "0 if no key yet".
 */
static int qcow_set_key(BlockDriverState *bs, const char *key)
{
    BDRVQcowState *s = bs->opaque;
    uint8_t keybuf[16];
    size_t len;

    memset(keybuf, 0, sizeof(keybuf));
    len = strlen(key);
    if (len > sizeof(keybuf)) {
        len = sizeof(keybuf);
    }
    /* XXX: we could compress the chars to 7 bits to increase
       entropy */
    memcpy(keybuf, key, len);

    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) {
        return -1;
    }
    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) {
        return -1;
    }
    s->crypt_method = s->crypt_method_header;
    return 0;
}

/* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported.

   Processes 'nb_sectors' consecutive 512-byte sectors from in_buf to
   out_buf with AES-CBC using 'key'.  'enc' is handed unchanged to
   AES_cbc_encrypt (callers pass 1 to encrypt, 0 to decrypt).  Each
   sector gets its own IV derived from its sector number, starting at
   'sector_num'.  's' is not used. */
static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;

    for (i = 0; i < nb_sectors; i++) {
        /* IV: little-endian sector number in the first 8 bytes, then zeros */
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
        AES_cbc_encrypt(in_buf, out_buf, 512, key,
                        ivec.b, enc);
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
}

/*
 * Look up, and optionally allocate, the cluster that holds guest byte
 * offset 'offset'.
 *
 * 'allocate' is:
 *
 * 0 to not allocate.
 *
 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
 * 'n_end')
 *
 * 2 to allocate a compressed cluster of size
 * 'compressed_size'. 'compressed_size' must be > 0 and <
 * cluster_size
 *
 * Returns the L2 entry for the cluster (a file offset, with
 * QCOW_OFLAG_COMPRESSED and the compressed size folded in for compressed
 * clusters).  Returns 0 if the cluster is not allocated or if any step
 * failed; no other error value is produced, so callers only need to test
 * for 0.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs,
                                   uint64_t offset, int allocate,
                                   int compressed_size,
                                   int n_start, int n_end)
{
    BDRVQcowState *s = bs->opaque;
    int min_index, i, j, l1_index, l2_index;
    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
    uint32_t min_count;
    int new_l2_table;

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index < 0 || l1_index >= s->l1_size) {
        /* offset lies outside the area the L1 table describes */
        return 0;
    }
    l2_offset = s->l1_table[l1_index];
    new_l2_table = 0;
    if (!l2_offset) {
        if (!allocate) {
            return 0;
        }
        /* allocate a new l2 entry */
        l2_offset = bdrv_getlength(bs->file);
        /* round to cluster size */
        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
        if (bdrv_pwrite_sync(bs->file,
                s->l1_table_offset + l1_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0) {
            return 0;
        }
        new_l2_table = 1;
    }
    /* look for the L2 table in the cache */
    for (i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                /* about to saturate: halve every counter */
                for (j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i << s->l2_bits);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for (i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        /* a fresh table starts out all zero, in memory and in the file */
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
                s->l2_size * sizeof(uint64_t)) < 0) {
            return 0;
        }
    } else {
        if (bdrv_pread(bs->file, l2_offset, l2_table,
                       s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t)) {
            return 0;
        }
    }
    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
found:
    /* cached L2 entries are kept big-endian */
    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (!cluster_offset ||
        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
        if (!allocate) {
            return 0;
        }
        /* allocate a new cluster */
        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
            (n_end - n_start) < s->cluster_sectors) {
            /* if the cluster is already compressed, we must
               decompress it in the case it is not completely
               overwritten */
            if (decompress_cluster(bs, cluster_offset) < 0) {
                return 0;
            }
            cluster_offset = bdrv_getlength(bs->file);
            cluster_offset = (cluster_offset + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
                            s->cluster_size) != s->cluster_size) {
                /* 0, not -1: the return type is unsigned and 0 is the
                   documented failure value */
                return 0;
            }
        } else {
            cluster_offset = bdrv_getlength(bs->file);
            if (allocate == 1) {
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
                /* NOTE(review): the result of the truncate is not checked,
                   as in the original code */
                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (s->crypt_method &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
                    /* second sector of the scratch buffer: zero plaintext */
                    memset(s->cluster_data + 512, 0x00, 512);
                    for (i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
                            encrypt_sectors(s, start_sect + i,
                                            s->cluster_data,
                                            s->cluster_data + 512, 1, 1,
                                            &s->aes_encrypt_key);
                            if (bdrv_pwrite(bs->file,
                                            cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512) {
                                return 0;
                            }
                        }
                    }
                }
            } else if (allocate == 2) {
                /* the compressed size goes into the bits above the
                   file offset */
                cluster_offset |= QCOW_OFLAG_COMPRESSED |
                    (uint64_t)compressed_size << (63 - s->cluster_bits);
            }
        }
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0) {
            return 0;
        }
    }
    return cluster_offset;
}

398 399
/*
 * Report whether the cluster containing 'sector_num' is allocated in this
 * image.
 *
 * *pnum receives the number of sectors, starting at sector_num and capped
 * at nb_sectors, that lie in the same cluster.  Returns 1 if the cluster
 * lookup yields a non-zero entry, 0 otherwise.
 */
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster, n;
    uint64_t cluster_offset;

    /* the lookup goes through the L2 cache, so hold the lock around it */
    qemu_co_mutex_lock(&s->lock);
    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
    qemu_co_mutex_unlock(&s->lock);

    index_in_cluster = sector_num & (s->cluster_sectors - 1);
    n = s->cluster_sectors - index_in_cluster;
    if (n > nb_sectors) {
        n = nb_sectors;
    }
    *pnum = n;
    return (cluster_offset != 0);
}

/*
 * Inflate 'buf_size' bytes at 'buf' into 'out_buf'.
 *
 * The stream is opened with inflateInit2() and a window-bits argument of
 * -12.  The call succeeds only when inflate() ends with Z_STREAM_END or
 * Z_BUF_ERROR and exactly 'out_buf_size' bytes were produced.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
    z_stream stream;
    int status, produced;

    memset(&stream, 0, sizeof(stream));
    stream.next_in = (uint8_t *)buf;
    stream.avail_in = buf_size;
    stream.next_out = out_buf;
    stream.avail_out = out_buf_size;

    if (inflateInit2(&stream, -12) != Z_OK) {
        return -1;
    }

    status = inflate(&stream, Z_FINISH);
    produced = stream.next_out - out_buf;
    /* the stream is released on both the success and the failure path */
    inflateEnd(&stream);

    if (status != Z_STREAM_END && status != Z_BUF_ERROR) {
        return -1;
    }
    if (produced != out_buf_size) {
        return -1;
    }
    return 0;
}
442

443
/*
 * Make sure s->cluster_cache holds the decompressed content of the
 * compressed cluster described by L2 entry 'cluster_offset'.
 *
 * The entry is split into a file offset (low bits, selected by
 * cluster_offset_mask) and a compressed size (the bits above it).  Nothing
 * is read if the cache already holds the cluster at that file offset.
 *
 * Returns 0 on success, -1 if reading or inflating the data fails.
 */
static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    int ret, csize;
    uint64_t coffset;

    coffset = cluster_offset & s->cluster_offset_mask;
    if (s->cluster_cache_offset == coffset) {
        /* already cached */
        return 0;
    }

    csize = cluster_offset >> (63 - s->cluster_bits);
    csize &= (s->cluster_size - 1);
    ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
    if (ret != csize) {
        return -1;
    }
    if (decompress_buffer(s->cluster_cache, s->cluster_size,
                          s->cluster_data, csize) < 0) {
        return -1;
    }
    s->cluster_cache_offset = coffset;
    return 0;
}

465
/*
 * Coroutine read: fill 'qiov' with 'nb_sectors' sectors starting at
 * 'sector_num'.
 *
 * The request is processed one cluster at a time.  Unallocated clusters are
 * read from the backing image if there is one and are zero-filled
 * otherwise; compressed clusters are copied out of the decompression cache;
 * normal clusters are read from the image file and decrypted when a key is
 * set.  A multi-element qiov is served through a bounce buffer that is
 * copied into the qiov at the end.
 *
 * Returns 0 on success or a negative errno value.
 */
static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    int ret = 0, n;
    uint64_t cluster_offset;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;

    if (qiov->niov > 1) {
        buf = orig_buf = qemu_blockalign(bs, qiov->size);
    } else {
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {
        /* prepare next request */
        cluster_offset = get_cluster_offset(bs, sector_num << 9,
                                                 0, 0, 0, 0);
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (!cluster_offset) {
            if (bs->backing_hd) {
                /* read from the base image */
                hd_iov.iov_base = (void *)buf;
                hd_iov.iov_len = n * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                /* the lock is dropped for the duration of the nested read */
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_readv(bs->backing_hd, sector_num,
                                    n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    goto fail;
                }
            } else {
                /* Note: in this case, no need to wait */
                memset(buf, 0, 512 * n);
            }
        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            /* add AIO support for compressed blocks ? */
            if (decompress_cluster(bs, cluster_offset) < 0) {
                goto fail;
            }
            memcpy(buf,
                   s->cluster_cache + index_in_cluster * 512, 512 * n);
        } else {
            /* a normal cluster must start on a sector boundary */
            if ((cluster_offset & 511) != 0) {
                goto fail;
            }
            hd_iov.iov_base = (void *)buf;
            hd_iov.iov_len = n * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_readv(bs->file,
                                (cluster_offset >> 9) + index_in_cluster,
                                n, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                /* keep the error code reported by the lower layer */
                break;
            }
            if (s->crypt_method) {
                /* decrypt in place */
                encrypt_sectors(s, sector_num, buf, buf,
                                n, 0,
                                &s->aes_decrypt_key);
            }
        }
        ret = 0;

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }

done:
    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
        qemu_vfree(orig_buf);
    }

    return ret;

fail:
    ret = -EIO;
    goto done;
}

563
/*
 * Coroutine write: store 'nb_sectors' sectors from 'qiov' starting at
 * 'sector_num'.
 *
 * The request is processed one cluster at a time; each cluster is looked up
 * with allocation enabled, and the data is encrypted into a temporary
 * buffer first when a key is set.  A multi-element qiov is flattened into a
 * bounce buffer before the loop.
 *
 * Returns 0 on success or a negative errno value (-EIO when no usable
 * cluster offset could be obtained).
 */
static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    uint64_t cluster_offset;
    const uint8_t *src_buf;
    int ret = 0, n;
    uint8_t *cluster_data = NULL;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;

    s->cluster_cache_offset = -1; /* disable compressed cache */

    if (qiov->niov > 1) {
        buf = orig_buf = qemu_blockalign(bs, qiov->size);
        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
    } else {
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
                                            index_in_cluster,
                                            index_in_cluster + n);
        /* 0 means the lookup/allocation failed; a normal cluster must
         * also start on a sector boundary */
        if (!cluster_offset || (cluster_offset & 511) != 0) {
            ret = -EIO;
            break;
        }
        if (s->crypt_method) {
            /* the encryption buffer is allocated on first use */
            if (!cluster_data) {
                cluster_data = g_malloc0(s->cluster_size);
            }
            encrypt_sectors(s, sector_num, cluster_data, buf,
                            n, 1, &s->aes_encrypt_key);
            src_buf = cluster_data;
        } else {
            src_buf = buf;
        }

        hd_iov.iov_base = (void *)src_buf;
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        /* the lock is dropped for the duration of the nested write */
        qemu_co_mutex_unlock(&s->lock);
        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + index_in_cluster,
                             n, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            break;
        }
        ret = 0;

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        qemu_vfree(orig_buf);
    }
    g_free(cluster_data);

    return ret;
}

B
bellard 已提交
641
/*
 * Close callback: free the tables and buffers held in the driver state,
 * then remove and free the migration blocker.
 */
static void qcow_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    g_free(s->l1_table);
    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}

654
/*
 * Create callback: write a new, empty QCOW image to 'filename'.
 *
 * Recognised options are the virtual size, the backing file name and the
 * encryption flag.  The image consists of the header, the backing file
 * name (if any) and a zero-filled L1 table.  Images with a backing file use
 * 512-byte clusters, others use 4 KB clusters.
 *
 * Returns 0 on success; on failure, the value returned by the failing
 * block-layer call.
 */
static int qcow_create(const char *filename, QEMUOptionParameter *options)
{
    int header_size, backing_filename_len, l1_size, shift, i;
    QCowHeader header;
    uint8_t *tmp;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;
    BlockDriverState *qcow_bs;

    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
        }
        options++;
    }

    /* NOTE(review): 'options' has been advanced past the last entry by the
     * loop above, so the protocol layer is handed the list terminator.
     * Kept as in the original code. */
    ret = bdrv_create_file(filename, options);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_truncate(qcow_bs, 0);
    if (ret < 0) {
        goto exit;
    }

    /* header fields are stored big-endian */
    memset(&header, 0, sizeof(header));
    header.magic = cpu_to_be32(QCOW_MAGIC);
    header.version = cpu_to_be32(QCOW_VERSION);
    header.size = cpu_to_be64(total_size * 512);
    header_size = sizeof(header);
    backing_filename_len = 0;
    if (backing_file) {
        if (strcmp(backing_file, "fat:")) {
            /* the backing file name directly follows the header */
            header.backing_file_offset = cpu_to_be64(header_size);
            backing_filename_len = strlen(backing_file);
            header.backing_file_size = cpu_to_be32(backing_filename_len);
            header_size += backing_filename_len;
        } else {
            /* special backing file for vvfat */
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                    unmodified sectors */
        header.l2_bits = 12; /* 32 KB L2 tables */
    } else {
        header.cluster_bits = 12; /* 4 KB clusters */
        header.l2_bits = 9; /* 4 KB L2 tables */
    }
    /* the L1 table starts at the next 8-byte boundary */
    header_size = (header_size + 7) & ~7;
    shift = header.cluster_bits + header.l2_bits;
    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
    if (flags & BLOCK_FLAG_ENCRYPT) {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
    } else {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }

    /* write all the data */
    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
        ret = bdrv_pwrite(qcow_bs, sizeof(header),
            backing_file, backing_filename_len);
        if (ret != backing_filename_len) {
            goto exit;
        }
    }

    /* zero-fill the L1 table, one sector at a time */
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
        BDRV_SECTOR_SIZE); i++) {
        ret = bdrv_pwrite(qcow_bs, header_size +
            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
        if (ret != BDRV_SECTOR_SIZE) {
            g_free(tmp);
            goto exit;
        }
    }

    g_free(tmp);
    ret = 0;
exit:
    bdrv_delete(qcow_bs);
    return ret;
}

B
bellard 已提交
758
/*
 * Drop every allocation in the image: zero the L1 table in memory and in
 * the file, truncate the file right behind the L1 table and reset the L2
 * cache.
 *
 * Returns 0 on success, otherwise the negative value reported by the
 * failing write or truncate (the write failure used to be collapsed
 * into -1).
 */
static int qcow_make_empty(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
    int ret;

    memset(s->l1_table, 0, l1_length);
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
                           l1_length);
    if (ret < 0) {
        return ret;
    }
    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
    if (ret < 0) {
        return ret;
    }

    /* nothing cached refers to valid data any more */
    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));

    return 0;
}

B
bellard 已提交
779 780
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
781
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
B
bellard 已提交
782
                                 const uint8_t *buf, int nb_sectors)
B
bellard 已提交
783 784 785 786 787 788 789
{
    BDRVQcowState *s = bs->opaque;
    z_stream strm;
    int ret, out_len;
    uint8_t *out_buf;
    uint64_t cluster_offset;

B
bellard 已提交
790 791
    if (nb_sectors != s->cluster_sectors)
        return -EINVAL;
B
bellard 已提交
792

793
    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
B
bellard 已提交
794 795 796 797

    /* best compression, small window, no zlib header */
    memset(&strm, 0, sizeof(strm));
    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
798
                       Z_DEFLATED, -12,
B
bellard 已提交
799 800
                       9, Z_DEFAULT_STRATEGY);
    if (ret != 0) {
801 802
        ret = -EINVAL;
        goto fail;
B
bellard 已提交
803 804 805 806 807 808 809 810 811 812
    }

    strm.avail_in = s->cluster_size;
    strm.next_in = (uint8_t *)buf;
    strm.avail_out = s->cluster_size;
    strm.next_out = out_buf;

    ret = deflate(&strm, Z_FINISH);
    if (ret != Z_STREAM_END && ret != Z_OK) {
        deflateEnd(&strm);
813 814
        ret = -EINVAL;
        goto fail;
B
bellard 已提交
815 816 817 818 819 820 821
    }
    out_len = strm.next_out - out_buf;

    deflateEnd(&strm);

    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
822 823 824 825
        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
        if (ret < 0) {
            goto fail;
        }
B
bellard 已提交
826
    } else {
827
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
B
bellard 已提交
828
                                            out_len, 0, 0);
829 830 831 832 833
        if (cluster_offset == 0) {
            ret = -EIO;
            goto fail;
        }

B
bellard 已提交
834
        cluster_offset &= s->cluster_offset_mask;
835 836 837
        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
B
bellard 已提交
838 839
        }
    }
840

841 842
    ret = 0;
fail:
843
    g_free(out_buf);
844
    return ret;
B
bellard 已提交
845 846
}

B
bellard 已提交
847 848 849 850 851 852 853
/* Info callback: report the image's cluster size in 'bdi'.  Always
 * returns 0. */
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVQcowState *state = bs->opaque;

    bdi->cluster_size = state->cluster_size;

    return 0;
}

854 855

static QEMUOptionParameter qcow_create_options[] = {
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_ENCRYPT,
        .type = OPT_FLAG,
        .help = "Encrypt the image"
    },
871 872 873
    { NULL }
};

874
static BlockDriver bdrv_qcow = {
875 876 877 878 879
    .format_name	= "qcow",
    .instance_size	= sizeof(BDRVQcowState),
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
J
Jeff Cody 已提交
880
    .bdrv_reopen_prepare = qcow_reopen_prepare,
881
    .bdrv_create	= qcow_create,
882 883 884

    .bdrv_co_readv          = qcow_co_readv,
    .bdrv_co_writev         = qcow_co_writev,
885
    .bdrv_co_is_allocated   = qcow_co_is_allocated,
886 887 888 889 890

    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
    .bdrv_write_compressed  = qcow_write_compressed,
    .bdrv_get_info          = qcow_get_info,
891 892

    .create_options = qcow_create_options,
B
bellard 已提交
893
};
894 895 896 897 898 899 900

/* Init hook: hands the qcow driver description to bdrv_register(). */
static void bdrv_qcow_init(void)
{
    bdrv_register(&bdrv_qcow);
}

/* Pass the init hook to the block_init() macro (defined outside this file). */
block_init(bdrv_qcow_init);