qcow.c 26.9 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Block driver for the QCOW format
3
 *
B
bellard 已提交
4
 * Copyright (c) 2004-2006 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
P
pbrook 已提交
24
#include "qemu-common.h"
B
bellard 已提交
25
#include "block_int.h"
26
#include "module.h"
B
bellard 已提交
27
#include <zlib.h>
B
bellard 已提交
28
#include "aes.h"
K
Kevin Wolf 已提交
29
#include "migration.h"
B
bellard 已提交
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

/* Magic number at the start of every QCOW image: 'Q', 'F', 'I', 0xfb. */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
/* The only on-disk format version this driver accepts. */
#define QCOW_VERSION 1

/* Values of the header's crypt_method field. */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

/* Top bit of an L2 entry, set when the cluster is stored compressed.
   The constant is unsigned: shifting a signed 1LL into bit 63 overflows
   the signed type, which is undefined behaviour in C. */
#define QCOW_OFLAG_COMPRESSED (1ULL << 63)

/*
 * Image header as read from offset 0 of the image file.  qcow_open()
 * converts every multi-byte field from big-endian to host order before
 * using it; qcow_create() writes them back in big-endian.
 */
typedef struct QCowHeader {
    uint32_t magic;               /* must equal QCOW_MAGIC */
    uint32_t version;             /* must equal QCOW_VERSION */
    uint64_t backing_file_offset; /* file offset of the backing file name,
                                     0 when there is no backing file */
    uint32_t backing_file_size;   /* length in bytes of that name */
    uint32_t mtime;               /* byte-swapped on open, otherwise unused
                                     by the code in this file */
    uint64_t size; /* virtual disk size in bytes */
    uint8_t cluster_bits;         /* log2 of the cluster size in bytes */
    uint8_t l2_bits;              /* log2 of the number of entries in one
                                     L2 table */
    uint32_t crypt_method;        /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
    uint64_t l1_table_offset;     /* file offset of the L1 table */
} QCowHeader;

#define L2_CACHE_SIZE 16

typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
    int cluster_sectors;
    int l2_bits;
    int l2_size;
    int l1_size;
    uint64_t cluster_offset_mask;
    uint64_t l1_table_offset;
    uint64_t *l1_table;
    uint64_t *l2_cache;
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;
    AES_KEY aes_encrypt_key;
    AES_KEY aes_decrypt_key;
K
Kevin Wolf 已提交
77
    CoMutex lock;
K
Kevin Wolf 已提交
78
    Error *migration_blocker;
B
bellard 已提交
79 80
} BDRVQcowState;

81
static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
B
bellard 已提交
82 83 84 85

static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const QCowHeader *cow_header = (const void *)buf;
86

B
bellard 已提交
87 88
    if (buf_size >= sizeof(QCowHeader) &&
        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
89
        be32_to_cpu(cow_header->version) == QCOW_VERSION)
B
bellard 已提交
90 91 92 93 94
        return 100;
    else
        return 0;
}

95
/*
 * Open callback: read and validate the QCOW header from bs->file, then
 * build the in-memory driver state in bs->opaque.
 *
 * Loads the whole L1 table, allocates the L2 table cache and the two
 * cluster-sized buffers, copies the backing file name (if any) into
 * bs->backing_file, and registers a migration blocker.
 *
 * 'flags' is not used here.
 *
 * Returns 0 on success or a negative errno value.  On failure every buffer
 * allocated here is freed again.
 */
static int qcow_open(BlockDriverState *bs, int flags)
{
    BDRVQcowState *s = bs->opaque;
    int len, i, shift, ret;
    uint64_t l1_entries;
    QCowHeader header;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    /* multi-byte header fields are stored big-endian */
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be32_to_cpus(&header.mtime);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
        ret = -EINVAL;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
        snprintf(version, sizeof(version), "QCOW version %d", header.version);
        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }

    if (header.size <= 1) {
        ret = -EINVAL;
        goto fail;
    }
    /* cluster_bits and l2_bits come straight from the image file and are
       used as shift counts and allocation sizes below, so both need an
       upper bound as well: clusters from 512 bytes to 64 KB ... */
    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
        ret = -EINVAL;
        goto fail;
    }
    /* ... and L2 tables from 512 bytes to 64 KB (entries are 8 bytes) */
    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
        ret = -EINVAL;
        goto fail;
    }
    if (header.crypt_method > QCOW_CRYPT_AES) {
        ret = -EINVAL;
        goto fail;
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        bs->encrypted = 1;
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - 9);
    s->l2_bits = header.l2_bits;
    s->l2_size = 1 << s->l2_bits;
    bs->total_sectors = header.size / 512;
    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;

    /* read the level 1 table */
    shift = s->cluster_bits + s->l2_bits;
    /* rounding up must not wrap around 64 bits */
    if (header.size > UINT64_MAX - (1ULL << shift)) {
        ret = -EINVAL;
        goto fail;
    }
    l1_entries = (header.size + (1ULL << shift) - 1) >> shift;
    /* l1_size is an int and is multiplied by 8 for allocation and I/O */
    if (l1_entries > INT32_MAX / sizeof(uint64_t)) {
        ret = -EINVAL;
        goto fail;
    }
    s->l1_size = l1_entries;

    s->l1_table_offset = header.l1_table_offset;
    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));

    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }

    for(i = 0;i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }
    /* alloc L2 cache */
    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    s->cluster_cache = g_malloc(s->cluster_size);
    s->cluster_data = g_malloc(s->cluster_size);
    s->cluster_cache_offset = -1;

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        /* clamp while the value is still unsigned: a size above INT_MAX
           would otherwise turn into a negative length and array index */
        if (header.backing_file_size > 1023) {
            len = 1023;
        } else {
            len = header.backing_file_size;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                   bs->backing_file, len);
        if (ret < 0) {
            goto fail;
        }
        bs->backing_file[len] = '\0';
    }

    /* Disable migration when qcow images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "qcow", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
    return 0;

 fail:
    /* NOTE(review): as in the original, this relies on the pointers in
       bs->opaque being NULL before qcow_open() runs — confirm with the
       caller that allocates the state. */
    g_free(s->l1_table);
    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    s->l1_table = NULL;
    s->l2_cache = NULL;
    s->cluster_cache = NULL;
    s->cluster_data = NULL;
    return ret;
}

/*
 * Install the AES key derived from the passphrase 'key'.
 *
 * The first 16 bytes of the passphrase are used as the 128-bit key; a
 * shorter passphrase is padded with zero bytes.  The image's crypt method
 * from the header becomes the active one.
 *
 * Returns 0 on success, -1 if either AES key schedule cannot be set up.
 */
static int qcow_set_key(BlockDriverState *bs, const char *key)
{
    BDRVQcowState *s = bs->opaque;
    uint8_t keybuf[16];
    int key_len, pos;

    key_len = strlen(key);
    if (key_len > 16) {
        key_len = 16;
    }
    /* XXX: we could compress the chars to 7 bits to increase
       entropy */
    for (pos = 0; pos < 16; pos++) {
        keybuf[pos] = (pos < key_len) ? key[pos] : 0;
    }
    s->crypt_method = s->crypt_method_header;

    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0 ||
        AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) {
        return -1;
    }
    return 0;
}

/*
 * Encrypt (enc != 0) or decrypt (enc == 0) 'nb_sectors' 512-byte sectors
 * from 'in_buf' into 'out_buf' with AES-CBC, one sector at a time.
 *
 * The crypt function is compatible with the linux cryptoloop
 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
 * supported.
 */
static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int done;

    for (done = 0; done < nb_sectors; done++) {
        size_t pos = (size_t)done * 512;

        /* per-sector IV: little-endian sector number, then eight zero
           bytes */
        ivec.ll[0] = cpu_to_le64(sector_num + done);
        ivec.ll[1] = 0;
        AES_cbc_encrypt(in_buf + pos, out_buf + pos, 512, key,
                        ivec.b, enc);
    }
}

/*
 * Look up, and optionally allocate, the cluster that backs guest byte
 * offset 'offset'.
 *
 * 'allocate' is:
 *
 * 0 to not allocate.
 *
 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
 * 'n_end')
 *
 * 2 to allocate a compressed cluster of size
 * 'compressed_size'. 'compressed_size' must be > 0 and <
 * cluster_size
 *
 * Returns the L2 entry for the cluster (its file offset, plus
 * QCOW_OFLAG_COMPRESSED and the compressed size for compressed clusters).
 * Returns 0 if the cluster is not allocated or if any metadata or data
 * I/O fails; errors are never reported as (uint64_t)-1.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs,
                                   uint64_t offset, int allocate,
                                   int compressed_size,
                                   int n_start, int n_end)
{
    BDRVQcowState *s = bs->opaque;
    int min_index, i, j, l1_index, l2_index;
    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
    uint32_t min_count;
    int new_l2_table;
    int64_t file_len;

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    l2_offset = s->l1_table[l1_index];
    new_l2_table = 0;
    if (!l2_offset) {
        if (!allocate) {
            return 0;
        }
        /* allocate a new l2 table at the end of the file */
        file_len = bdrv_getlength(bs->file);
        if (file_len < 0) {
            return 0;
        }
        /* round to cluster size */
        l2_offset = ((uint64_t)file_len + s->cluster_size - 1) &
            ~(s->cluster_size - 1);
        /* update the L1 entry on disk first; the in-memory copy only
           changes once the write has succeeded */
        tmp = cpu_to_be64(l2_offset);
        if (bdrv_pwrite_sync(bs->file,
                s->l1_table_offset + l1_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0) {
            return 0;
        }
        s->l1_table[l1_index] = l2_offset;
        new_l2_table = 1;
    }
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i << s->l2_bits);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    /* The slot's contents are about to be overwritten.  Untag it first so
       that a failed read or write below cannot leave clobbered data
       registered under the previous table's offset. */
    s->l2_cache_offsets[min_index] = 0;
    s->l2_cache_counts[min_index] = 0;
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
                s->l2_size * sizeof(uint64_t)) < 0) {
            return 0;
        }
    } else {
        if (bdrv_pread(bs->file, l2_offset, l2_table,
                       s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t)) {
            return 0;
        }
    }
    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
 found:
    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (!cluster_offset ||
        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
        if (!allocate) {
            return 0;
        }
        /* allocate a new cluster */
        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
            (n_end - n_start) < s->cluster_sectors) {
            /* if the cluster is already compressed, we must
               decompress it in the case it is not completely
               overwritten */
            if (decompress_cluster(bs, cluster_offset) < 0) {
                return 0;
            }
            file_len = bdrv_getlength(bs->file);
            if (file_len < 0) {
                return 0;
            }
            cluster_offset = ((uint64_t)file_len + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
                            s->cluster_size) != s->cluster_size) {
                return 0;
            }
        } else {
            file_len = bdrv_getlength(bs->file);
            if (file_len < 0) {
                return 0;
            }
            cluster_offset = file_len;
            if (allocate == 1) {
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
                /* NOTE(review): the result of the truncate was ignored in
                   the original and still is; confirm whether growing the
                   file here is meant to be best-effort. */
                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (s->crypt_method &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
                            encrypt_sectors(s, start_sect + i,
                                            s->cluster_data,
                                            s->cluster_data + 512, 1, 1,
                                            &s->aes_encrypt_key);
                            if (bdrv_pwrite(bs->file,
                                            cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512) {
                                return 0;
                            }
                        }
                    }
                }
            } else if (allocate == 2) {
                cluster_offset |= QCOW_OFLAG_COMPRESSED |
                    (uint64_t)compressed_size << (63 - s->cluster_bits);
            }
        }
        /* update L2 table: disk first, cached copy only on success */
        tmp = cpu_to_be64(cluster_offset);
        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0) {
            return 0;
        }
        l2_table[l2_index] = tmp;
    }
    return cluster_offset;
}

389 390
/*
 * Report whether the cluster containing 'sector_num' is allocated in this
 * image.
 *
 * '*pnum' receives the number of sectors, starting at 'sector_num' and
 * capped at 'nb_sectors', that lie in the same cluster.
 * Returns 1 if the cluster has an L2 entry, 0 otherwise.
 */
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t cluster_offset;
    int first, in_cluster;

    /* the lookup goes through the shared L2 cache, so hold the lock */
    qemu_co_mutex_lock(&s->lock);
    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
    qemu_co_mutex_unlock(&s->lock);

    first = sector_num & (s->cluster_sectors - 1);
    in_cluster = s->cluster_sectors - first;
    *pnum = (in_cluster > nb_sectors) ? nb_sectors : in_cluster;

    return (cluster_offset != 0);
}

/*
 * Inflate the raw deflate stream in 'buf' ('buf_size' bytes) into
 * 'out_buf'.
 *
 * Returns 0 only if inflate() ends with Z_STREAM_END or Z_BUF_ERROR and
 * exactly 'out_buf_size' bytes were produced; -1 otherwise.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
    z_stream strm;
    int status, produced;

    memset(&strm, 0, sizeof(strm));
    strm.next_in = (uint8_t *)buf;
    strm.avail_in = buf_size;
    strm.next_out = out_buf;
    strm.avail_out = out_buf_size;

    /* negative window bits: the stream has no zlib header */
    if (inflateInit2(&strm, -12) != Z_OK) {
        return -1;
    }
    status = inflate(&strm, Z_FINISH);
    produced = strm.next_out - out_buf;
    inflateEnd(&strm);

    if (status != Z_STREAM_END && status != Z_BUF_ERROR) {
        return -1;
    }
    if (produced != out_buf_size) {
        return -1;
    }
    return 0;
}
433

434
/*
 * Make s->cluster_cache hold the decompressed contents of the compressed
 * cluster described by the L2 entry 'cluster_offset'.
 *
 * The entry carries the file offset in its low bits and the compressed
 * size above them.  If the cache already holds that cluster nothing is
 * read.  Returns 0 on success, -1 on a read or decompression failure; in
 * the failure case the cache is left marked invalid.
 */
static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    int ret, csize;
    uint64_t coffset;

    coffset = cluster_offset & s->cluster_offset_mask;
    if (s->cluster_cache_offset == coffset) {
        return 0;
    }

    csize = cluster_offset >> (63 - s->cluster_bits);
    csize &= (s->cluster_size - 1);
    ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
    if (ret != csize) {
        return -1;
    }
    /* decompress_buffer() writes into cluster_cache even when it fails, so
       the previously cached cluster is gone from here on: invalidate the
       tag before overwriting the buffer */
    s->cluster_cache_offset = -1;
    if (decompress_buffer(s->cluster_cache, s->cluster_size,
                          s->cluster_data, csize) < 0) {
        return -1;
    }
    s->cluster_cache_offset = coffset;
    return 0;
}

456
/*
 * Coroutine read: fill 'qiov' with 'nb_sectors' sectors starting at
 * 'sector_num'.
 *
 * The request is processed one cluster at a time.  Unallocated clusters are
 * read from the backing image if there is one and zero-filled otherwise;
 * compressed clusters are served from the decompressed-cluster cache;
 * normal clusters are read from bs->file and decrypted when a key is set.
 * The state lock is dropped around the actual data reads.
 *
 * A multi-element 'qiov' is handled through a temporary linear buffer.
 * Returns 0 on success or a negative errno value.
 */
static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    QEMUIOVector hd_qiov;
    struct iovec hd_iov;
    uint64_t cluster_offset;
    void *bounce = NULL;
    uint8_t *dst;
    int first, count;
    int ret = 0;

    if (qiov->niov > 1) {
        bounce = qemu_blockalign(bs, qiov->size);
        dst = bounce;
    } else {
        dst = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {
        /* prepare next request: at most up to the end of this cluster */
        cluster_offset = get_cluster_offset(bs, sector_num << 9,
                                            0, 0, 0, 0);
        first = sector_num & (s->cluster_sectors - 1);
        count = s->cluster_sectors - first;
        if (count > nb_sectors) {
            count = nb_sectors;
        }

        if (cluster_offset == 0) {
            if (!bs->backing_hd) {
                /* Note: in this case, no need to wait */
                memset(dst, 0, 512 * count);
            } else {
                /* read from the base image */
                hd_iov.iov_base = (void *)dst;
                hd_iov.iov_len = count * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_readv(bs->backing_hd, sector_num,
                                    count, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    ret = -EIO;
                    break;
                }
            }
        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            /* add AIO support for compressed blocks ? */
            if (decompress_cluster(bs, cluster_offset) < 0) {
                ret = -EIO;
                break;
            }
            memcpy(dst,
                   s->cluster_cache + first * 512, 512 * count);
        } else {
            /* a plain cluster must start on a sector boundary */
            if ((cluster_offset & 511) != 0) {
                ret = -EIO;
                break;
            }
            hd_iov.iov_base = (void *)dst;
            hd_iov.iov_len = count * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_readv(bs->file,
                                (cluster_offset >> 9) + first,
                                count, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                break;
            }
            if (s->crypt_method) {
                /* decrypt in place */
                encrypt_sectors(s, sector_num, dst, dst,
                                count, 0,
                                &s->aes_decrypt_key);
            }
        }
        ret = 0;

        nb_sectors -= count;
        sector_num += count;
        dst += count * 512;
    }

    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        /* hand the linear buffer back to the caller's vector */
        qemu_iovec_from_buf(qiov, 0, bounce, qiov->size);
        qemu_vfree(bounce);
    }

    return ret;
}

554
/*
 * Coroutine write: store 'nb_sectors' sectors from 'qiov' starting at
 * 'sector_num'.
 *
 * The request is processed one cluster at a time; each cluster is
 * allocated on demand through get_cluster_offset().  With a key set the
 * data is encrypted into a private buffer before being written.  The state
 * lock is dropped around the data write itself.
 *
 * A multi-element 'qiov' is first flattened into a temporary buffer.
 * Returns 0 on success or a negative errno value.
 */
static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    QEMUIOVector hd_qiov;
    struct iovec hd_iov;
    uint64_t cluster_offset;
    uint8_t *cluster_data = NULL;
    const uint8_t *payload;
    void *bounce = NULL;
    uint8_t *src;
    int first, count;
    int ret = 0;

    s->cluster_cache_offset = -1; /* disable compressed cache */

    if (qiov->niov > 1) {
        bounce = qemu_blockalign(bs, qiov->size);
        src = bounce;
        qemu_iovec_to_buf(qiov, 0, src, qiov->size);
    } else {
        src = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {
        /* handle at most the rest of the current cluster */
        first = sector_num & (s->cluster_sectors - 1);
        count = s->cluster_sectors - first;
        if (count > nb_sectors) {
            count = nb_sectors;
        }
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
                                            first,
                                            first + count);
        if (cluster_offset == 0 || (cluster_offset & 511) != 0) {
            ret = -EIO;
            break;
        }

        payload = src;
        if (s->crypt_method) {
            /* the encryption buffer is allocated once, on first use */
            if (cluster_data == NULL) {
                cluster_data = g_malloc0(s->cluster_size);
            }
            encrypt_sectors(s, sector_num, cluster_data, src,
                            count, 1, &s->aes_encrypt_key);
            payload = cluster_data;
        }

        hd_iov.iov_base = (void *)payload;
        hd_iov.iov_len = count * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + first,
                             count, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            break;
        }
        ret = 0;

        nb_sectors -= count;
        sector_num += count;
        src += count * 512;
    }

    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        qemu_vfree(bounce);
    }
    g_free(cluster_data);

    return ret;
}

B
bellard 已提交
632
/*
 * Close callback: release the tables and buffers allocated by qcow_open()
 * and withdraw the migration blocker it registered.
 */
static void qcow_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    /* metadata tables */
    g_free(s->l1_table);
    g_free(s->l2_cache);

    /* cluster-sized buffers */
    g_free(s->cluster_cache);
    g_free(s->cluster_data);

    /* unregister the blocker before freeing it */
    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}

645
/*
 * Create callback: write a new, empty QCOW image to 'filename'.
 *
 * Recognised options are the virtual size, an optional backing file name
 * and the encryption flag.  The resulting file holds the header, the
 * backing file name (if any) and a zero-filled L1 table.
 *
 * Images with a backing file use 512-byte clusters, others 4 KB clusters.
 * The backing file name "fat:" is treated specially: no name is stored,
 * but the 512-byte cluster geometry is still used.
 *
 * Returns 0 on success, otherwise the failing call's result.
 */
static int qcow_create(const char *filename, QEMUOptionParameter *options)
{
    int header_size, backing_filename_len, l1_size, shift, i;
    uint64_t l1_sectors;
    QCowHeader header;
    uint8_t *tmp;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;
    BlockDriverState *qcow_bs;

    /* Read out options */
    for (; options && options->name; options++) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
        }
    }

    /* NOTE(review): 'options' has been advanced to the list terminator by
       the loop above, so the protocol layer sees an empty option list —
       confirm this is intended. */
    ret = bdrv_create_file(filename, options);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_file_open(&qcow_bs, filename, BDRV_O_RDWR);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_truncate(qcow_bs, 0);
    if (ret < 0) {
        goto exit;
    }

    /* build the header; multi-byte fields are stored big-endian */
    memset(&header, 0, sizeof(header));
    header.magic = cpu_to_be32(QCOW_MAGIC);
    header.version = cpu_to_be32(QCOW_VERSION);
    header.size = cpu_to_be64(total_size * 512);
    header_size = sizeof(header);
    backing_filename_len = 0;
    if (!backing_file) {
        header.cluster_bits = 12; /* 4 KB clusters */
        header.l2_bits = 9; /* 4 KB L2 tables */
    } else {
        if (strcmp(backing_file, "fat:") != 0) {
            /* the name is stored right after the fixed header */
            header.backing_file_offset = cpu_to_be64(header_size);
            backing_filename_len = strlen(backing_file);
            header.backing_file_size = cpu_to_be32(backing_filename_len);
            header_size += backing_filename_len;
        } else {
            /* special backing file for vvfat */
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                    unmodified sectors */
        header.l2_bits = 12; /* 32 KB L2 tables */
    }
    /* the L1 table starts at the next 8-byte boundary */
    header_size = (header_size + 7) & ~7;
    shift = header.cluster_bits + header.l2_bits;
    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
    header.crypt_method = cpu_to_be32((flags & BLOCK_FLAG_ENCRYPT) ?
                                      QCOW_CRYPT_AES : QCOW_CRYPT_NONE);

    /* write all the data */
    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
        ret = bdrv_pwrite(qcow_bs, sizeof(header),
            backing_file, backing_filename_len);
        if (ret != backing_filename_len) {
            goto exit;
        }
    }

    /* zero-fill the L1 table, one sector at a time */
    l1_sectors = (sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
        BDRV_SECTOR_SIZE;
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < l1_sectors; i++) {
        ret = bdrv_pwrite(qcow_bs, header_size +
            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
        if (ret != BDRV_SECTOR_SIZE) {
            break;
        }
    }
    g_free(tmp);
    if (i < l1_sectors) {
        /* a sector write failed; 'ret' holds its result */
        goto exit;
    }

    ret = 0;
exit:
    bdrv_delete(qcow_bs);
    return ret;
}

B
bellard 已提交
749
/*
 * Discard every cluster of the image: zero the L1 table in memory and on
 * disk, truncate the file right after the L1 table and reset the caches.
 *
 * Returns 0 on success or the negative value returned by the failing
 * write or truncate.
 */
static int qcow_make_empty(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    size_t l1_length = s->l1_size * sizeof(uint64_t);
    int ret;

    /* The truncate below makes file offsets reusable, so a decompressed
       cluster remembered by file offset must not survive it. */
    s->cluster_cache_offset = -1;

    memset(s->l1_table, 0, l1_length);
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
                           l1_length);
    if (ret < 0) {
        return ret;
    }
    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
    if (ret < 0) {
        return ret;
    }

    /* no L2 table exists any more: empty the cache and its bookkeeping */
    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    memset(s->l2_cache_offsets, 0, sizeof(s->l2_cache_offsets));
    memset(s->l2_cache_counts, 0, sizeof(s->l2_cache_counts));

    return 0;
}

B
bellard 已提交
770 771
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
772
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
B
bellard 已提交
773
                                 const uint8_t *buf, int nb_sectors)
B
bellard 已提交
774 775 776 777 778 779 780
{
    BDRVQcowState *s = bs->opaque;
    z_stream strm;
    int ret, out_len;
    uint8_t *out_buf;
    uint64_t cluster_offset;

B
bellard 已提交
781 782
    if (nb_sectors != s->cluster_sectors)
        return -EINVAL;
B
bellard 已提交
783

784
    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
B
bellard 已提交
785 786 787 788

    /* best compression, small window, no zlib header */
    memset(&strm, 0, sizeof(strm));
    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
789
                       Z_DEFLATED, -12,
B
bellard 已提交
790 791
                       9, Z_DEFAULT_STRATEGY);
    if (ret != 0) {
792 793
        ret = -EINVAL;
        goto fail;
B
bellard 已提交
794 795 796 797 798 799 800 801 802 803
    }

    strm.avail_in = s->cluster_size;
    strm.next_in = (uint8_t *)buf;
    strm.avail_out = s->cluster_size;
    strm.next_out = out_buf;

    ret = deflate(&strm, Z_FINISH);
    if (ret != Z_STREAM_END && ret != Z_OK) {
        deflateEnd(&strm);
804 805
        ret = -EINVAL;
        goto fail;
B
bellard 已提交
806 807 808 809 810 811 812
    }
    out_len = strm.next_out - out_buf;

    deflateEnd(&strm);

    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
813 814 815 816
        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
        if (ret < 0) {
            goto fail;
        }
B
bellard 已提交
817
    } else {
818
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
B
bellard 已提交
819
                                            out_len, 0, 0);
820 821 822 823 824
        if (cluster_offset == 0) {
            ret = -EIO;
            goto fail;
        }

B
bellard 已提交
825
        cluster_offset &= s->cluster_offset_mask;
826 827 828
        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
B
bellard 已提交
829 830
        }
    }
831

832 833
    ret = 0;
fail:
834
    g_free(out_buf);
835
    return ret;
B
bellard 已提交
836 837
}

B
bellard 已提交
838 839 840 841 842 843 844
/*
 * Fill in driver information for the image; only the cluster size is
 * reported.  Always returns 0.
 */
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    const BDRVQcowState *state = bs->opaque;

    bdi->cluster_size = state->cluster_size;
    return 0;
}

845 846

static QEMUOptionParameter qcow_create_options[] = {
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_ENCRYPT,
        .type = OPT_FLAG,
        .help = "Encrypt the image"
    },
862 863 864
    { NULL }
};

865
static BlockDriver bdrv_qcow = {
866 867 868 869 870 871
    .format_name	= "qcow",
    .instance_size	= sizeof(BDRVQcowState),
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
    .bdrv_create	= qcow_create,
872 873 874

    .bdrv_co_readv          = qcow_co_readv,
    .bdrv_co_writev         = qcow_co_writev,
875
    .bdrv_co_is_allocated   = qcow_co_is_allocated,
876 877 878 879 880

    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
    .bdrv_write_compressed  = qcow_write_compressed,
    .bdrv_get_info          = qcow_get_info,
881 882

    .create_options = qcow_create_options,
B
bellard 已提交
883
};
884 885 886 887 888 889 890

/* Register the qcow driver description with the block layer. */
static void bdrv_qcow_init(void)
{
    bdrv_register(&bdrv_qcow);
}

/* Hook the registration function into the block module initialisation. */
block_init(bdrv_qcow_init);