提交 8eb779e4 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Mon 22 Feb 2016 15:59:25 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (34 commits)
  qemu-iotests: 140: make description slightly more verbose
  qemu-iotests: 140: don't use IDE device
  qemu-iotests: 067: ignore QMP events
  blockdev: unset inappropriate flags when changing medium
  MAINTAINERS: Add myself as maintainer of the throttling code
  docs: Document the throttling infrastructure
  qapi: Correct the name of the iops_rd parameter
  qemu-iotests: Extend iotest 093 to test bursts
  throttle: Test throttle_compute_wait() during bursts
  throttle: Check that burst_level leaks correctly
  qapi: Add burst length fields to BlockDeviceInfo
  qapi: Add burst length parameters to block_set_io_throttle
  throttle: Add command-line settings to define the burst periods
  throttle: Add support for burst periods
  throttle: Use throttle_config_init() to initialize ThrottleConfig
  throttle: Merge all functions that check the configuration into one
  throttle: Set always an average value when setting a maximum value
  throttle: Make throttle_is_valid() set errp
  throttle: Make throttle_max_is_missing_limit() set errp
  throttle: Make throttle_conflicting() set errp
  ...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
......@@ -1283,6 +1283,15 @@ S: Maintained
F: include/qemu/sockets.h
F: util/qemu-sockets.c
Throttling infrastructure
M: Alberto Garcia <berto@igalia.com>
S: Supported
F: block/throttle-groups.c
F: include/block/throttle-groups.h
F: include/qemu/throttle.h
F: util/throttle.c
L: qemu-block@nongnu.org
Usermode Emulation
------------------
Overall
......
......@@ -1191,10 +1191,6 @@ static int bdrv_fill_options(QDict **options, const char *filename,
}
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
*flags |= BDRV_O_INACTIVE;
}
return 0;
}
......
......@@ -92,6 +92,26 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->has_bps_max_length = info->has_bps_max;
info->bps_max_length =
cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
info->has_bps_rd_max_length = info->has_bps_rd_max;
info->bps_rd_max_length =
cfg.buckets[THROTTLE_BPS_READ].burst_length;
info->has_bps_wr_max_length = info->has_bps_wr_max;
info->bps_wr_max_length =
cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
info->has_iops_max_length = info->has_iops_max;
info->iops_max_length =
cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
info->has_iops_rd_max_length = info->has_iops_rd_max;
info->iops_rd_max_length =
cfg.buckets[THROTTLE_OPS_READ].burst_length;
info->has_iops_wr_max_length = info->has_iops_wr_max;
info->iops_wr_max_length =
cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
......
......@@ -286,7 +286,8 @@ static void quorum_aio_cb(void *opaque, int ret)
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
if (ret < 0 && ++acb->child_iter < s->num_children) {
if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
acb->child_iter++;
read_fifo_child(acb);
return;
}
......
......@@ -343,29 +343,6 @@ static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
return true;
}
/*
 * Validate a throttling configuration.
 *
 * Runs three checks on @cfg in order: throttle_conflicting(),
 * throttle_is_valid() and throttle_max_is_missing_limit(). The first
 * check that reports a problem stores a matching message in @errp and
 * makes this function return false; the remaining checks are not run.
 *
 * Returns true if none of the checks reports a problem.
 */
static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
{
/* Total limits and separate read/write limits must not be combined. */
if (throttle_conflicting(cfg)) {
error_setg(errp, "bps/iops/max total values and read/write values"
" cannot be used at the same time");
return false;
}
/* Reject values outside the range reported in the error message. */
if (!throttle_is_valid(cfg)) {
error_setg(errp, "bps/iops/max values must be within [0, %lld]",
THROTTLE_VALUE_MAX);
return false;
}
/* A burst maximum is only accepted together with its bps/iops value. */
if (throttle_max_is_missing_limit(cfg)) {
error_setg(errp, "bps_max/iops_max require corresponding"
" bps/iops values");
return false;
}
return true;
}
typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
/* All parameters but @opts are optional and may be set to NULL. */
......@@ -410,7 +387,7 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
}
if (throttle_cfg) {
memset(throttle_cfg, 0, sizeof(*throttle_cfg));
throttle_config_init(throttle_cfg);
throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.bps-total", 0);
throttle_cfg->buckets[THROTTLE_BPS_READ].avg =
......@@ -437,10 +414,23 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
throttle_cfg->op_size =
qemu_opt_get_number(opts, "throttling.iops-size", 0);
if (!check_throttle_config(throttle_cfg, errp)) {
if (!throttle_is_valid(throttle_cfg, errp)) {
return;
}
}
......@@ -610,6 +600,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
errp);
if (!blk) {
......@@ -688,6 +682,10 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
goto fail;
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
bs = NULL;
ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
if (ret < 0) {
......@@ -2515,6 +2513,8 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
}
bdrv_flags = blk_get_open_flags_from_root_state(blk);
bdrv_flags &= ~(BDRV_O_TEMPORARY | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING |
BDRV_O_PROTOCOL);
if (!has_read_only) {
read_only = BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN;
......@@ -2600,6 +2600,18 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
int64_t iops_rd_max,
bool has_iops_wr_max,
int64_t iops_wr_max,
bool has_bps_max_length,
int64_t bps_max_length,
bool has_bps_rd_max_length,
int64_t bps_rd_max_length,
bool has_bps_wr_max_length,
int64_t bps_wr_max_length,
bool has_iops_max_length,
int64_t iops_max_length,
bool has_iops_rd_max_length,
int64_t iops_rd_max_length,
bool has_iops_wr_max_length,
int64_t iops_wr_max_length,
bool has_iops_size,
int64_t iops_size,
bool has_group,
......@@ -2626,7 +2638,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
goto out;
}
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd;
cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
......@@ -2654,11 +2666,30 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
}
if (has_bps_max_length) {
cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = bps_max_length;
}
if (has_bps_rd_max_length) {
cfg.buckets[THROTTLE_BPS_READ].burst_length = bps_rd_max_length;
}
if (has_bps_wr_max_length) {
cfg.buckets[THROTTLE_BPS_WRITE].burst_length = bps_wr_max_length;
}
if (has_iops_max_length) {
cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = iops_max_length;
}
if (has_iops_rd_max_length) {
cfg.buckets[THROTTLE_OPS_READ].burst_length = iops_rd_max_length;
}
if (has_iops_wr_max_length) {
cfg.buckets[THROTTLE_OPS_WRITE].burst_length = iops_wr_max_length;
}
if (has_iops_size) {
cfg.op_size = iops_size;
}
if (!check_throttle_config(&cfg, errp)) {
if (!throttle_is_valid(&cfg, errp)) {
goto out;
}
......@@ -4086,6 +4117,30 @@ QemuOptsList qemu_common_drive_opts = {
.name = "throttling.bps-write-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
.name = "throttling.iops-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-total-max burst period, in seconds",
},{
.name = "throttling.iops-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-read-max burst period, in seconds",
},{
.name = "throttling.iops-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-write-max burst period, in seconds",
},{
.name = "throttling.bps-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-total-max burst period, in seconds",
},{
.name = "throttling.bps-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-read-max burst period, in seconds",
},{
.name = "throttling.bps-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-write-max burst period, in seconds",
},{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
......
......@@ -103,7 +103,18 @@ in the description of a field.
write to an image with unknown auto-clear features if it
clears the respective bits from this field first.
Bits 0-63: Reserved (set to 0)
Bit 0: Bitmaps extension bit
This bit indicates consistency for the bitmaps
extension data.
It is an error if this bit is set without the
bitmaps extension present.
If the bitmaps extension is present but this
bit is unset, the bitmaps extension data must be
considered inconsistent.
Bits 1-63: Reserved (set to 0)
96 - 99: refcount_order
Describes the width of a reference count block entry (width
......@@ -123,6 +134,7 @@ be stored. Each extension has a structure like the following:
0x00000000 - End of the header extension area
0xE2792ACA - Backing file format name
0x6803f857 - Feature name table
0x23852875 - Bitmaps extension
other - Unknown header extension, can be safely
ignored
......@@ -166,6 +178,36 @@ the header extension data. Each entry look like this:
terminated if it has full length)
== Bitmaps extension ==
The bitmaps extension is an optional header extension. It provides the ability
to store bitmaps related to a virtual disk. For now, there is only one bitmap
type: the dirty tracking bitmap, which tracks virtual disk changes from some
point in time.
The data of the extension should be considered consistent only if the
corresponding auto-clear feature bit is set, see autoclear_features above.
The fields of the bitmaps extension are:
Byte 0 - 3: nb_bitmaps
The number of bitmaps contained in the image. Must be
greater than or equal to 1.
Note: Qemu currently only supports up to 65535 bitmaps per
image.
4 - 7: Reserved, must be zero.
8 - 15: bitmap_directory_size
Size of the bitmap directory in bytes. It is the cumulative
size of all (nb_bitmaps) bitmap headers.
16 - 23: bitmap_directory_offset
Offset into the image file at which the bitmap directory
starts. Must be aligned to a cluster boundary.
== Host cluster management ==
qcow2 manages the allocation of host clusters by maintaining a reference count
......@@ -360,3 +402,180 @@ Snapshot table entry:
variable: Padding to round up the snapshot table entry size to the
next multiple of 8.
== Bitmaps ==
As mentioned above, the bitmaps extension provides the ability to store bitmaps
related to a virtual disk. This section describes how these bitmaps are stored.
All stored bitmaps are related to the virtual disk stored in the same image, so
each bitmap size is equal to the virtual disk size.
Each bit of the bitmap is responsible for a strictly defined range of the virtual
disk. For bit number bit_nr the corresponding range (in bytes) will be:
[bit_nr * bitmap_granularity .. (bit_nr + 1) * bitmap_granularity - 1]
Granularity is a property of the concrete bitmap, see below.
=== Bitmap directory ===
Each bitmap saved in the image is described in a bitmap directory entry. The
bitmap directory is a contiguous area in the image file, whose starting offset
and length are given by the header extension fields bitmap_directory_offset and
bitmap_directory_size. The entries of the bitmap directory have variable
length, depending on the lengths of the bitmap name and extra data. These
entries are also called bitmap headers.
Structure of a bitmap directory entry:
Byte 0 - 7: bitmap_table_offset
Offset into the image file at which the bitmap table
(described below) for the bitmap starts. Must be aligned to
a cluster boundary.
8 - 11: bitmap_table_size
Number of entries in the bitmap table of the bitmap.
12 - 15: flags
Bit
0: in_use
The bitmap was not saved correctly and may be
inconsistent.
1: auto
The bitmap must reflect all changes of the virtual
disk by any application that would write to this qcow2
file (including writes, snapshot switching, etc.). The
type of this bitmap must be 'dirty tracking bitmap'.
2: extra_data_compatible
This flag is meaningful when the extra data is
unknown to the software (currently any extra data is
unknown to Qemu).
If it is set, the bitmap may be used as expected, extra
data must be left as is.
If it is not set, the bitmap must not be used, but
both it and its extra data must be left as is.
Bits 3 - 31 are reserved and must be 0.
16: type
This field describes the sort of the bitmap.
Values:
1: Dirty tracking bitmap
Values 0, 2 - 255 are reserved.
17: granularity_bits
Granularity bits. Valid values: 0 - 63.
Note: Qemu currently doesn't support granularity_bits
greater than 31.
Granularity is calculated as
granularity = 1 << granularity_bits
A bitmap's granularity is how many bytes of the image
account for one bit of the bitmap.
18 - 19: name_size
Size of the bitmap name. Must be non-zero.
Note: Qemu currently doesn't support values greater than
1023.
20 - 23: extra_data_size
Size of type-specific extra data.
For now, as no extra data is defined, extra_data_size is
reserved and should be zero. If it is non-zero the
behavior is defined by extra_data_compatible flag.
variable: extra_data
Extra data for the bitmap, occupying extra_data_size bytes.
Extra data must never contain references to clusters or in
some other way allocate additional clusters.
variable: name
The name of the bitmap (not null terminated), occupying
name_size bytes. Must be unique among all bitmap names
within the bitmaps extension.
variable: Padding to round up the bitmap directory entry size to the
next multiple of 8. All bytes of the padding must be zero.
=== Bitmap table ===
Each bitmap is stored using a one-level structure (as opposed to two-level
structures like for refcounts and guest clusters mapping) for the mapping of
bitmap data to host clusters. This structure is called the bitmap table.
Each bitmap table has a variable size (stored in the bitmap directory entry)
and may use multiple clusters, however, it must be contiguous in the image
file.
Structure of a bitmap table entry:
Bit 0: Reserved and must be zero if bits 9 - 55 are non-zero.
If bits 9 - 55 are zero:
0: Cluster should be read as all zeros.
1: Cluster should be read as all ones.
1 - 8: Reserved and must be zero.
9 - 55: Bits 9 - 55 of the host cluster offset. Must be aligned to
a cluster boundary. If the offset is 0, the cluster is
unallocated; in that case, bit 0 determines how this
cluster should be treated during reads.
56 - 63: Reserved and must be zero.
=== Bitmap data ===
As noted above, bitmap data is stored in separate clusters, described by the
bitmap table. Given an offset (in bytes) into the bitmap data, the offset into
the image file can be obtained as follows:
image_offset(bitmap_data_offset) =
bitmap_table[bitmap_data_offset / cluster_size] +
(bitmap_data_offset % cluster_size)
This offset is not defined if bits 9 - 55 of bitmap table entry are zero (see
above).
Given an offset byte_nr into the virtual disk and the bitmap's granularity, the
bit offset into the image file to the corresponding bit of the bitmap can be
calculated like this:
bit_offset(byte_nr) =
image_offset(byte_nr / granularity / 8) * 8 +
(byte_nr / granularity) % 8
If the size of the bitmap data is not a multiple of the cluster size then the
last cluster of the bitmap data contains some unused tail bits. These bits must
be zero.
=== Dirty tracking bitmaps ===
Bitmaps with 'type' field equal to one are dirty tracking bitmaps.
When the virtual disk is in use, a dirty tracking bitmap may be 'enabled' or
'disabled'. While the bitmap is 'enabled', all writes to the virtual disk
should be reflected in the bitmap. A set bit in the bitmap means that the
corresponding range of the virtual disk (see above) was written to while the
bitmap was 'enabled'. An unset bit means that this range was not written to.
The software doesn't have to sync the bitmap in the image file with its
representation in RAM after each write. Flag 'in_use' should be set while the
bitmap is not synced.
In the image file the 'enabled' state is reflected by the 'auto' flag. If this
flag is set, the software must consider the bitmap as 'enabled' and start
tracking virtual disk changes to this bitmap from the first write to the
virtual disk. If this flag is not set then the bitmap is disabled.
The QEMU throttling infrastructure
==================================
Copyright (C) 2016 Igalia, S.L.
Author: Alberto Garcia <berto@igalia.com>
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
Introduction
------------
QEMU includes a throttling module that can be used to set limits to
I/O operations. The code itself is generic and independent of the I/O
units, but it is currently used to limit the number of bytes per second
and operations per second (IOPS) when performing disk I/O.
This document explains how to use the throttling code in QEMU, and how
it works internally. The implementation is in throttle.c.
Using throttling to limit disk I/O
----------------------------------
Two aspects of the disk I/O can be limited: the number of bytes per
second and the number of operations per second (IOPS). For each one of
them the user can set a global limit or separate limits for read and
write operations. This gives us a total of six different parameters.
I/O limits can be set using the throttling.* parameters of -drive, or
using the QMP 'block_set_io_throttle' command. These are the names of
the parameters for both cases:
|-----------------------+-----------------------|
| -drive | block_set_io_throttle |
|-----------------------+-----------------------|
| throttling.iops-total | iops |
| throttling.iops-read | iops_rd |
| throttling.iops-write | iops_wr |
| throttling.bps-total | bps |
| throttling.bps-read | bps_rd |
| throttling.bps-write | bps_wr |
|-----------------------+-----------------------|
It is possible to set limits for both IOPS and bps at the same time,
and for each case we can decide whether to have separate read and
write limits or not, but note that if iops-total is set then neither
iops-read nor iops-write can be set. The same applies to bps-total and
bps-read/write.
The default value of these parameters is 0, and it means 'unlimited'.
In its most basic usage, the user can add a drive to QEMU with a limit
of 100 IOPS with the following -drive line:
-drive file=hd0.qcow2,throttling.iops-total=100
We can do the same using QMP. In this case all these parameters are
mandatory, so we must set to 0 the ones that we don't want to limit:
{ "execute": "block_set_io_throttle",
"arguments": {
"device": "virtio0",
"iops": 100,
"iops_rd": 0,
"iops_wr": 0,
"bps": 0,
"bps_rd": 0,
"bps_wr": 0
}
}
I/O bursts
----------
In addition to the basic limits we have just seen, QEMU allows the
user to do bursts of I/O for a configurable amount of time. A burst is
an amount of I/O that can exceed the basic limit. Bursts are useful to
allow better performance when there are peaks of activity (the OS
boots, a service needs to be restarted) while keeping the average
limits lower the rest of the time.
Two parameters control bursts: their length and the maximum amount of
I/O they allow. These two can be configured separately for each one of
the six basic parameters described in the previous section, but in
this section we'll use 'iops-total' as an example.
The I/O limit during bursts is set using 'iops-total-max', and the
maximum length (in seconds) is set with 'iops-total-max-length'. So if
we want to configure a drive with a basic limit of 100 IOPS and allow
bursts of 2000 IOPS for 60 seconds, we would do it like this (the line
is split for clarity):
-drive file=hd0.qcow2,
throttling.iops-total=100,
throttling.iops-total-max=2000,
throttling.iops-total-max-length=60
Or, with QMP:
{ "execute": "block_set_io_throttle",
"arguments": {
"device": "virtio0",
"iops": 100,
"iops_rd": 0,
"iops_wr": 0,
"bps": 0,
"bps_rd": 0,
"bps_wr": 0,
"iops_max": 2000,
"iops_max_length": 60
}
}
With this, the user can perform I/O on hd0.qcow2 at a rate of 2000
IOPS for 1 minute before it's throttled down to 100 IOPS.
The user will be able to do bursts again if there's a sufficiently
long period of time with unused I/O (see below for details).
The default value for 'iops-total-max' is 0 and it means that bursts
are not allowed. 'iops-total-max-length' can only be set if
'iops-total-max' is set as well, and its default value is 1 second.
Here's the complete list of parameters for configuring bursts:
|----------------------------------+-----------------------|
| -drive | block_set_io_throttle |
|----------------------------------+-----------------------|
| throttling.iops-total-max | iops_max |
| throttling.iops-total-max-length | iops_max_length |
| throttling.iops-read-max | iops_rd_max |
| throttling.iops-read-max-length | iops_rd_max_length |
| throttling.iops-write-max | iops_wr_max |
| throttling.iops-write-max-length | iops_wr_max_length |
| throttling.bps-total-max | bps_max |
| throttling.bps-total-max-length | bps_max_length |
| throttling.bps-read-max | bps_rd_max |
| throttling.bps-read-max-length | bps_rd_max_length |
| throttling.bps-write-max | bps_wr_max |
| throttling.bps-write-max-length | bps_wr_max_length |
|----------------------------------+-----------------------|
Controlling the size of I/O operations
--------------------------------------
When applying IOPS limits all I/O operations are treated equally
regardless of their size. This means that the user can take advantage
of this in order to circumvent the limits and submit one huge I/O
request instead of several smaller ones.
QEMU provides a setting called throttling.iops-size to prevent this
from happening. This setting specifies the size (in bytes) of an I/O
request for accounting purposes. Larger requests will be counted
proportionally to this size.
For example, if iops-size is set to 4096 then an 8KB request will be
counted as two, and a 6KB request will be counted as one and a
half. This only applies to requests larger than iops-size: smaller
requests will always be counted as one, no matter their size.
The default value of iops-size is 0 and it means that the size of the
requests is never taken into account when applying IOPS limits.
Applying I/O limits to groups of disks
--------------------------------------
In all the examples so far we have seen how to apply limits to the I/O
performed on individual drives, but QEMU allows grouping drives so
they all share the same limits.
The way it works is that each drive with I/O limits is assigned to a
group named using the throttling.group parameter. If this parameter is
not specified, then the device name (i.e. 'virtio0', 'ide0-hd0') will
be used as the group name.
Limits set using the throttling.* parameters discussed earlier in this
document apply to the combined I/O of all members of a group.
Consider this example:
-drive file=hd1.qcow2,throttling.iops-total=6000,throttling.group=foo
-drive file=hd2.qcow2,throttling.iops-total=6000,throttling.group=foo
-drive file=hd3.qcow2,throttling.iops-total=3000,throttling.group=bar
-drive file=hd4.qcow2,throttling.iops-total=6000,throttling.group=foo
-drive file=hd5.qcow2,throttling.iops-total=3000,throttling.group=bar
-drive file=hd6.qcow2,throttling.iops-total=5000
Here hd1, hd2 and hd4 are all members of a group named 'foo' with a
combined IOPS limit of 6000, and hd3 and hd5 are members of 'bar'. hd6
is left alone (technically it is part of a 1-member group).
Limits are applied in a round-robin fashion so if there are concurrent
I/O requests on several drives of the same group they will be
distributed evenly.
When I/O limits are applied to an existing drive using the QMP command
'block_set_io_throttle', the following things need to be taken into
account:
- I/O limits are shared within the same group, so new values will
affect all members and overwrite the previous settings. In other
words: if different limits are applied to members of the same
group, the last one wins.
- If 'group' is unset it is assumed to be the current group of that
drive. If the drive is not in a group yet, it will be added to a
group named after the device name.
- If 'group' is set then the drive will be moved to that group if
it was a member of a different one. In this case the limits
specified in the parameters will be applied to the new group
only.
- I/O limits can be disabled by setting all of them to 0. In this
case the device will be removed from its group and the rest of
its members will not be affected. The 'group' parameter is
ignored.
The Leaky Bucket algorithm
--------------------------
I/O limits in QEMU are implemented using the leaky bucket algorithm
(specifically the "Leaky bucket as a meter" variant).
This algorithm uses the analogy of a bucket that leaks water
constantly. The water that gets into the bucket represents the I/O
that has been performed, and no more I/O is allowed once the bucket is
full.
To see the way this corresponds to the throttling parameters in QEMU,
consider the following values:
iops-total=100
iops-total-max=2000
iops-total-max-length=60
- Water leaks from the bucket at a rate of 100 IOPS.
- Water can be added to the bucket at a rate of 2000 IOPS.
- The size of the bucket is 2000 x 60 = 120000
- If 'iops-total-max-length' is unset then the bucket size is 2000 x 1 = 2000.
The bucket is initially empty, therefore water can be added until it's
full at a rate of 2000 IOPS (the burst rate). Once the bucket is full
we can only add as much water as it leaks, therefore the I/O rate is
reduced to 100 IOPS. If we add less water than it leaks then the
bucket will start to empty, allowing for bursts again.
Note that since water is leaking from the bucket even during bursts,
it will take a bit more than 60 seconds at 2000 IOPS to fill it
up. After those 60 seconds the bucket will have leaked 60 x 100 =
6000, allowing for 3 more seconds of I/O at 2000 IOPS.
Also, due to the way the algorithm works, longer bursts can be done at
a lower I/O rate, e.g. 1000 IOPS during 120 seconds.
......@@ -1414,6 +1414,18 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
0,
false,
0,
false, /* no burst length via HMP */
0,
false,
0,
false,
0,
false,
0,
false,
0,
false,
0,
false, /* No default I/O size */
0,
false,
......
......@@ -2,7 +2,7 @@
* QEMU throttling infrastructure
*
* Copyright (C) Nodalink, EURL. 2013-2014
* Copyright (C) Igalia, S.L. 2015
* Copyright (C) Igalia, S.L. 2015-2016
*
* Authors:
* Benoît Canet <benoit.canet@nodalink.com>
......@@ -42,16 +42,47 @@ typedef enum {
} BucketType;
/*
* The max parameter of the leaky bucket throttling algorithm can be used to
* allow the guest to do bursts.
* The max value is a pool of I/O that the guest can use without being throttled
* at all. Throttling is triggered once this pool is empty.
* This module implements I/O limits using the leaky bucket
* algorithm. The code is independent of the I/O units, but it is
* currently used for bytes per second and operations per second.
*
* Three parameters can be set by the user:
*
* - avg: the desired I/O limits in units per second.
* - max: the limit during bursts, also in units per second.
* - burst_length: the maximum length of the burst period, in seconds.
*
* Here's how it works:
*
* - The bucket level (number of performed I/O units) is kept in
* bkt.level and leaks at a rate of bkt.avg units per second.
*
* - The size of the bucket is bkt.max * bkt.burst_length. Once the
* bucket is full no more I/O is performed until the bucket leaks
* again. This is what makes the I/O rate bkt.avg.
*
* - The bkt.avg rate does not apply until the bucket is full,
* allowing the user to do bursts until then. The I/O limit during
* bursts is bkt.max. To enforce this limit we keep an additional
* bucket in bkt.burst_level that leaks at a rate of bkt.max units
* per second.
*
* - Because of all of the above, the user can perform I/O at a
* maximum of bkt.max units per second for at most bkt.burst_length
* seconds in a row. After that the bucket will be full and the I/O
* rate will go down to bkt.avg.
*
* - Since the bucket always leaks at a rate of bkt.avg, this also
* determines how much the user needs to wait before being able to
* do bursts again.
*/
/* Parameters and runtime state of a single leaky bucket. */
typedef struct LeakyBucket {
double avg; /* average goal in units per second */
double max; /* limit during bursts, in units per second */
double level; /* bucket level in units */
double burst_level; /* bucket level in units (for computing bursts) */
unsigned burst_length; /* max length of the burst period, in seconds */
} LeakyBucket;
/* The following structure is used to configure a ThrottleState
......@@ -84,12 +115,6 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta);
int64_t throttle_compute_wait(LeakyBucket *bkt);
/* expose timer computation function for unit tests */
bool throttle_compute_timer(ThrottleState *ts,
bool is_write,
int64_t now,
int64_t *next_timestamp);
/* init/destroy cycle */
void throttle_init(ThrottleState *ts);
......@@ -112,11 +137,7 @@ bool throttle_timers_are_initialized(ThrottleTimers *tt);
/* configuration */
bool throttle_enabled(ThrottleConfig *cfg);
bool throttle_conflicting(ThrottleConfig *cfg);
bool throttle_is_valid(ThrottleConfig *cfg);
bool throttle_max_is_missing_limit(ThrottleConfig *cfg);
bool throttle_is_valid(ThrottleConfig *cfg, Error **errp);
void throttle_config(ThrottleState *ts,
ThrottleTimers *tt,
......@@ -124,6 +145,8 @@ void throttle_config(ThrottleState *ts,
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg);
void throttle_config_init(ThrottleConfig *cfg);
/* usage */
bool throttle_schedule_timer(ThrottleState *ts,
ThrottleTimers *tt,
......
......@@ -786,6 +786,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
int64_t addr;
BlockDriverState *bs, *bs_prev = NULL;
BlockBackend *blk;
Error *local_err = NULL;
uint8_t *buf;
int64_t total_sectors = 0;
int nr_sectors;
......@@ -824,6 +825,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
device_name);
return -EINVAL;
}
bdrv_invalidate_cache(bs, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;
}
}
if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
......
......@@ -273,17 +273,41 @@
#
# @image: the info of image used (since: 1.6)
#
# @bps_max: #optional total max in bytes (Since 1.7)
# @bps_max: #optional total throughput limit during bursts,
# in bytes (Since 1.7)
#
# @bps_rd_max: #optional read max in bytes (Since 1.7)
# @bps_rd_max: #optional read throughput limit during bursts,
# in bytes (Since 1.7)
#
# @bps_wr_max: #optional write max in bytes (Since 1.7)
# @bps_wr_max: #optional write throughput limit during bursts,
# in bytes (Since 1.7)
#
# @iops_max: #optional total I/O operations max (Since 1.7)
# @iops_max: #optional total I/O operations per second during bursts
# (Since 1.7)
#
# @iops_rd_max: #optional read I/O operations max (Since 1.7)
# @iops_rd_max: #optional read I/O operations per second during bursts
# (Since 1.7)
#
# @iops_wr_max: #optional write I/O operations max (Since 1.7)
# @iops_wr_max: #optional write I/O operations per second during bursts
# (Since 1.7)
#
# @bps_max_length: #optional maximum length of the @bps_max burst
# period, in seconds. (Since 2.6)
#
# @bps_rd_max_length: #optional maximum length of the @bps_rd_max
# burst period, in seconds. (Since 2.6)
#
# @bps_wr_max_length: #optional maximum length of the @bps_wr_max
# burst period, in seconds. (Since 2.6)
#
# @iops_max_length: #optional maximum length of the @iops_max burst
# period, in seconds. (Since 2.6)
#
# @iops_rd_max_length: #optional maximum length of the @iops_rd_max
# burst period, in seconds. (Since 2.6)
#
# @iops_wr_max_length: #optional maximum length of the @iops_wr_max
# burst period, in seconds. (Since 2.6)
#
# @iops_size: #optional an I/O size in bytes (Since 1.7)
#
......@@ -308,6 +332,9 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
'*bps_max_length': 'int', '*bps_rd_max_length': 'int',
'*bps_wr_max_length': 'int', '*iops_max_length': 'int',
'*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
'*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo',
'write_threshold': 'int' } }
......@@ -1294,21 +1321,57 @@
#
# @iops: total I/O operations per second
#
# @ops_rd: read I/O operations per second
# @iops_rd: read I/O operations per second
#
# @iops_wr: write I/O operations per second
#
# @bps_max: #optional total max in bytes (Since 1.7)
# @bps_max: #optional total throughput limit during bursts,
# in bytes (Since 1.7)
#
# @bps_rd_max: #optional read throughput limit during bursts,
# in bytes (Since 1.7)
#
# @bps_wr_max: #optional write throughput limit during bursts,
# in bytes (Since 1.7)
#
# @iops_max: #optional total I/O operations per second during bursts
# (Since 1.7)
#
# @iops_rd_max: #optional read I/O operations per second during bursts
# (Since 1.7)
#
# @iops_wr_max: #optional write I/O operations per second during bursts
# (Since 1.7)
#
# @bps_max_length: #optional maximum length of the @bps_max burst
# period, in seconds. It must only
# be set if @bps_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @bps_rd_max: #optional read max in bytes (Since 1.7)
# @bps_rd_max_length: #optional maximum length of the @bps_rd_max
# burst period, in seconds. It must only
# be set if @bps_rd_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @bps_wr_max: #optional write max in bytes (Since 1.7)
# @bps_wr_max_length: #optional maximum length of the @bps_wr_max
# burst period, in seconds. It must only
# be set if @bps_wr_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @iops_max: #optional total I/O operations max (Since 1.7)
# @iops_max_length: #optional maximum length of the @iops_max burst
# period, in seconds. It must only
# be set if @iops_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @iops_rd_max: #optional read I/O operations max (Since 1.7)
# @iops_rd_max_length: #optional maximum length of the @iops_rd_max
# burst period, in seconds. It must only
# be set if @iops_rd_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @iops_wr_max: #optional write I/O operations max (Since 1.7)
# @iops_wr_max_length: #optional maximum length of the @iops_wr_max
# burst period, in seconds. It must only
# be set if @iops_wr_max is set as well.
# Defaults to 1. (Since 2.6)
#
# @iops_size: #optional an I/O size in bytes (Since 1.7)
#
......@@ -1325,6 +1388,9 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
'*bps_max_length': 'int', '*bps_rd_max_length': 'int',
'*bps_wr_max_length': 'int', '*iops_max_length': 'int',
'*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
'*iops_size': 'int', '*group': 'str' } }
##
......
......@@ -10,68 +10,68 @@ STEXI
ETEXI
DEF("check", img_check,
"check [-q] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
"check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
STEXI
@item check [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
ETEXI
DEF("create", img_create,
"create [-q] [-f fmt] [-o options] filename [size]")
"create [-q] [--object objectdef] [--image-opts] [-f fmt] [-o options] filename [size]")
STEXI
@item create [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
@item create [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
ETEXI
DEF("commit", img_commit,
"commit [-q] [-f fmt] [-t cache] [-b base] [-d] [-p] filename")
"commit [-q] [--object objectdef] [--image-opts] [-f fmt] [-t cache] [-b base] [-d] [-p] filename")
STEXI
@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
@item commit [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
ETEXI
DEF("compare", img_compare,
"compare [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2")
"compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2")
STEXI
@item compare [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2}
@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2}
ETEXI
DEF("convert", img_convert,
"convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
"convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
STEXI
@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("info", img_info,
"info [-f fmt] [--output=ofmt] [--backing-chain] filename")
"info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename")
STEXI
@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
ETEXI
DEF("map", img_map,
"map [-f fmt] [--output=ofmt] filename")
"map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] filename")
STEXI
@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
ETEXI
DEF("snapshot", img_snapshot,
"snapshot [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
"snapshot [--object objectdef] [--image-opts] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
STEXI
@item snapshot [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
@item snapshot [--object @var{objectdef}] [--image-opts] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
ETEXI
DEF("rebase", img_rebase,
"rebase [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
"rebase [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
STEXI
@item rebase [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
@item rebase [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
ETEXI
DEF("resize", img_resize,
"resize [-q] filename [+ | -]size")
"resize [--object objectdef] [--image-opts] [-q] filename [+ | -]size")
STEXI
@item resize [-q] @var{filename} [+ | -]@var{size}
@item resize [--object @var{objectdef}] [--image-opts] [-q] @var{filename} [+ | -]@var{size}
ETEXI
DEF("amend", img_amend,
"amend [-p] [-q] [-f fmt] [-t cache] -o options filename")
"amend [--object objectdef] [--image-opts] [-p] [-q] [-f fmt] [-t cache] -o options filename")
STEXI
@item amend [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
@item amend [--object @var{objectdef}] [--image-opts] [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
@end table
ETEXI
此差异已折叠。
......@@ -24,6 +24,20 @@ Command parameters:
@table @var
@item filename
is a disk image filename
@item --object @var{objectdef}
is a QEMU user creatable object definition. See the @code{qemu(1)} manual
page for a description of the object properties. The most common object
type is a @code{secret}, which is used to supply passwords and/or encryption
keys.
@item --image-opts
Indicates that the @var{filename} parameter is to be interpreted as a
full option string, not a plain filename. This parameter is mutually
exclusive with the @var{-f} and @var{-F} parameters.
@item fmt
is the disk image format. It is guessed automatically in most cases. See below
for a description of the supported disk formats.
......
......@@ -18,6 +18,7 @@
#include "qemu/config-file.h"
#include "qemu/readline.h"
#include "qapi/qmp/qstring.h"
#include "qom/object_interfaces.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "trace/control.h"
......@@ -31,6 +32,7 @@ static BlockBackend *qemuio_blk;
/* qemu-io commands passed using -c */
static int ncmdline;
static char **cmdline;
static bool imageOpts;
static ReadLineState *readline_state;
......@@ -150,6 +152,10 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
readonly = 1;
break;
case 'o':
if (imageOpts) {
printf("--image-opts and 'open -o' are mutually exclusive\n");
return 0;
}
if (!qemu_opts_parse_noisily(&empty_opts, optarg, false)) {
qemu_opts_reset(&empty_opts);
return 0;
......@@ -165,6 +171,14 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
flags |= BDRV_O_RDWR;
}
if (imageOpts && (optind == argc - 1)) {
if (!qemu_opts_parse_noisily(&empty_opts, argv[optind], false)) {
qemu_opts_reset(&empty_opts);
return 0;
}
optind++;
}
qopts = qemu_opts_find(&empty_opts, NULL);
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
qemu_opts_reset(&empty_opts);
......@@ -200,6 +214,8 @@ static void usage(const char *name)
"Usage: %s [-h] [-V] [-rsnm] [-f FMT] [-c STRING] ... [file]\n"
"QEMU Disk exerciser\n"
"\n"
" --object OBJECTDEF define an object such as 'secret' for\n"
" passwords and/or encryption keys\n"
" -c, --cmd STRING execute command with its arguments\n"
" from the given string\n"
" -f, --format FMT specifies the block driver to use\n"
......@@ -361,24 +377,51 @@ static void reenable_tty_echo(void)
qemu_set_tty_echo(STDIN_FILENO, true);
}
/*
 * getopt_long() codes for options that exist only in long form.
 * The values start at 256 so that they can never collide with the
 * single-character option codes listed in sopt.
 */
enum {
OPTION_OBJECT = 256, /* --object */
OPTION_IMAGE_OPTS = 257, /* --image-opts */
};
/*
 * Option list that collects the arguments of every --object switch.
 * .desc declares no parameters; the implied option name is "qom-type"
 * (presumably so a leading bare value names the object type -- confirm
 * against the QemuOpts documentation).
 */
static QemuOptsList qemu_object_opts = {
.name = "object",
.implied_opt_name = "qom-type",
.head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
.desc = {
{ }
},
};
/*
 * Option list used by main() to parse the image argument as an option
 * string when --image-opts is given. The implied option name is "file".
 */
static QemuOptsList file_opts = {
.name = "file",
.implied_opt_name = "file",
.head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
.desc = {
/* no elements => accept any params */
{ /* end of list */ }
},
};
int main(int argc, char **argv)
{
int readonly = 0;
const char *sopt = "hVc:d:f:rsnmgkt:T:";
const struct option lopt[] = {
{ "help", 0, NULL, 'h' },
{ "version", 0, NULL, 'V' },
{ "offset", 1, NULL, 'o' },
{ "cmd", 1, NULL, 'c' },
{ "format", 1, NULL, 'f' },
{ "read-only", 0, NULL, 'r' },
{ "snapshot", 0, NULL, 's' },
{ "nocache", 0, NULL, 'n' },
{ "misalign", 0, NULL, 'm' },
{ "native-aio", 0, NULL, 'k' },
{ "discard", 1, NULL, 'd' },
{ "cache", 1, NULL, 't' },
{ "trace", 1, NULL, 'T' },
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V' },
{ "offset", required_argument, NULL, 'o' },
{ "cmd", required_argument, NULL, 'c' },
{ "format", required_argument, NULL, 'f' },
{ "read-only", no_argument, NULL, 'r' },
{ "snapshot", no_argument, NULL, 's' },
{ "nocache", no_argument, NULL, 'n' },
{ "misalign", no_argument, NULL, 'm' },
{ "native-aio", no_argument, NULL, 'k' },
{ "discard", required_argument, NULL, 'd' },
{ "cache", required_argument, NULL, 't' },
{ "trace", required_argument, NULL, 'T' },
{ "object", required_argument, NULL, OPTION_OBJECT },
{ "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS },
{ NULL, 0, NULL, 0 }
};
int c;
......@@ -386,6 +429,7 @@ int main(int argc, char **argv)
int flags = BDRV_O_UNMAP;
Error *local_error = NULL;
QDict *opts = NULL;
const char *format = NULL;
#ifdef CONFIG_POSIX
signal(SIGPIPE, SIG_IGN);
......@@ -395,6 +439,7 @@ int main(int argc, char **argv)
qemu_init_exec_dir(argv[0]);
module_call_init(MODULE_INIT_QOM);
qemu_add_opts(&qemu_object_opts);
bdrv_init();
while ((c = getopt_long(argc, argv, sopt, lopt, &opt_index)) != -1) {
......@@ -412,10 +457,7 @@ int main(int argc, char **argv)
}
break;
case 'f':
if (!opts) {
opts = qdict_new();
}
qdict_put(opts, "driver", qstring_from_str(optarg));
format = optarg;
break;
case 'c':
add_user_command(optarg);
......@@ -446,6 +488,17 @@ int main(int argc, char **argv)
case 'h':
usage(progname);
exit(0);
case OPTION_OBJECT: {
QemuOpts *qopts;
qopts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!qopts) {
exit(1);
}
} break;
case OPTION_IMAGE_OPTS:
imageOpts = true;
break;
default:
usage(progname);
exit(1);
......@@ -457,11 +510,23 @@ int main(int argc, char **argv)
exit(1);
}
if (format && imageOpts) {
error_report("--image-opts and -f are mutually exclusive");
exit(1);
}
if (qemu_init_main_loop(&local_error)) {
error_report_err(local_error);
exit(1);
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
NULL, &local_error)) {
error_report_err(local_error);
exit(1);
}
/* initialize commands */
qemuio_add_command(&quit_cmd);
qemuio_add_command(&open_cmd);
......@@ -482,7 +547,21 @@ int main(int argc, char **argv)
}
if ((argc - optind) == 1) {
openfile(argv[optind], flags, opts);
if (imageOpts) {
QemuOpts *qopts = NULL;
qopts = qemu_opts_parse_noisily(&file_opts, argv[optind], false);
if (!qopts) {
exit(1);
}
opts = qemu_opts_to_qdict(qopts, NULL);
openfile(NULL, flags, opts);
} else {
if (format) {
opts = qdict_new();
qdict_put(opts, "driver", qstring_from_str(format));
}
openfile(argv[optind], flags, opts);
}
}
command_loop();
......
......@@ -37,12 +37,13 @@
#include <pthread.h>
#define SOCKET_PATH "/var/lock/qemu-nbd-%s"
#define QEMU_NBD_OPT_CACHE 1
#define QEMU_NBD_OPT_AIO 2
#define QEMU_NBD_OPT_DISCARD 3
#define QEMU_NBD_OPT_DETECT_ZEROES 4
#define QEMU_NBD_OPT_OBJECT 5
#define QEMU_NBD_OPT_TLSCREDS 6
#define QEMU_NBD_OPT_CACHE 256
#define QEMU_NBD_OPT_AIO 257
#define QEMU_NBD_OPT_DISCARD 258
#define QEMU_NBD_OPT_DETECT_ZEROES 259
#define QEMU_NBD_OPT_OBJECT 260
#define QEMU_NBD_OPT_TLSCREDS 261
#define QEMU_NBD_OPT_IMAGE_OPTS 262
static NBDExport *exp;
static bool newproto;
......@@ -105,6 +106,7 @@ static void usage(const char *name)
" --aio=MODE set AIO mode (native or threads)\n"
" --discard=MODE set discard mode (ignore, unmap)\n"
" --detect-zeroes=MODE set detect-zeroes mode (off, on, unmap)\n"
" --image-opts treat FILE as a full set of image options\n"
"\n"
"Report bugs to <qemu-devel@nongnu.org>\n"
, name, NBD_DEFAULT_PORT, "DEVICE");
......@@ -394,6 +396,16 @@ static SocketAddress *nbd_build_socket_address(const char *sockpath,
}
/*
 * Option list used by main() to parse the source path as a set of image
 * options when --image-opts is given. The implied option name is "file".
 */
static QemuOptsList file_opts = {
.name = "file",
.implied_opt_name = "file",
.head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
.desc = {
/* no elements => accept any params */
{ /* end of list */ }
},
};
static QemuOptsList qemu_object_opts = {
.name = "object",
.implied_opt_name = "qom-type",
......@@ -451,30 +463,32 @@ int main(int argc, char **argv)
const char *sn_id_or_name = NULL;
const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:";
struct option lopt[] = {
{ "help", 0, NULL, 'h' },
{ "version", 0, NULL, 'V' },
{ "bind", 1, NULL, 'b' },
{ "port", 1, NULL, 'p' },
{ "socket", 1, NULL, 'k' },
{ "offset", 1, NULL, 'o' },
{ "read-only", 0, NULL, 'r' },
{ "partition", 1, NULL, 'P' },
{ "connect", 1, NULL, 'c' },
{ "disconnect", 0, NULL, 'd' },
{ "snapshot", 0, NULL, 's' },
{ "load-snapshot", 1, NULL, 'l' },
{ "nocache", 0, NULL, 'n' },
{ "cache", 1, NULL, QEMU_NBD_OPT_CACHE },
{ "aio", 1, NULL, QEMU_NBD_OPT_AIO },
{ "discard", 1, NULL, QEMU_NBD_OPT_DISCARD },
{ "detect-zeroes", 1, NULL, QEMU_NBD_OPT_DETECT_ZEROES },
{ "shared", 1, NULL, 'e' },
{ "format", 1, NULL, 'f' },
{ "persistent", 0, NULL, 't' },
{ "verbose", 0, NULL, 'v' },
{ "object", 1, NULL, QEMU_NBD_OPT_OBJECT },
{ "export-name", 1, NULL, 'x' },
{ "tls-creds", 1, NULL, QEMU_NBD_OPT_TLSCREDS },
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V' },
{ "bind", required_argument, NULL, 'b' },
{ "port", required_argument, NULL, 'p' },
{ "socket", required_argument, NULL, 'k' },
{ "offset", required_argument, NULL, 'o' },
{ "read-only", no_argument, NULL, 'r' },
{ "partition", required_argument, NULL, 'P' },
{ "connect", required_argument, NULL, 'c' },
{ "disconnect", no_argument, NULL, 'd' },
{ "snapshot", no_argument, NULL, 's' },
{ "load-snapshot", required_argument, NULL, 'l' },
{ "nocache", no_argument, NULL, 'n' },
{ "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
{ "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
{ "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
{ "detect-zeroes", required_argument, NULL,
QEMU_NBD_OPT_DETECT_ZEROES },
{ "shared", required_argument, NULL, 'e' },
{ "format", required_argument, NULL, 'f' },
{ "persistent", no_argument, NULL, 't' },
{ "verbose", no_argument, NULL, 'v' },
{ "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
{ "export-name", required_argument, NULL, 'x' },
{ "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
{ "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
{ NULL, 0, NULL, 0 }
};
int ch;
......@@ -493,6 +507,7 @@ int main(int argc, char **argv)
QDict *options = NULL;
const char *export_name = NULL;
const char *tlscredsid = NULL;
bool imageOpts = false;
/* The client thread uses SIGTERM to interrupt the server. A signal
* handler ensures that "qemu-nbd -v -c" exits with a nice status code.
......@@ -672,6 +687,9 @@ int main(int argc, char **argv)
case QEMU_NBD_OPT_TLSCREDS:
tlscredsid = optarg;
break;
case QEMU_NBD_OPT_IMAGE_OPTS:
imageOpts = true;
break;
}
}
......@@ -800,13 +818,29 @@ int main(int argc, char **argv)
bdrv_init();
atexit(bdrv_close_all);
if (fmt) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(fmt));
srcpath = argv[optind];
if (imageOpts) {
QemuOpts *opts;
if (fmt) {
error_report("--image-opts and -f are mutually exclusive");
exit(EXIT_FAILURE);
}
opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
if (!opts) {
qemu_opts_reset(&file_opts);
exit(EXIT_FAILURE);
}
options = qemu_opts_to_qdict(opts, NULL);
qemu_opts_reset(&file_opts);
blk = blk_new_open("hda", NULL, NULL, options, flags, &local_err);
} else {
if (fmt) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(fmt));
}
blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err);
}
srcpath = argv[optind];
blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err);
if (!blk) {
error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
argv[optind]);
......
......@@ -13,7 +13,8 @@ Export a QEMU disk image using the NBD protocol.
@c man end
@c man begin OPTIONS
@var{filename} is a disk image filename.
@var{filename} is a disk image filename, or a set of block
driver options if @var{--image-opts} is specified.
@var{dev} is an NBD device.
......@@ -33,6 +34,10 @@ The offset into the image
The interface to bind to (default @samp{0.0.0.0})
@item -k, --socket=@var{path}
Use a unix socket with path @var{path}
@item --image-opts
Treat @var{filename} as a set of image options, instead of a plain
filename. If this flag is specified, the @var{-f} flag should
not be used; instead, the '@code{format=}' option should be set.
@item -f, --format=@var{fmt}
Force the use of the block driver for format @var{fmt} instead of
auto-detecting
......
......@@ -2006,7 +2006,7 @@ EQMP
{
.name = "block_set_io_throttle",
.args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?,group:s?",
.args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,bps_max_length:l?,bps_rd_max_length:l?,bps_wr_max_length:l?,iops_max_length:l?,iops_rd_max_length:l?,iops_wr_max_length:l?,iops_size:l?,group:s?",
.mhandler.cmd_new = qmp_marshal_block_set_io_throttle,
},
......@@ -2025,14 +2025,20 @@ Arguments:
- "iops": total I/O operations per second (json-int)
- "iops_rd": read I/O operations per second (json-int)
- "iops_wr": write I/O operations per second (json-int)
- "bps_max": total max in bytes (json-int)
- "bps_rd_max": read max in bytes (json-int)
- "bps_wr_max": write max in bytes (json-int)
- "iops_max": total I/O operations max (json-int)
- "iops_rd_max": read I/O operations max (json-int)
- "iops_wr_max": write I/O operations max (json-int)
- "iops_size": I/O size in bytes when limiting (json-int)
- "group": throttle group name (json-string)
- "bps_max": total throughput limit during bursts, in bytes (json-int, optional)
- "bps_rd_max": read throughput limit during bursts, in bytes (json-int, optional)
- "bps_wr_max": write throughput limit during bursts, in bytes (json-int, optional)
- "iops_max": total I/O operations per second during bursts (json-int, optional)
- "iops_rd_max": read I/O operations per second during bursts (json-int, optional)
- "iops_wr_max": write I/O operations per second during bursts (json-int, optional)
- "bps_max_length": maximum length of the @bps_max burst period, in seconds (json-int, optional)
- "bps_rd_max_length": maximum length of the @bps_rd_max burst period, in seconds (json-int, optional)
- "bps_wr_max_length": maximum length of the @bps_wr_max burst period, in seconds (json-int, optional)
- "iops_max_length": maximum length of the @iops_max burst period, in seconds (json-int, optional)
- "iops_rd_max_length": maximum length of the @iops_rd_max burst period, in seconds (json-int, optional)
- "iops_wr_max_length": maximum length of the @iops_wr_max burst period, in seconds (json-int, optional)
- "iops_size": I/O size in bytes when limiting (json-int, optional)
- "group": throttle group name (json-string, optional)
Example:
......@@ -2049,6 +2055,7 @@ Example:
"iops_max": 0,
"iops_rd_max": 0,
"iops_wr_max": 0,
"bps_max_length": 60,
"iops_size": 0 } }
<- { "return": {} }
......
......@@ -45,11 +45,20 @@ function do_run_qemu()
echo
}
# Remove QMP events from (pretty-printed) output. Doesn't handle
# nested dicts correctly, but we don't get any of those in this test.
_filter_qmp_events()
{
# The events are pretty-printed over several lines, but sed works on one
# line at a time. Turn every newline into a tab first so the whole input
# becomes a single line, delete each {"timestamp": ..., "event": ...}
# object (with its optional "data" dict), then turn the tabs back into
# newlines.
tr '\n' '\t' | sed -e \
's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g' \
| tr '\t' '\n'
}
function run_qemu()
{
do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
| sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' \
| _filter_generated_node_ids
| _filter_generated_node_ids | _filter_qmp_events
}
size=128M
......
......@@ -69,34 +69,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"path": "/machine/peripheral/virtio0/virtio-backend"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"device": "virtio0",
"path": "/machine/peripheral/virtio0"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "RESET"
}
{
"return": [
]
......@@ -105,14 +77,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "SHUTDOWN"
}
=== -drive/device_add and device_del ===
......@@ -185,34 +149,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"path": "/machine/peripheral/virtio0/virtio-backend"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"device": "virtio0",
"path": "/machine/peripheral/virtio0"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "RESET"
}
{
"return": [
]
......@@ -221,14 +157,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "SHUTDOWN"
}
=== drive_add/device_add and device_del ===
......@@ -304,34 +232,6 @@ Testing:
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"path": "/machine/peripheral/virtio0/virtio-backend"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"device": "virtio0",
"path": "/machine/peripheral/virtio0"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "RESET"
}
{
"return": [
]
......@@ -340,14 +240,6 @@ Testing:
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "SHUTDOWN"
}
=== blockdev_add/device_add and device_del ===
......@@ -424,34 +316,6 @@ Testing:
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"path": "/machine/peripheral/virtio0/virtio-backend"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "DEVICE_DELETED",
"data": {
"device": "virtio0",
"path": "/machine/peripheral/virtio0"
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "RESET"
}
{
"return": [
{
......@@ -506,12 +370,4 @@ Testing:
"return": {
}
}
{
"timestamp": {
"seconds": TIMESTAMP,
"microseconds": TIMESTAMP
},
"event": "SHUTDOWN"
}
*** done
......@@ -3,7 +3,7 @@
# Tests for IO throttling
#
# Copyright (C) 2015 Red Hat, Inc.
# Copyright (C) 2015 Igalia, S.L.
# Copyright (C) 2015-2016 Igalia, S.L.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -21,6 +21,8 @@
import iotests
nsec_per_sec = 1000000000
class ThrottleTestCase(iotests.QMPTestCase):
test_img = "null-aio://"
max_drives = 3
......@@ -42,16 +44,7 @@ class ThrottleTestCase(iotests.QMPTestCase):
def tearDown(self):
self.vm.shutdown()
def do_test_throttle(self, ndrives, seconds, params):
def check_limit(limit, num):
# IO throttling algorithm is discrete, allow 10% error so the test
# is more robust
return limit == 0 or \
(num < seconds * limit * 1.1 / ndrives
and num > seconds * limit * 0.9 / ndrives)
nsec_per_sec = 1000000000
def configure_throttle(self, ndrives, params):
params['group'] = 'test'
# Set the I/O throttling parameters to all drives
......@@ -60,13 +53,21 @@ class ThrottleTestCase(iotests.QMPTestCase):
result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params)
self.assert_qmp(result, 'return', {})
def do_test_throttle(self, ndrives, seconds, params):
def check_limit(limit, num):
# IO throttling algorithm is discrete, allow 10% error so the test
# is more robust
return limit == 0 or \
(num < seconds * limit * 1.1 / ndrives
and num > seconds * limit * 0.9 / ndrives)
# Set vm clock to a known value
ns = seconds * nsec_per_sec
self.vm.qtest("clock_step %d" % ns)
# Submit enough requests. They will drain bps_max and iops_max, but the
# rest requests won't get executed until we advance the virtual clock
# with qtest interface
# Submit enough requests so the throttling mechanism kicks
# in. The throttled requests won't be executed until we
# advance the virtual clock.
rq_size = 512
rd_nr = max(params['bps'] / rq_size / 2,
params['bps_rd'] / rq_size,
......@@ -142,8 +143,44 @@ class ThrottleTestCase(iotests.QMPTestCase):
for tk in params:
limits = dict([(k, 0) for k in params])
limits[tk] = params[tk] * ndrives
self.configure_throttle(ndrives, limits)
self.do_test_throttle(ndrives, 5, limits)
def test_burst(self):
params = {"bps": 4096,
"bps_rd": 4096,
"bps_wr": 4096,
"iops": 10,
"iops_rd": 10,
"iops_wr": 10,
}
ndrives = 1
# Pick each out of all possible params and test
for tk in params:
rate = params[tk] * ndrives
burst_rate = rate * 7
burst_length = 4
# Configure the throttling settings
settings = dict([(k, 0) for k in params])
settings[tk] = rate
settings['%s_max' % tk] = burst_rate
settings['%s_max_length' % tk] = burst_length
self.configure_throttle(ndrives, settings)
# Wait for the bucket to empty so we can do bursts
wait_ns = nsec_per_sec * burst_length * burst_rate / rate
self.vm.qtest("clock_step %d" % wait_ns)
# Test I/O at the max burst rate
limits = dict([(k, 0) for k in params])
limits[tk] = burst_rate
self.do_test_throttle(ndrives, burst_length, limits)
# Now test I/O at the normal rate
limits[tk] = rate
self.do_test_throttle(ndrives, 5, limits)
class ThrottleTestCoroutine(ThrottleTestCase):
test_img = "null-co://"
......
..
....
----------------------------------------------------------------------
Ran 2 tests
Ran 4 tests
OK
#!/bin/bash
#
# Test case for ejecting a BB with an NBD server attached to it
# Test case for ejecting a BlockBackend with an NBD server attached to it
#
# Verify that the NBD server stops offering the drive when ejecting a
# BlockDriverState tree from a BlockBackend (that is, a medium from a
# drive) exposed via an NBD server.
#
# Copyright (C) 2016 Red Hat, Inc.
#
......@@ -49,7 +53,7 @@ _make_test_img 64k
$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
keep_stderr=y \
_launch_qemu -drive if=ide,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
_launch_qemu -drive if=none,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
2> >(_filter_nbd)
_send_qemu_cmd $QEMU_HANDLE \
......
......@@ -7,7 +7,6 @@ wrote 65536/65536 bytes at offset 0
{"return": {}}
read 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "drv", "tray-open": true}}
{"return": {}}
can't open device nbd+unix:///drv?socket=TEST_DIR/nbd: No export with name 'drv' available
no file open, try 'help open'
......
#!/bin/bash
#
# Test the combination of -incoming and snapshot=on
#
# Copyright (C) 2016 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=kwolf@redhat.com
# Test name, derived from this script's file name; it is echoed below as the
# first line of the test output.
seq=`basename $0`
echo "QA output created by $seq"
# NOTE(review): 'here' and 'tmp' are not used directly in this script;
# presumably they are consumed by the sourced common.rc/common.filter helpers.
here=`pwd`
tmp=/tmp/$$
status=1 # failure is the default!
# Cleanup hook run by the trap below. The trailing 'true' makes the function
# return success regardless of the exit status of _cleanup_test_img.
_cleanup()
{
_cleanup_test_img
true
}
# Run _cleanup and exit with the current value of $status when the shell
# exits (0) or receives signal 1, 2, 3 or 15.
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
_supported_fmt generic
_supported_proto generic
_supported_os Linux
_make_test_img 1M
# Start QEMU on the test image with both -incoming and -snapshot, and feed
# "quit" to the stdio monitor so it exits right away. The monitor output is
# passed through _filter_qemu before being printed.
echo quit | $QEMU -nographic -hda "$TEST_IMG" -incoming 'exec:true' -snapshot -serial none -monitor stdio | _filter_qemu
# success, all done
echo "*** done"
rm -f $seq.full
# Reaching this point means success; the exit trap reports this status.
status=0
QA output created by 145
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) qququiquit
*** done
......@@ -147,3 +147,4 @@
142 auto
143 auto quick
144 rw auto quick
145 auto quick
......@@ -35,6 +35,9 @@ static bool double_cmp(double x, double y)
/* tests for single bucket operations */
static void test_leak_bucket(void)
{
throttle_config_init(&cfg);
bkt = cfg.buckets[THROTTLE_BPS_TOTAL];
/* set initial value */
bkt.avg = 150;
bkt.max = 15;
......@@ -57,13 +60,33 @@ static void test_leak_bucket(void)
g_assert(bkt.avg == 150);
g_assert(bkt.max == 15);
g_assert(double_cmp(bkt.level, 0));
/* check that burst_level leaks correctly */
bkt.burst_level = 6;
bkt.max = 250;
bkt.burst_length = 2; /* otherwise burst_level will not leak */
throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
g_assert(double_cmp(bkt.burst_level, 3.5));
throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
g_assert(double_cmp(bkt.burst_level, 1));
throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
g_assert(double_cmp(bkt.burst_level, 0));
throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
g_assert(double_cmp(bkt.burst_level, 0));
}
static void test_compute_wait(void)
{
unsigned i;
int64_t wait;
int64_t result;
throttle_config_init(&cfg);
bkt = cfg.buckets[THROTTLE_BPS_TOTAL];
/* no operation limit set */
bkt.avg = 0;
bkt.max = 15;
......@@ -93,6 +116,27 @@ static void test_compute_wait(void)
/* time required to do half an operation */
result = (int64_t) NANOSECONDS_PER_SECOND / 150 / 2;
g_assert(wait == result);
/* Perform I/O for 2.2 seconds at a rate of bkt.max */
bkt.burst_length = 2;
bkt.level = 0;
bkt.avg = 10;
bkt.max = 200;
for (i = 0; i < 22; i++) {
double units = bkt.max / 10;
bkt.level += units;
bkt.burst_level += units;
throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 10);
wait = throttle_compute_wait(&bkt);
g_assert(double_cmp(bkt.burst_level, 0));
g_assert(double_cmp(bkt.level, (i + 1) * (bkt.max - bkt.avg) / 10));
/* We can do bursts for the 2 seconds we have configured in
* burst_length. We have 100 extra milliseconds of burst
* because bkt.level has been leaking during this time.
* After that, we have to wait. */
result = i < 21 ? 0 : 1.8 * NANOSECONDS_PER_SECOND;
g_assert(wait == result);
}
}
/* functions to test ThrottleState initialization/destroy methods */
......@@ -222,6 +266,8 @@ static void set_cfg_value(bool is_max, int index, int value)
{
if (is_max) {
cfg.buckets[index].max = value;
/* If max is set, avg should never be 0 */
cfg.buckets[index].avg = MAX(cfg.buckets[index].avg, 1);
} else {
cfg.buckets[index].avg = value;
}
......@@ -231,17 +277,17 @@ static void test_enabled(void)
{
int i;
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
g_assert(!throttle_enabled(&cfg));
for (i = 0; i < BUCKETS_COUNT; i++) {
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(false, i, 150);
g_assert(throttle_enabled(&cfg));
}
for (i = 0; i < BUCKETS_COUNT; i++) {
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(false, i, -150);
g_assert(!throttle_enabled(&cfg));
}
......@@ -254,32 +300,32 @@ static void test_conflicts_for_one_set(bool is_max,
int read,
int write)
{
memset(&cfg, 0, sizeof(cfg));
g_assert(!throttle_conflicting(&cfg));
throttle_config_init(&cfg);
g_assert(throttle_is_valid(&cfg, NULL));
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, read, 1);
g_assert(throttle_conflicting(&cfg));
g_assert(!throttle_is_valid(&cfg, NULL));
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, write, 1);
g_assert(throttle_conflicting(&cfg));
g_assert(!throttle_is_valid(&cfg, NULL));
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, read, 1);
set_cfg_value(is_max, write, 1);
g_assert(throttle_conflicting(&cfg));
g_assert(!throttle_is_valid(&cfg, NULL));
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
g_assert(!throttle_conflicting(&cfg));
g_assert(throttle_is_valid(&cfg, NULL));
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(is_max, read, 1);
set_cfg_value(is_max, write, 1);
g_assert(!throttle_conflicting(&cfg));
g_assert(throttle_is_valid(&cfg, NULL));
}
static void test_conflicting_config(void)
......@@ -313,9 +359,9 @@ static void test_is_valid_for_value(int value, bool should_be_valid)
int is_max, index;
for (is_max = 0; is_max < 2; is_max++) {
for (index = 0; index < BUCKETS_COUNT; index++) {
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
set_cfg_value(is_max, index, value);
g_assert(throttle_is_valid(&cfg) == should_be_valid);
g_assert(throttle_is_valid(&cfg, NULL) == should_be_valid);
}
}
}
......@@ -335,18 +381,18 @@ static void test_max_is_missing_limit(void)
int i;
for (i = 0; i < BUCKETS_COUNT; i++) {
memset(&cfg, 0, sizeof(cfg));
throttle_config_init(&cfg);
cfg.buckets[i].max = 100;
cfg.buckets[i].avg = 0;
g_assert(throttle_max_is_missing_limit(&cfg));
g_assert(!throttle_is_valid(&cfg, NULL));
cfg.buckets[i].max = 0;
cfg.buckets[i].avg = 0;
g_assert(!throttle_max_is_missing_limit(&cfg));
g_assert(throttle_is_valid(&cfg, NULL));
cfg.buckets[i].max = 0;
cfg.buckets[i].avg = 100;
g_assert(!throttle_max_is_missing_limit(&cfg));
g_assert(throttle_is_valid(&cfg, NULL));
}
}
......@@ -550,7 +596,7 @@ static void test_groups(void)
g_assert(bdrv1->throttle_state == bdrv3->throttle_state);
/* Setting the config of a group member affects the whole group */
memset(&cfg1, 0, sizeof(cfg1));
throttle_config_init(&cfg1);
cfg1.buckets[THROTTLE_BPS_READ].avg = 500000;
cfg1.buckets[THROTTLE_BPS_WRITE].avg = 285000;
cfg1.buckets[THROTTLE_OPS_READ].avg = 20000;
......
......@@ -41,6 +41,14 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
/* make the bucket leak */
bkt->level = MAX(bkt->level - leak, 0);
/* if we allow bursts for more than one second we also need to
* keep track of bkt->burst_level so the bkt->max goal per second
* is attained */
if (bkt->burst_length > 1) {
leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND;
bkt->burst_level = MAX(bkt->burst_level - leak, 0);
}
}
/* Calculate the time delta since last leak and make proportional leaks
......@@ -91,13 +99,24 @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
return 0;
}
extra = bkt->level - bkt->max;
/* If the bucket is full then we have to wait */
extra = bkt->level - bkt->max * bkt->burst_length;
if (extra > 0) {
return throttle_do_compute_wait(bkt->avg, extra);
}
if (extra <= 0) {
return 0;
/* If the bucket is not full yet we have to make sure that we
* fulfill the goal of bkt->max units per second. */
if (bkt->burst_length > 1) {
/* We use 1/10 of the max value to smooth the throttling.
* See throttle_fix_bucket() for more details. */
extra = bkt->burst_level - bkt->max / 10;
if (extra > 0) {
return throttle_do_compute_wait(bkt->max, extra);
}
}
return throttle_do_compute_wait(bkt->avg, extra);
return 0;
}
/* This function compute the time that must be waited while this IO
......@@ -137,10 +156,10 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
* @next_timestamp: the resulting timer
* @ret: true if a timer must be set
*/
bool throttle_compute_timer(ThrottleState *ts,
bool is_write,
int64_t now,
int64_t *next_timestamp)
static bool throttle_compute_timer(ThrottleState *ts,
bool is_write,
int64_t now,
int64_t *next_timestamp)
{
int64_t wait;
......@@ -171,10 +190,24 @@ void throttle_timers_attach_aio_context(ThrottleTimers *tt,
tt->write_timer_cb, tt->timer_opaque);
}
/*
 * Put a ThrottleConfig into a valid initial state: every field is
 * zeroed, then each bucket's burst_length is set to 1.
 * @cfg: the config to initialize
 */
void throttle_config_init(ThrottleConfig *cfg)
{
    unsigned remaining = BUCKETS_COUNT;

    memset(cfg, 0, sizeof(*cfg));

    /* burst_length must not stay at the zero left behind by memset() */
    while (remaining > 0) {
        remaining--;
        cfg->buckets[remaining].burst_length = 1;
    }
}
/* To be called first on the ThrottleState
 *
 * Zeroes the whole state, then initializes the embedded configuration
 * through throttle_config_init().
 * @ts: the throttle state to initialize
 */
void throttle_init(ThrottleState *ts)
{
    memset(ts, 0, sizeof(*ts));

    throttle_config_init(&ts->cfg);
}
/* To be called first on the ThrottleTimers */
......@@ -248,13 +281,14 @@ bool throttle_enabled(ThrottleConfig *cfg)
return false;
}
/* return true if any two throttling parameters conflicts
*
/* check if a throttling configuration is valid
* @cfg: the throttling configuration to inspect
* @ret: true if any conflict detected else false
* @ret: true if valid else false
* @errp: error object
*/
bool throttle_conflicting(ThrottleConfig *cfg)
bool throttle_is_valid(ThrottleConfig *cfg, Error **errp)
{
int i;
bool bps_flag, ops_flag;
bool bps_max_flag, ops_max_flag;
......@@ -274,42 +308,40 @@ bool throttle_conflicting(ThrottleConfig *cfg)
(cfg->buckets[THROTTLE_OPS_READ].max ||
cfg->buckets[THROTTLE_OPS_WRITE].max);
return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
}
/* check if a throttling configuration is valid
* @cfg: the throttling configuration to inspect
* @ret: true if valid else false
*/
bool throttle_is_valid(ThrottleConfig *cfg)
{
int i;
if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) {
error_setg(errp, "bps/iops/max total values and read/write values"
" cannot be used at the same time");
return false;
}
for (i = 0; i < BUCKETS_COUNT; i++) {
if (cfg->buckets[i].avg < 0 ||
cfg->buckets[i].max < 0 ||
cfg->buckets[i].avg > THROTTLE_VALUE_MAX ||
cfg->buckets[i].max > THROTTLE_VALUE_MAX) {
error_setg(errp, "bps/iops/max values must be within [0, %lld]",
THROTTLE_VALUE_MAX);
return false;
}
}
return true;
}
if (!cfg->buckets[i].burst_length) {
error_setg(errp, "the burst length cannot be 0");
return false;
}
/* check if bps_max/iops_max is used without bps/iops
* @cfg: the throttling configuration to inspect
*/
bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
{
int i;
if (cfg->buckets[i].burst_length > 1 && !cfg->buckets[i].max) {
error_setg(errp, "burst length set without burst rate");
return false;
}
for (i = 0; i < BUCKETS_COUNT; i++) {
if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
return true;
error_setg(errp, "bps_max/iops_max require corresponding"
" bps/iops values");
return false;
}
}
return false;
return true;
}
/* fix bucket parameters */
......@@ -318,7 +350,7 @@ static void throttle_fix_bucket(LeakyBucket *bkt)
double min;
/* zero bucket level */
bkt->level = 0;
bkt->level = bkt->burst_level = 0;
/* The following is done to cope with the Linux CFQ block scheduler
* which regroup reads and writes by block of 100ms in the guest.
......@@ -421,22 +453,36 @@ bool throttle_schedule_timer(ThrottleState *ts,
*/
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
{
    /* Buckets charged for a request, indexed by is_write: the shared
     * total bucket plus the read- or write-specific one. */
    const BucketType bucket_types_size[2][2] = {
        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
    };
    const BucketType bucket_types_units[2][2] = {
        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
    };
    double units = 1.0;
    unsigned i;

    /* if cfg.op_size is defined and smaller than size we compute unit count */
    if (ts->cfg.op_size && size > ts->cfg.op_size) {
        units = (double) size / ts->cfg.op_size;
    }

    /* Charge each relevant bucket exactly once.  The table-driven loop
     * is the only place where levels are updated, so no bucket is
     * accounted twice for the same request. */
    for (i = 0; i < 2; i++) {
        LeakyBucket *bkt;

        /* bytes-per-second bucket */
        bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
        bkt->level += size;
        /* burst_level is only tracked when bursts may exceed one second */
        if (bkt->burst_length > 1) {
            bkt->burst_level += size;
        }

        /* operations-per-second bucket */
        bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
        bkt->level += units;
        if (bkt->burst_length > 1) {
            bkt->burst_level += units;
        }
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册