提交 fa500928 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20150930' into staging

migration/next for 20150930

# gpg: Signature made Wed 30 Sep 2015 09:24:02 BST using RSA key ID 5872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"

* remotes/juanquintela/tags/migration/20150930:
  migration: Disambiguate MAX_THROTTLE
  qmp/hmp: Add throttle ratio to query-migrate and info migrate
  migration: Dynamic cpu throttling for auto-converge
  migration: Parameters for auto-converge cpu throttling
  cpu: Provide vcpu throttling interface
  migration: yet more possible state transitions
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
......@@ -69,6 +69,14 @@ static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
......@@ -505,10 +513,80 @@ static const VMStateDescription vmstate_timers = {
}
};
/*
 * Work item run on a vcpu; it is queued by cpu_throttle_timer_tick() through
 * async_run_on_cpu(). It puts the vcpu to sleep for its share of the
 * throttle period.
 *
 * With a throttle fraction pct (percentage / 100) the sleep time is
 * pct / (1 - pct) timeslices of CPU_THROTTLE_TIMESLICE_NS each. The iothread
 * lock is released around the sleep and re-acquired before returning.
 *
 * @opaque: the CPUState this work item was queued for.
 */
static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        /*
         * Throttling is off. Still clear the "scheduled" flag: the timer
         * tick only queues this work when atomic_xchg() on the flag returns
         * 0, so leaving it set here would prevent this vcpu from ever being
         * throttled again once throttling is re-enabled.
         */
        atomic_set(&cpu->throttle_thread_scheduled, 0);
        return;
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    /* Allow the next timer tick to queue this work item again. */
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
/*
 * Callback of throttle_timer.
 *
 * Queues cpu_throttle_thread() on every vcpu that does not already have a
 * throttle work item outstanding, then re-arms the timer. When the throttle
 * percentage is zero the function returns without re-arming, so the timer
 * stops firing.
 *
 * @opaque: unused.
 */
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *vcpu;
    double fraction;
    bool already_queued;

    if (cpu_throttle_get_percentage() == 0) {
        /* Throttling is off: do not re-arm the timer. */
        return;
    }

    CPU_FOREACH(vcpu) {
        /* The flag guarantees at most one pending work item per vcpu. */
        already_queued = atomic_xchg(&vcpu->throttle_thread_scheduled, 1);
        if (!already_queued) {
            async_run_on_cpu(vcpu, cpu_throttle_thread, vcpu);
        }
    }

    /* Next tick: one timeslice of run time plus the matching sleep time. */
    fraction = (double)cpu_throttle_get_percentage() / 100;
    timer_mod(throttle_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
              CPU_THROTTLE_TIMESLICE_NS / (1 - fraction));
}
/*
 * Set the vcpu throttle percentage and (re)arm the throttle timer.
 *
 * @new_throttle_pct: requested percentage; values outside
 * [CPU_THROTTLE_PCT_MIN, CPU_THROTTLE_PCT_MAX] are clamped into that range.
 *
 * The timer is scheduled to fire CPU_THROTTLE_TIMESLICE_NS from now on
 * QEMU_CLOCK_VIRTUAL_RT.
 */
void cpu_throttle_set(int new_throttle_pct)
{
    int clamped_pct = new_throttle_pct;

    /* Clamp into the supported range */
    if (clamped_pct > CPU_THROTTLE_PCT_MAX) {
        clamped_pct = CPU_THROTTLE_PCT_MAX;
    }
    if (clamped_pct < CPU_THROTTLE_PCT_MIN) {
        clamped_pct = CPU_THROTTLE_PCT_MIN;
    }

    atomic_set(&throttle_percentage, clamped_pct);

    timer_mod(throttle_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
              CPU_THROTTLE_TIMESLICE_NS);
}
/*
 * Turn vcpu throttling off by resetting the throttle percentage to zero.
 * The timer tick and the per-vcpu throttle work both return early when they
 * read a zero percentage.
 */
void cpu_throttle_stop(void)
{
atomic_set(&throttle_percentage, 0);
}
/*
 * Report whether vcpu throttling is in effect, i.e. whether the current
 * throttle percentage is non-zero.
 */
bool cpu_throttle_active(void)
{
    int current_pct = cpu_throttle_get_percentage();

    return current_pct != 0;
}
/*
 * Return the current vcpu throttle percentage, read atomically.
 * The value is zero after cpu_throttle_stop(); cpu_throttle_set() stores a
 * value clamped to CPU_THROTTLE_PCT_MIN..CPU_THROTTLE_PCT_MAX.
 */
int cpu_throttle_get_percentage(void)
{
return atomic_read(&throttle_percentage);
}
/*
 * One-time setup of the timer state: initialises the vm_clock seqlock,
 * registers the vmstate_timers description for timers_state, and creates
 * the vcpu throttle timer.
 */
void cpu_ticks_init(void)
{
seqlock_init(&timers_state.vm_clock_seqlock, NULL);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
/* The timer is only created here; cpu_throttle_set() arms it. */
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
void configure_icount(QemuOpts *opts, Error **errp)
......
......@@ -232,6 +232,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->xbzrle_cache->overflow);
}
if (info->has_x_cpu_throttle_percentage) {
monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n",
info->x_cpu_throttle_percentage);
}
qapi_free_MigrationInfo(info);
qapi_free_MigrationCapabilityStatusList(caps);
}
......@@ -272,6 +277,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, " %s: %" PRId64,
MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
params->decompress_threads);
monitor_printf(mon, " %s: %" PRId64,
MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL],
params->x_cpu_throttle_initial);
monitor_printf(mon, " %s: %" PRId64,
MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT],
params->x_cpu_throttle_increment);
monitor_printf(mon, "\n");
}
......@@ -1221,6 +1232,8 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
bool has_compress_level = false;
bool has_compress_threads = false;
bool has_decompress_threads = false;
bool has_x_cpu_throttle_initial = false;
bool has_x_cpu_throttle_increment = false;
int i;
for (i = 0; i < MIGRATION_PARAMETER_MAX; i++) {
......@@ -1235,10 +1248,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
has_decompress_threads = true;
break;
case MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL:
has_x_cpu_throttle_initial = true;
break;
case MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT:
has_x_cpu_throttle_increment = true;
break;
}
qmp_migrate_set_parameters(has_compress_level, value,
has_compress_threads, value,
has_decompress_threads, value,
has_x_cpu_throttle_initial, value,
has_x_cpu_throttle_increment, value,
&err);
break;
}
......
......@@ -321,6 +321,11 @@ struct CPUState {
uint32_t can_do_io;
int32_t exception_index; /* used by m68k TCG */
/* Used to keep track of an outstanding cpu throttle thread for migration
* autoconverge
*/
bool throttle_thread_scheduled;
/* Note that this is accessed at the start of every TB via a negative
offset from AREG0. Leave this field at the end so as to make the
(absolute value) offset as small as possible. This reduces code
......@@ -565,6 +570,43 @@ CPUState *qemu_get_cpu(int index);
*/
bool cpu_exists(int64_t id);
/**
* cpu_throttle_set:
* @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99.
*
* Throttles all vcpus by forcing them to sleep for the given percentage of
* time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly.
* (example: 10ms sleep for every 30ms awake).
*
* cpu_throttle_set can be called as needed to adjust new_throttle_pct.
* Once the throttling starts, it will remain in effect until cpu_throttle_stop
* is called.
*/
void cpu_throttle_set(int new_throttle_pct);
/**
* cpu_throttle_stop:
*
* Stops the vcpu throttling started by cpu_throttle_set.
*/
void cpu_throttle_stop(void);
/**
* cpu_throttle_active:
*
* Returns: %true if the vcpus are currently being throttled, %false otherwise.
*/
bool cpu_throttle_active(void);
/**
* cpu_throttle_get_percentage:
*
* Returns the vcpu throttle percentage. See cpu_throttle_set for details.
*
* Returns: The throttle percentage in range 1 to 99, or 0 if throttling is
* not active.
*/
int cpu_throttle_get_percentage(void);
#ifndef CONFIG_USER_ONLY
typedef void (*CPUInterruptHandler)(CPUState *, int);
......
......@@ -29,8 +29,9 @@
#include "trace.h"
#include "qapi/util.h"
#include "qapi-event.h"
#include "qom/cpu.h"
#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
/* Amount of time to allocate to each "chunk" of bandwidth-throttled
* data. */
......@@ -44,6 +45,9 @@
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10
/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
......@@ -71,6 +75,10 @@ MigrationState *migrate_get_current(void)
DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
.parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
.parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL,
.parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
};
return &current_migration;
......@@ -372,6 +380,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
params->decompress_threads =
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
params->x_cpu_throttle_initial =
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
params->x_cpu_throttle_increment =
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
return params;
}
......@@ -435,6 +447,11 @@ MigrationInfo *qmp_query_migrate(Error **errp)
info->disk->total = blk_mig_bytes_total();
}
if (cpu_throttle_active()) {
info->has_x_cpu_throttle_percentage = true;
info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
}
get_xbzrle_cache_stats(info);
break;
case MIGRATION_STATUS_COMPLETED:
......@@ -494,7 +511,11 @@ void qmp_migrate_set_parameters(bool has_compress_level,
bool has_compress_threads,
int64_t compress_threads,
bool has_decompress_threads,
int64_t decompress_threads, Error **errp)
int64_t decompress_threads,
bool has_x_cpu_throttle_initial,
int64_t x_cpu_throttle_initial,
bool has_x_cpu_throttle_increment,
int64_t x_cpu_throttle_increment, Error **errp)
{
MigrationState *s = migrate_get_current();
......@@ -517,6 +538,18 @@ void qmp_migrate_set_parameters(bool has_compress_level,
"is invalid, it should be in the range of 1 to 255");
return;
}
if (has_x_cpu_throttle_initial &&
    (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) {
    error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
               "x_cpu_throttle_initial",
               "an integer in the range of 1 to 99");
    /* Bail out like the other range checks in this function, so that the
     * rejected value is not stored and no second error is set on errp. */
    return;
}
if (has_x_cpu_throttle_increment &&
    (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) {
    error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
               "x_cpu_throttle_increment",
               "an integer in the range of 1 to 99");
    /* Bail out so that the rejected value is not stored. */
    return;
}
if (has_compress_level) {
s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
......@@ -528,6 +561,15 @@ void qmp_migrate_set_parameters(bool has_compress_level,
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
decompress_threads;
}
if (has_x_cpu_throttle_initial) {
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
x_cpu_throttle_initial;
}
if (has_x_cpu_throttle_increment) {
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
x_cpu_throttle_increment;
}
}
/* shared migration helpers */
......@@ -643,6 +685,10 @@ static MigrationState *migrate_init(const MigrationParams *params)
s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
int decompress_thread_count =
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
int x_cpu_throttle_initial =
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
int x_cpu_throttle_increment =
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
memcpy(enabled_capabilities, s->enabled_capabilities,
sizeof(enabled_capabilities));
......@@ -658,6 +704,10 @@ static MigrationState *migrate_init(const MigrationParams *params)
compress_thread_count;
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
decompress_thread_count;
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
x_cpu_throttle_initial;
s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
x_cpu_throttle_increment;
s->bandwidth_limit = bandwidth_limit;
migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
......@@ -1026,6 +1076,9 @@ static void *migration_thread(void *opaque)
}
}
/* If we enabled cpu throttling for auto-converge, turn it off. */
cpu_throttle_stop();
qemu_mutex_lock_iothread();
if (s->state == MIGRATION_STATUS_COMPLETED) {
int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
......
......@@ -47,9 +47,7 @@
do { } while (0)
#endif
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);
static uint64_t bitmap_sync_count;
......@@ -407,6 +405,29 @@ static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
return size;
}
/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
* If guest dirty memory rate is reduced below the rate at which we can
* transfer pages to the destination then we should be able to complete
* migration. Some workloads dirty memory way too fast and will not effectively
* converge, even with auto-converge.
*/
static void mig_throttle_guest_down(void)
{
    MigrationState *ms = migrate_get_current();
    uint64_t pct_initial =
        ms->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    uint64_t pct_increment =
        ms->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];

    if (cpu_throttle_active()) {
        /* Already throttling: step the percentage up by the increment. */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
        return;
    }

    /* First request: start throttling at the configured initial value. */
    cpu_throttle_set(pct_initial);
}
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
* The important thing is that a stale (not-yet-0'd) page be replaced
* by the new data.
......@@ -599,21 +620,21 @@ static void migration_bitmap_sync(void)
/* The following detection logic can be refined later. For now:
Check to see if the dirtied bytes is 50% more than the approx.
amount of bytes that just got transferred since the last time we
were in this routine. If that happens >N times (for now N==4)
we turn on the throttle down logic */
were in this routine. If that happens twice, start or increase
throttling */
bytes_xfer_now = ram_bytes_transferred();
if (s->dirty_pages_rate &&
(num_dirty_pages_period * TARGET_PAGE_SIZE >
(bytes_xfer_now - bytes_xfer_prev)/2) &&
(dirty_rate_high_cnt++ > 4)) {
(dirty_rate_high_cnt++ >= 2)) {
trace_migration_throttle();
mig_throttle_on = true;
dirty_rate_high_cnt = 0;
mig_throttle_guest_down();
}
bytes_xfer_prev = bytes_xfer_now;
} else {
mig_throttle_on = false;
}
if (migrate_use_xbzrle()) {
if (iterations_prev != acct_info.iterations) {
acct_info.xbzrle_cache_miss_rate =
......@@ -1146,7 +1167,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
RAMBlock *block;
int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
mig_throttle_on = false;
dirty_rate_high_cnt = 0;
bitmap_sync_count = 0;
migration_bitmap_sync_init();
......@@ -1251,7 +1271,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
pages_sent += pages;
acct_info.iterations++;
check_guest_throttling();
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
......@@ -1664,52 +1684,3 @@ void ram_mig_init(void)
qemu_mutex_init(&XBZRLE.lock);
register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}
/* Work item queued on a vcpu through async_run_on_cpu(): releases the
 * iothread lock, sleeps for 30 ms, then takes the lock again.
 */
static void mig_sleep_cpu(void *opq)
{
    const int sleep_ms = 30;

    qemu_mutex_unlock_iothread();
    g_usleep(sleep_ms * 1000); /* g_usleep() takes microseconds */
    qemu_mutex_lock_iothread();
}
/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
   much time in the VM: with the iothread lock held, queue mig_sleep_cpu
   on every vcpu via async_run_on_cpu(). The migration thread will try to
   catch up. The workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
CPUState *cpu;
qemu_mutex_lock_iothread();
CPU_FOREACH(cpu) {
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}
qemu_mutex_unlock_iothread();
}
/* Throttle the guest again if throttling is enabled (mig_throttle_on) and
 * more than 40 ms have passed since the last throttle. The first call made
 * with throttling enabled only records the start time.
 */
static void check_guest_throttling(void)
{
    static int64_t last_throttle_ns;
    int64_t now_ns;
    int64_t elapsed_ms;

    if (!mig_throttle_on) {
        return;
    }

    now_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    if (!last_throttle_ns) {
        /* Nothing to compare against yet: just remember the time. */
        last_throttle_ns = now_ns;
        return;
    }

    elapsed_ms = (now_ns - last_throttle_ns) / 1000000;
    if (elapsed_ms > 40) {
        mig_throttle_guest_down();
        last_throttle_ns = now_ns;
    }
}
......@@ -480,6 +480,10 @@
# may be expensive, but do not actually occur during the iterative
# migration rounds themselves. (since 1.6)
#
# @x-cpu-throttle-percentage: #optional percentage of time guest cpus are being
# throttled during auto-converge. This is only present when auto-converge
# has started throttling guest cpus. (Since 2.5)
#
# Since: 0.14.0
##
{ 'struct': 'MigrationInfo',
......@@ -489,7 +493,8 @@
'*total-time': 'int',
'*expected-downtime': 'int',
'*downtime': 'int',
'*setup-time': 'int'} }
'*setup-time': 'int',
'*x-cpu-throttle-percentage': 'int'} }
##
# @query-migrate
......@@ -596,10 +601,18 @@
# compression, so set the decompress-threads to the number about 1/4
# of compress-threads is adequate.
#
# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled
# when migration auto-converge is activated. The
# default value is 20. (Since 2.5)
#
# @x-cpu-throttle-increment: throttle percentage increase each time
# auto-converge detects that migration is not making
# progress. The default value is 10. (Since 2.5)
# Since: 2.4
##
{ 'enum': 'MigrationParameter',
'data': ['compress-level', 'compress-threads', 'decompress-threads'] }
'data': ['compress-level', 'compress-threads', 'decompress-threads',
'x-cpu-throttle-initial', 'x-cpu-throttle-increment'] }
#
# @migrate-set-parameters
......@@ -612,12 +625,21 @@
#
# @decompress-threads: decompression thread count
#
# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled
# when migration auto-converge is activated. The
# default value is 20. (Since 2.5)
#
# @x-cpu-throttle-increment: throttle percentage increase each time
# auto-converge detects that migration is not making
# progress. The default value is 10. (Since 2.5)
# Since: 2.4
##
{ 'command': 'migrate-set-parameters',
'data': { '*compress-level': 'int',
'*compress-threads': 'int',
'*decompress-threads': 'int'} }
'*decompress-threads': 'int',
'*x-cpu-throttle-initial': 'int',
'*x-cpu-throttle-increment': 'int'} }
#
# @MigrationParameters
......@@ -628,12 +650,22 @@
#
# @decompress-threads: decompression thread count
#
# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled
# when migration auto-converge is activated. The
# default value is 20. (Since 2.5)
#
# @x-cpu-throttle-increment: throttle percentage increase each time
# auto-converge detects that migration is not making
# progress. The default value is 10. (Since 2.5)
#
# Since: 2.4
##
{ 'struct': 'MigrationParameters',
'data': { 'compress-level': 'int',
'compress-threads': 'int',
'decompress-threads': 'int'} }
'decompress-threads': 'int',
'x-cpu-throttle-initial': 'int',
'x-cpu-throttle-increment': 'int'} }
##
# @query-migrate-parameters
#
......
......@@ -580,6 +580,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册