提交 b3f7f0c5 编写于 作者: Z zhanghailiang 提交者: Amit Shah

COLO: Implement the process of failover for primary VM

On the primary side, COLO performs failover when it gets a failover
request from the user; to be exact, when it gets the
'x_colo_lost_heartbeat' command.
The COLO thread will then exit its loop, while the failover BH does the
cleanup work and resumes the VM.
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Amit Shah <amit@amitshah.net>
上级 aef06085
...@@ -32,4 +32,7 @@ void *colo_process_incoming_thread(void *opaque); ...@@ -32,4 +32,7 @@ void *colo_process_incoming_thread(void *opaque);
bool migration_incoming_in_colo_state(void); bool migration_incoming_in_colo_state(void);
COLOMode get_colo_mode(void); COLOMode get_colo_mode(void);
/* failover */
void colo_do_failover(MigrationState *s);
#endif #endif
...@@ -21,5 +21,6 @@ FailoverStatus failover_set_state(FailoverStatus old_state, ...@@ -21,5 +21,6 @@ FailoverStatus failover_set_state(FailoverStatus old_state,
FailoverStatus new_state); FailoverStatus new_state);
FailoverStatus failover_get_state(void); FailoverStatus failover_get_state(void);
void failover_request_active(Error **errp); void failover_request_active(Error **errp);
bool failover_request_is_active(void);
#endif #endif
...@@ -36,7 +36,7 @@ static void colo_failover_bh(void *opaque) ...@@ -36,7 +36,7 @@ static void colo_failover_bh(void *opaque)
return; return;
} }
/* TODO: Do failover work */ colo_do_failover(NULL);
} }
void failover_request_active(Error **errp) void failover_request_active(Error **errp)
......
...@@ -41,6 +41,40 @@ bool migration_incoming_in_colo_state(void) ...@@ -41,6 +41,40 @@ bool migration_incoming_in_colo_state(void)
return mis && (mis->state == MIGRATION_STATUS_COLO); return mis && (mis->state == MIGRATION_STATUS_COLO);
} }
/*
 * Tell whether the VM counts as stopped for COLO purposes.
 *
 * Returns true when the run state is RUN_STATE_COLO, or otherwise when
 * runstate_is_running() reports that the VM is not running.
 */
static bool colo_runstate_is_stopped(void)
{
    /* Being in the COLO run state is sufficient on its own. */
    if (runstate_check(RUN_STATE_COLO)) {
        return true;
    }

    /* Only consulted when the run state is not RUN_STATE_COLO. */
    return !runstate_is_running();
}
/*
 * Failover handling for the primary VM.
 *
 * Requests the migration state change MIGRATION_STATUS_COLO ->
 * MIGRATION_STATUS_COMPLETED on the current MigrationState, then requests
 * the failover state change FAILOVER_STATUS_ACTIVE ->
 * FAILOVER_STATUS_COMPLETED.  The value returned by failover_set_state()
 * is treated as the state that was in effect before the call; if it is
 * not FAILOVER_STATUS_ACTIVE an error is reported.
 */
static void primary_vm_do_failover(void)
{
    MigrationState *mig = migrate_get_current();
    int prev_failover_state;

    migrate_set_state(&mig->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    prev_failover_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                             FAILOVER_STATUS_COMPLETED);
    if (prev_failover_state == FAILOVER_STATUS_ACTIVE) {
        /* Expected case: nothing further to do here. */
        return;
    }

    error_report("Incorrect state (%s) while doing failover for Primary VM",
                 FailoverStatus_lookup[prev_failover_state]);
}
/*
 * Entry point for COLO failover.
 *
 * Forces the VM into RUN_STATE_COLO if it is not already considered
 * stopped, then runs the primary-side failover when the current COLO mode
 * is COLO_MODE_PRIMARY.  Other modes are not handled here.
 *
 * @s: not used by this function (the failover BH passes NULL).
 */
void colo_do_failover(MigrationState *s)
{
    /* The VM must not be running while failover takes place. */
    if (!colo_runstate_is_stopped()) {
        vm_stop_force_state(RUN_STATE_COLO);
    }

    if (get_colo_mode() != COLO_MODE_PRIMARY) {
        return;
    }

    primary_vm_do_failover();
}
static void colo_send_message(QEMUFile *f, COLOMessage msg, static void colo_send_message(QEMUFile *f, COLOMessage msg,
Error **errp) Error **errp)
{ {
...@@ -162,9 +196,20 @@ static int colo_do_checkpoint_transaction(MigrationState *s, ...@@ -162,9 +196,20 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
bioc->usage = 0; bioc->usage = 0;
qemu_mutex_lock_iothread(); qemu_mutex_lock_iothread();
if (failover_get_state() != FAILOVER_STATUS_NONE) {
qemu_mutex_unlock_iothread();
goto out;
}
vm_stop_force_state(RUN_STATE_COLO); vm_stop_force_state(RUN_STATE_COLO);
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("run", "stop"); trace_colo_vm_state_change("run", "stop");
/*
* The failover request BH could be called after vm_stop_force_state(),
* so we need to check the failover state again.
*/
if (failover_get_state() != FAILOVER_STATUS_NONE) {
goto out;
}
/* Disable block migration */ /* Disable block migration */
s->params.blk = 0; s->params.blk = 0;
...@@ -259,6 +304,11 @@ static void colo_process_checkpoint(MigrationState *s) ...@@ -259,6 +304,11 @@ static void colo_process_checkpoint(MigrationState *s)
trace_colo_vm_state_change("stop", "run"); trace_colo_vm_state_change("stop", "run");
while (s->state == MIGRATION_STATUS_COLO) { while (s->state == MIGRATION_STATUS_COLO) {
if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request");
goto out;
}
current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
if (current_time - checkpoint_time < if (current_time - checkpoint_time <
s->parameters.x_checkpoint_delay) { s->parameters.x_checkpoint_delay) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册