From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 80FC069203 for ; Thu, 11 Feb 2021 17:11:33 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 7A05B1E664 for ; Thu, 11 Feb 2021 17:11:33 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 21CE31E628 for ; Thu, 11 Feb 2021 17:11:32 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DD91C46245 for ; Thu, 11 Feb 2021 17:11:31 +0100 (CET) From: Stefan Reiter To: pve-devel@lists.proxmox.com Date: Thu, 11 Feb 2021 17:11:15 +0100 Message-Id: <20210211161115.15209-5-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20210211161115.15209-1-s.reiter@proxmox.com> References: <20210211161115.15209-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.031 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH INFO v2 qemu 5/5] PVE: savevm-async: improve snapshot abort X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 11 Feb 2021 16:11:33 -0000 Do not block the VM and save the state on aborting a snapshot, as the snapshot will be invalid anyway. Also, when aborting, wait for the target file to be closed, otherwise a client might run into race-conditions when trying to remove the file still opened by QEMU. Signed-off-by: Stefan Reiter --- Included in first patch, squashed into existing patches. New in v2. hmp-commands.hx | 3 +- migration/savevm-async.c | 75 +++++++++++++++++++++++++++++++++------- monitor/hmp-cmds.c | 2 +- qapi/misc.json | 2 +- 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index b7639d189d..54de3f80e6 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1927,5 +1927,6 @@ ERST .args_type = "", .params = "", .help = "Resume VM after snaphot.", - .cmd = hmp_savevm_end, + .cmd = hmp_savevm_end, + .coroutine = true, }, diff --git a/migration/savevm-async.c b/migration/savevm-async.c index 156b7a030e..8a17ec1f74 100644 --- a/migration/savevm-async.c +++ b/migration/savevm-async.c @@ -16,6 +16,7 @@ #include "qapi/qapi-commands-misc.h" #include "qapi/qapi-commands-block.h" #include "qemu/cutils.h" +#include "qemu/timer.h" #include "qemu/main-loop.h" #include "qemu/rcu.h" @@ -52,8 +53,15 @@ static struct SnapshotState { int64_t total_time; QEMUBH *finalize_bh; Coroutine *co; + QemuCoSleepState *target_close_wait; } snap_state; +static bool savevm_aborted(void) +{ + return snap_state.state == SAVE_STATE_CANCELLED || + snap_state.state == SAVE_STATE_ERROR; +} + SaveVMInfo *qmp_query_savevm(Error **errp) { SaveVMInfo *info = g_malloc0(sizeof(*info)); @@ -106,17 +114,23 @@ static int save_snapshot_cleanup(void) } if (snap_state.target) { - /* try to truncate, but ignore errors (will fail on block devices). - * note1: bdrv_read() need whole blocks, so we need to round up - * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment - */ - size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2); - blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL); + if (!savevm_aborted()) { + /* try to truncate, but ignore errors (will fail on block devices). + * note1: bdrv_read() need whole blocks, so we need to round up + * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment + */ + size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2); + blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL); + } blk_op_unblock_all(snap_state.target, snap_state.blocker); error_free(snap_state.blocker); snap_state.blocker = NULL; blk_unref(snap_state.target); snap_state.target = NULL; + + if (snap_state.target_close_wait) { + qemu_co_sleep_wake(snap_state.target_close_wait); + } } return ret; @@ -202,6 +216,8 @@ static void process_savevm_finalize(void *opaque) AioContext *iohandler_ctx = iohandler_get_aio_context(); MigrationState *ms = migrate_get_current(); + bool aborted = savevm_aborted(); + #ifdef DEBUG_SAVEVM_STATE int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); #endif @@ -223,10 +239,13 @@ static void process_savevm_finalize(void *opaque) save_snapshot_error("vm_stop_force_state error %d", ret); } - (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false); - ret = qemu_file_get_error(snap_state.file); - if (ret < 0) { - save_snapshot_error("qemu_savevm_state_iterate error %d", ret); + if (!aborted) { + /* skip state saving if we aborted, snapshot will be invalid anyway */ + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false); + ret = qemu_file_get_error(snap_state.file); + if (ret < 0) { + save_snapshot_error("qemu_savevm_state_iterate error %d", ret); + } } DPRINTF("state saving complete\n"); @@ -235,7 +254,7 @@ static void process_savevm_finalize(void *opaque) /* clear migration state */ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, - ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED); + ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED); ms->to_dst_file = NULL; qemu_savevm_state_cleanup(); @@ -245,6 +264,9 @@ static void process_savevm_finalize(void *opaque) save_snapshot_error("save_snapshot_cleanup error %d", ret); } else if (snap_state.state == SAVE_STATE_ACTIVE) { snap_state.state = SAVE_STATE_COMPLETED; + } else if (aborted) { + save_snapshot_error("process_savevm_cleanup: found aborted state: %d", + snap_state.state); } else { save_snapshot_error("process_savevm_cleanup: invalid state: %d", snap_state.state); @@ -434,11 +456,14 @@ restart: if (snap_state.saved_vm_running) { vm_start(); + snap_state.saved_vm_running = false; } } -void qmp_savevm_end(Error **errp) +void coroutine_fn qmp_savevm_end(Error **errp) { + int64_t timeout; + if (snap_state.state == SAVE_STATE_DONE) { error_set(errp, ERROR_CLASS_GENERIC_ERROR, "VM snapshot not started\n"); @@ -447,14 +472,38 @@ void qmp_savevm_end(Error **errp) if (snap_state.state == SAVE_STATE_ACTIVE) { snap_state.state = SAVE_STATE_CANCELLED; - return; + goto wait_for_close; } if (snap_state.saved_vm_running) { vm_start(); + snap_state.saved_vm_running = false; } snap_state.state = SAVE_STATE_DONE; + +wait_for_close: + if (!snap_state.target) { + DPRINTF("savevm-end: no target file open\n"); + return; + } + + /* wait until cleanup is done before returning, this ensures that after this + * call exits the statefile will be closed and can be removed immediately */ + DPRINTF("savevm-end: waiting for cleanup\n"); + timeout = 30L * 1000 * 1000 * 1000; + qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, + &snap_state.target_close_wait); + snap_state.target_close_wait = NULL; + if (snap_state.target) { + save_snapshot_error("timeout waiting for target file close in " + "qmp_savevm_end"); + /* we cannot assume the snapshot finished in this case, so leave the + * state alone - caller has to figure something out */ + return; + } + + DPRINTF("savevm-end: cleanup done\n"); } // FIXME: Deprecated diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index f47fc01c10..95f4e7f5c1 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -2049,7 +2049,7 @@ void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, errp); } -void hmp_savevm_end(Monitor *mon, const QDict *qdict) +void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict) { Error *errp = NULL; diff --git a/qapi/misc.json b/qapi/misc.json index b7e132698e..4f5333d960 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -506,7 +506,7 @@ # Resume VM after a snapshot. # ## -{ 'command': 'savevm-end' } +{ 'command': 'savevm-end', 'coroutine': true } ## # @CommandLineParameterType: -- 2.20.1