From: Fiona Ebner
To: pve-devel@lists.proxmox.com
Date: Tue, 25 Jun 2024 15:35:51 +0200
Message-Id: <20240625133551.210636-2-f.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20240625133551.210636-1-f.ebner@proxmox.com>
References: <20240625133551.210636-1-f.ebner@proxmox.com>
Subject: [pve-devel] [PATCH v2 qemu 2/2] PVE backup: factor out setting up
 snapshot access for fleecing

Avoids some line bloat in the create_backup_jobs_bh() function and is
in preparation for setting up the snapshot access independently of
fleecing, in particular that will be useful for providing access to
the snapshot via NBD.

Signed-off-by: Fiona Ebner
---

Changes in v2:
    * no actual change here, but the error case will now also clean up
      the block nodes, because it is on top of v2 of patch 1/2

 .../0050-PVE-backup-add-fleecing-option.patch | 126 +++++++++++-------
 ...ve-error-when-copy-before-write-fail.patch |   2 +-
 2 files changed, 78 insertions(+), 50 deletions(-)

diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index 0ba6235..2d2dc40 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner
 Signed-off-by: Thomas Lamprecht
 ---
  block/monitor/block-hmp-cmds.c |   1 +
- pve-backup.c                   | 144 ++++++++++++++++++++++++++++++++-
- qapi/block-core.json           |  10 ++-
- 3 files changed, 151 insertions(+), 4 deletions(-)
+ pve-backup.c                   | 165 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json           |  10 +-
+ 3 files changed, 172 insertions(+), 4 deletions(-)
 
 diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
 index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
      hmp_handle_error(mon, error);
 
 diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..2d3ca78eac 100644
+index 5ebb6a3947..0c1d4a347b 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -7,9 +7,11 @@
@@ -144,7 +144,70 @@ index 5ebb6a3947..2d3ca78eac 100644
  /*
   * Needs to happen outside of coroutine, because it takes the graph write lock.
  */
-@@ -519,9 +549,80 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -483,6 +513,62 @@ static int coroutine_fn pvebackup_co_add_config(
+     goto out;
+ }
+ 
++/*
++ * Setup a snapshot-access block node for a device with associated fleecing image.
++ */
++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
++{
++    Error *local_err = NULL;
++
++    if (!di->fleecing.bs) {
++        error_setg(errp, "no associated fleecing image");
++        return -1;
++    }
++
++    QDict *cbw_opts = qdict_new();
++    qdict_put_str(cbw_opts, "driver", "copy-before-write");
++    qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++    qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++    if (di->bitmap) {
++        /*
++         * Only guest writes to parts relevant for the backup need to be intercepted with
++         * old data being copied to the fleecing image.
++         */
++        qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++        qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++    }
++    /*
++     * Fleecing storage is supposed to be fast and it's better to break backup than guest
++     * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++     * abort a bit before that.
++     */
++    qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++    qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++    di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++    if (!di->fleecing.cbw) {
++        error_setg(errp, "appending cbw node for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    QDict *snapshot_access_opts = qdict_new();
++    qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++    qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++    di->fleecing.snapshot_access =
++        bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++    if (!di->fleecing.snapshot_access) {
++        error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    return 0;
++}
++
+ /*
+  * backup_job_create can *not* be run from a coroutine, so this can't either.
+  * The caller is responsible that backup_mutex is held nonetheless.
+  */
+@@ -519,9 +605,45 @@ static void create_backup_jobs_bh(void *opaque) {
      }
      bdrv_drained_begin(di->bs);
 
 +        if (di->fleecing.bs) {
-+            QDict *cbw_opts = qdict_new();
-+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+            if (di->bitmap) {
-+                /*
-+                 * Only guest writes to parts relevant for the backup need to be intercepted with
-+                 * old data being copied to the fleecing image.
-+                 */
-+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+            }
-+            /*
-+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+             * abort a bit before that.
-+             */
-+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+            if (!di->fleecing.cbw) {
-+                error_setg(errp, "appending cbw node for fleecing failed: %s",
-+                           local_err ? error_get_pretty(local_err) : "unknown error");
-+                bdrv_drained_end(di->bs);
-+                break;
-+            }
-+
-+            QDict *snapshot_access_opts = qdict_new();
-+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+            di->fleecing.snapshot_access =
-+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+            if (!di->fleecing.snapshot_access) {
++            if (setup_snapshot_access(di, &local_err) < 0) {
 +                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
 +                           local_err ? error_get_pretty(local_err) : "unknown error");
 +                cleanup_snapshot_access(di);
 +                bdrv_drained_end(di->bs);
 +                break;
 +            }
++
 +            source_bs = di->fleecing.snapshot_access;
 +            discard_source = true;
 +
@@ -227,7 +255,7 @@ index 5ebb6a3947..2d3ca78eac 100644
      BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT,
      pvebackup_complete_cb, di, backup_state.txn, &local_err);
 
-@@ -535,6 +636,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -535,6 +657,7 @@ static void create_backup_jobs_bh(void *opaque) {
          }
 
          if (!job || local_err) {
 +            cleanup_snapshot_access(di);
              error_setg(errp, "backup_job_create failed: %s",
                         local_err ? error_get_pretty(local_err) : "null");
              break;
-@@ -577,6 +679,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +700,14 @@ static void create_backup_jobs_bh(void *opaque) {
      aio_co_enter(data->ctx, data->co);
  }
  
+
 /*
  * Returns a list of device infos, which needs to be freed by the caller. In
  * case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +694,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +715,7 @@ static void create_backup_jobs_bh(void *opaque) {
  */
 static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
     const char *devlist,
+
     Error **errp)
 {
     gchar **devs = NULL;
-@@ -607,6 +718,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +739,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
      }
      PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
      di->bs = bs;
+
     di_list = g_list_append(di_list, di);
     d++;
     }
-@@ -656,6 +792,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +813,7 @@ UuidInfo coroutine_fn *qmp_backup(
     const char *devlist,
     bool has_speed, int64_t speed,
     bool has_max_workers, int64_t max_workers,
+
     Error **errp)
 {
     assert(qemu_in_coroutine());
-@@ -684,7 +821,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +842,7 @@ UuidInfo coroutine_fn *qmp_backup(
     format = has_format ? format : BACKUP_FORMAT_VMA;
 
     bdrv_graph_co_rdlock();
-    di_list = get_device_info(devlist, &local_err);
+    di_list = get_device_info(devlist, fleecing, &local_err);
     bdrv_graph_co_rdunlock();
     if (local_err) {
         error_propagate(errp, local_err);
-@@ -1089,5 +1226,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1247,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
      ret->query_bitmap_info = true;
      ret->pbs_masterkey = true;
      ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 66fc9fa..a314fe7 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
 #endif /* COPY_BEFORE_WRITE_H */
 
 diff --git a/pve-backup.c b/pve-backup.c
-index 2d3ca78eac..c4178758b3 100644
+index 0c1d4a347b..051ebffe48 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -374,6 +374,18 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-- 
2.39.2
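
The net effect on the caller is that create_backup_jobs_bh() now delegates
the whole copy-before-write/snapshot-access graph manipulation and only
handles job-level errors. Below is a minimal sketch of that caller contract,
assuming the PVEBackupDevInfo type and the setup_snapshot_access() /
cleanup_snapshot_access() helpers from pve-backup.c as modified by this
series; the wrapper name get_backup_source() is hypothetical and only for
illustration, and error propagation is simplified compared to the real
caller:

/*
 * Sketch, not part of the patch: select the node a backup job should read
 * from. With a fleecing image attached, that is the snapshot-access node
 * built by setup_snapshot_access(); without one, the device node itself.
 */
static BlockDriverState *get_backup_source(PVEBackupDevInfo *di, Error **errp)
{
    if (!di->fleecing.bs) {
        /* No fleecing image attached: read the device node directly. */
        return di->bs;
    }
    /*
     * setup_snapshot_access() inserts the copy-before-write filter above
     * di->bs (with the fleecing image as its target) and opens a
     * snapshot-access node on top of the filter, yielding a stable
     * point-in-time view of the disk.
     */
    if (setup_snapshot_access(di, errp) < 0) {
        /* Tear down any partially created cbw/snapshot-access nodes. */
        cleanup_snapshot_access(di);
        return NULL;
    }
    return di->fleecing.snapshot_access;
}

Because setup_snapshot_access() no longer knows anything about the backup
job, the same call sequence could later back an NBD export of the snapshot,
which is exactly what the commit message prepares for.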