* [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling for fleecing
@ 2024-06-17 11:29 Fiona Ebner
2024-06-17 11:29 ` [pve-devel] [PATCH qemu 2/2] PVE backup: factor out setting up snapshot access " Fiona Ebner
2024-06-25 12:57 ` [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling " Fiona Ebner
0 siblings, 2 replies; 3+ messages in thread
From: Fiona Ebner @ 2024-06-17 11:29 UTC (permalink / raw)
To: pve-devel
The drained section needs to be terminated before breaking out of the
loop in the error scenarios. Otherwise, guest IO on the drive would
become stuck.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
.../0050-PVE-backup-add-fleecing-option.patch | 22 ++++++++++---------
...ve-error-when-copy-before-write-fail.patch | 2 +-
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index dbb2883..d8ad45f 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
- pve-backup.c | 135 ++++++++++++++++++++++++++++++++-
+ pve-backup.c | 137 ++++++++++++++++++++++++++++++++-
qapi/block-core.json | 10 ++-
- 3 files changed, 142 insertions(+), 4 deletions(-)
+ 3 files changed, 144 insertions(+), 4 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
hmp_handle_error(mon, error);
diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..a747d12d3d 100644
+index 5ebb6a3947..5ea9e1f82f 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -7,9 +7,11 @@
@@ -134,7 +134,7 @@ index 5ebb6a3947..a747d12d3d 100644
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
-@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -519,9 +544,79 @@ static void create_backup_jobs_bh(void *opaque) {
}
bdrv_drained_begin(di->bs);
@@ -172,6 +172,7 @@ index 5ebb6a3947..a747d12d3d 100644
+ if (!di->fleecing.cbw) {
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
++ bdrv_drained_end(di->bs);
+ break;
+ }
+
@@ -184,6 +185,7 @@ index 5ebb6a3947..a747d12d3d 100644
+ if (!di->fleecing.snapshot_access) {
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
++ bdrv_drained_end(di->bs);
+ break;
+ }
+ source_bs = di->fleecing.snapshot_access;
@@ -214,7 +216,7 @@ index 5ebb6a3947..a747d12d3d 100644
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
-@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +672,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
@@ -229,7 +231,7 @@ index 5ebb6a3947..a747d12d3d 100644
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +687,7 @@ static void create_backup_jobs_bh(void *opaque) {
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
@@ -237,7 +239,7 @@ index 5ebb6a3947..a747d12d3d 100644
Error **errp)
{
gchar **devs = NULL;
-@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +711,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
@@ -269,7 +271,7 @@ index 5ebb6a3947..a747d12d3d 100644
di_list = g_list_append(di_list, di);
d++;
}
-@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +785,7 @@ UuidInfo coroutine_fn *qmp_backup(
const char *devlist,
bool has_speed, int64_t speed,
bool has_max_workers, int64_t max_workers,
@@ -277,7 +279,7 @@ index 5ebb6a3947..a747d12d3d 100644
Error **errp)
{
assert(qemu_in_coroutine());
-@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +814,7 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
@@ -286,7 +288,7 @@ index 5ebb6a3947..a747d12d3d 100644
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);
-@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1219,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 4522d37..7fc2b3c 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
#endif /* COPY_BEFORE_WRITE_H */
diff --git a/pve-backup.c b/pve-backup.c
-index a747d12d3d..4e730aa3da 100644
+index 5ea9e1f82f..aa6ddf4c52 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
--
2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* [pve-devel] [PATCH qemu 2/2] PVE backup: factor out setting up snapshot access for fleecing
2024-06-17 11:29 [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling for fleecing Fiona Ebner
@ 2024-06-17 11:29 ` Fiona Ebner
2024-06-25 12:57 ` [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling " Fiona Ebner
1 sibling, 0 replies; 3+ messages in thread
From: Fiona Ebner @ 2024-06-17 11:29 UTC (permalink / raw)
To: pve-devel
Avoids some line bloat in the create_backup_jobs_bh() function and
prepares for setting up the snapshot access independently of fleecing.
In particular, that will be useful for providing access to the
snapshot via NBD.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
.../0050-PVE-backup-add-fleecing-option.patch | 122 +++++++++++-------
...ve-error-when-copy-before-write-fail.patch | 2 +-
2 files changed, 76 insertions(+), 48 deletions(-)
diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index d8ad45f..fb9f40e 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
- pve-backup.c | 137 ++++++++++++++++++++++++++++++++-
+ pve-backup.c | 158 ++++++++++++++++++++++++++++++++-
qapi/block-core.json | 10 ++-
- 3 files changed, 144 insertions(+), 4 deletions(-)
+ 3 files changed, 165 insertions(+), 4 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
hmp_handle_error(mon, error);
diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..5ea9e1f82f 100644
+index 5ebb6a3947..e04abf574a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -7,9 +7,11 @@
@@ -134,7 +134,70 @@ index 5ebb6a3947..5ea9e1f82f 100644
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
-@@ -519,9 +544,79 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -483,6 +508,62 @@ static int coroutine_fn pvebackup_co_add_config(
+ goto out;
+ }
+
++/*
++ * Set up a snapshot-access block node for a device with an associated fleecing image.
++ */
++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
++{
++ Error *local_err = NULL;
++
++ if (!di->fleecing.bs) {
++ error_setg(errp, "no associated fleecing image");
++ return -1;
++ }
++
++ QDict *cbw_opts = qdict_new();
++ qdict_put_str(cbw_opts, "driver", "copy-before-write");
++ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++ if (di->bitmap) {
++ /*
++ * Only guest writes to parts relevant for the backup need to be intercepted with
++ * old data being copied to the fleecing image.
++ */
++ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++ }
++ /*
++ * Fleecing storage is supposed to be fast and it's better to break backup than guest
++ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++ * abort a bit before that.
++ */
++ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++ qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++ if (!di->fleecing.cbw) {
++ error_setg(errp, "appending cbw node for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ return -1;
++ }
++
++ QDict *snapshot_access_opts = qdict_new();
++ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++ di->fleecing.snapshot_access =
++ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++ if (!di->fleecing.snapshot_access) {
++ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ return -1;
++ }
++
++ return 0;
++}
++
+ /*
+ * backup_job_create can *not* be run from a coroutine, so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+@@ -519,9 +600,44 @@ static void create_backup_jobs_bh(void *opaque) {
}
bdrv_drained_begin(di->bs);
@@ -146,48 +209,13 @@ index 5ebb6a3947..5ea9e1f82f 100644
+ const char *job_id = bdrv_get_device_name(di->bs);
+ bdrv_graph_co_rdunlock();
+ if (di->fleecing.bs) {
-+ QDict *cbw_opts = qdict_new();
-+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+ if (di->bitmap) {
-+ /*
-+ * Only guest writes to parts relevant for the backup need to be intercepted with
-+ * old data being copied to the fleecing image.
-+ */
-+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+ }
-+ /*
-+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+ * abort a bit before that.
-+ */
-+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+ if (!di->fleecing.cbw) {
-+ error_setg(errp, "appending cbw node for fleecing failed: %s",
-+ local_err ? error_get_pretty(local_err) : "unknown error");
-+ bdrv_drained_end(di->bs);
-+ break;
-+ }
-+
-+ QDict *snapshot_access_opts = qdict_new();
-+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+ di->fleecing.snapshot_access =
-+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+ if (!di->fleecing.snapshot_access) {
++ if (setup_snapshot_access(di, &local_err) < 0) {
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ bdrv_drained_end(di->bs);
+ break;
+ }
++
+ source_bs = di->fleecing.snapshot_access;
+ discard_source = true;
+
@@ -216,7 +244,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
-@@ -577,6 +672,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +693,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
@@ -231,7 +259,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +687,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +708,7 @@ static void create_backup_jobs_bh(void *opaque) {
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
@@ -239,7 +267,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
Error **errp)
{
gchar **devs = NULL;
-@@ -607,6 +711,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +732,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
@@ -271,7 +299,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
di_list = g_list_append(di_list, di);
d++;
}
-@@ -656,6 +785,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +806,7 @@ UuidInfo coroutine_fn *qmp_backup(
const char *devlist,
bool has_speed, int64_t speed,
bool has_max_workers, int64_t max_workers,
@@ -279,7 +307,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
Error **errp)
{
assert(qemu_in_coroutine());
-@@ -684,7 +814,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +835,7 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
@@ -288,7 +316,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);
-@@ -1089,5 +1219,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1240,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 7fc2b3c..97de053 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
#endif /* COPY_BEFORE_WRITE_H */
diff --git a/pve-backup.c b/pve-backup.c
-index 5ea9e1f82f..aa6ddf4c52 100644
+index e04abf574a..6a8cfa18c1 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
--
2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling for fleecing
2024-06-17 11:29 [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling for fleecing Fiona Ebner
2024-06-17 11:29 ` [pve-devel] [PATCH qemu 2/2] PVE backup: factor out setting up snapshot access " Fiona Ebner
@ 2024-06-25 12:57 ` Fiona Ebner
1 sibling, 0 replies; 3+ messages in thread
From: Fiona Ebner @ 2024-06-25 12:57 UTC (permalink / raw)
To: pve-devel
Am 17.06.24 um 13:29 schrieb Fiona Ebner:
> The drained section needs to be terminated before breaking out of the
> loop in the error scenarios. Otherwise, guest IO on the drive would
> become stuck.
>
This is incomplete. I'll send a v2.
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-06-25 12:57 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-17 11:29 [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling for fleecing Fiona Ebner
2024-06-17 11:29 ` [pve-devel] [PATCH qemu 2/2] PVE backup: factor out setting up snapshot access " Fiona Ebner
2024-06-25 12:57 ` [pve-devel] [PATCH qemu 1/2] PVE Backup: fixup error handling " Fiona Ebner
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal