public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Fiona Ebner <f.ebner@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH v2 qemu 2/2] PVE backup: factor out setting up snapshot access for fleecing
Date: Tue, 25 Jun 2024 15:35:51 +0200	[thread overview]
Message-ID: <20240625133551.210636-2-f.ebner@proxmox.com> (raw)
In-Reply-To: <20240625133551.210636-1-f.ebner@proxmox.com>

Avoids some line bloat in the create_backup_jobs_bh() function and is
in preparation for setting up the snapshot access independently of
fleecing; in particular, this will be useful for providing access to
the snapshot via NBD.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---

Changes in v2:
    * no actual change here, but the error case will now also clean up
      the block nodes, because it is on top of v2 of patch 1/2

 .../0050-PVE-backup-add-fleecing-option.patch | 126 +++++++++++-------
 ...ve-error-when-copy-before-write-fail.patch |   2 +-
 2 files changed, 78 insertions(+), 50 deletions(-)

diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index 0ba6235..2d2dc40 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
  block/monitor/block-hmp-cmds.c |   1 +
- pve-backup.c                   | 144 ++++++++++++++++++++++++++++++++-
- qapi/block-core.json           |  10 ++-
- 3 files changed, 151 insertions(+), 4 deletions(-)
+ pve-backup.c                   | 165 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json           |  10 +-
+ 3 files changed, 172 insertions(+), 4 deletions(-)
 
 diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
 index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
  
      hmp_handle_error(mon, error);
 diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..2d3ca78eac 100644
+index 5ebb6a3947..0c1d4a347b 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -7,9 +7,11 @@
@@ -144,7 +144,70 @@ index 5ebb6a3947..2d3ca78eac 100644
      /*
       * Needs to happen outside of coroutine, because it takes the graph write lock.
       */
-@@ -519,9 +549,80 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -483,6 +513,62 @@ static int coroutine_fn pvebackup_co_add_config(
+     goto out;
+ }
+ 
++/*
++ * Setup a snapshot-access block node for a device with associated fleecing image.
++ */
++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
++{
++    Error *local_err = NULL;
++
++    if (!di->fleecing.bs) {
++        error_setg(errp, "no associated fleecing image");
++        return -1;
++    }
++
++    QDict *cbw_opts = qdict_new();
++    qdict_put_str(cbw_opts, "driver", "copy-before-write");
++    qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++    qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++    if (di->bitmap) {
++        /*
++         * Only guest writes to parts relevant for the backup need to be intercepted with
++         * old data being copied to the fleecing image.
++         */
++        qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++        qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++    }
++    /*
++     * Fleecing storage is supposed to be fast and it's better to break backup than guest
++     * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++     * abort a bit before that.
++     */
++    qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++    qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++    di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++    if (!di->fleecing.cbw) {
++        error_setg(errp, "appending cbw node for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    QDict *snapshot_access_opts = qdict_new();
++    qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++    qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++    di->fleecing.snapshot_access =
++        bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++    if (!di->fleecing.snapshot_access) {
++        error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    return 0;
++}
++
+ /*
+  * backup_job_create can *not* be run from a coroutine, so this can't either.
+  * The caller is responsible that backup_mutex is held nonetheless.
+@@ -519,9 +605,45 @@ static void create_backup_jobs_bh(void *opaque) {
          }
          bdrv_drained_begin(di->bs);
  
@@ -156,49 +219,14 @@ index 5ebb6a3947..2d3ca78eac 100644
 +        const char *job_id = bdrv_get_device_name(di->bs);
 +        bdrv_graph_co_rdunlock();
 +        if (di->fleecing.bs) {
-+            QDict *cbw_opts = qdict_new();
-+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+            if (di->bitmap) {
-+                /*
-+                 * Only guest writes to parts relevant for the backup need to be intercepted with
-+                 * old data being copied to the fleecing image.
-+                 */
-+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+            }
-+            /*
-+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+             * abort a bit before that.
-+             */
-+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+            if (!di->fleecing.cbw) {
-+                error_setg(errp, "appending cbw node for fleecing failed: %s",
-+                           local_err ? error_get_pretty(local_err) : "unknown error");
-+                bdrv_drained_end(di->bs);
-+                break;
-+            }
-+
-+            QDict *snapshot_access_opts = qdict_new();
-+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+            di->fleecing.snapshot_access =
-+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+            if (!di->fleecing.snapshot_access) {
++            if (setup_snapshot_access(di, &local_err) < 0) {
 +                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
 +                           local_err ? error_get_pretty(local_err) : "unknown error");
 +                cleanup_snapshot_access(di);
 +                bdrv_drained_end(di->bs);
 +                break;
 +            }
++
 +            source_bs = di->fleecing.snapshot_access;
 +            discard_source = true;
 +
@@ -227,7 +255,7 @@ index 5ebb6a3947..2d3ca78eac 100644
              BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
              &local_err);
  
-@@ -535,6 +636,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -535,6 +657,7 @@ static void create_backup_jobs_bh(void *opaque) {
          }
  
          if (!job || local_err) {
@@ -235,7 +263,7 @@ index 5ebb6a3947..2d3ca78eac 100644
              error_setg(errp, "backup_job_create failed: %s",
                         local_err ? error_get_pretty(local_err) : "null");
              break;
-@@ -577,6 +679,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +700,14 @@ static void create_backup_jobs_bh(void *opaque) {
      aio_co_enter(data->ctx, data->co);
  }
  
@@ -250,7 +278,7 @@ index 5ebb6a3947..2d3ca78eac 100644
  /*
   * Returns a list of device infos, which needs to be freed by the caller. In
   * case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +694,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +715,7 @@ static void create_backup_jobs_bh(void *opaque) {
   */
  static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
      const char *devlist,
@@ -258,7 +286,7 @@ index 5ebb6a3947..2d3ca78eac 100644
      Error **errp)
  {
      gchar **devs = NULL;
-@@ -607,6 +718,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +739,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
              }
              PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
              di->bs = bs;
@@ -290,7 +318,7 @@ index 5ebb6a3947..2d3ca78eac 100644
              di_list = g_list_append(di_list, di);
              d++;
          }
-@@ -656,6 +792,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +813,7 @@ UuidInfo coroutine_fn *qmp_backup(
      const char *devlist,
      bool has_speed, int64_t speed,
      bool has_max_workers, int64_t max_workers,
@@ -298,7 +326,7 @@ index 5ebb6a3947..2d3ca78eac 100644
      Error **errp)
  {
      assert(qemu_in_coroutine());
-@@ -684,7 +821,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +842,7 @@ UuidInfo coroutine_fn *qmp_backup(
      format = has_format ? format : BACKUP_FORMAT_VMA;
  
      bdrv_graph_co_rdlock();
@@ -307,7 +335,7 @@ index 5ebb6a3947..2d3ca78eac 100644
      bdrv_graph_co_rdunlock();
      if (local_err) {
          error_propagate(errp, local_err);
-@@ -1089,5 +1226,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1247,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
      ret->query_bitmap_info = true;
      ret->pbs_masterkey = true;
      ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 66fc9fa..a314fe7 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
  
  #endif /* COPY_BEFORE_WRITE_H */
 diff --git a/pve-backup.c b/pve-backup.c
-index 2d3ca78eac..c4178758b3 100644
+index 0c1d4a347b..051ebffe48 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -374,6 +374,18 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-- 
2.39.2



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel


      reply	other threads:[~2024-06-25 13:35 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-25 13:35 [pve-devel] [PATCH v2 qemu 1/2] PVE Backup: fixup error handling " Fiona Ebner
2024-06-25 13:35 ` Fiona Ebner [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240625133551.210636-2-f.ebner@proxmox.com \
    --to=f.ebner@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal