* [pve-devel] [PATCH qemu] fix #4101: acquire job's aio context before calling job_unref
@ 2022-06-09 11:55 Fabian Ebner
2022-06-09 11:57 ` Fabian Ebner
2022-06-09 12:11 ` Wolfgang Bumiller
0 siblings, 2 replies; 3+ messages in thread
From: Fabian Ebner @ 2022-06-09 11:55 UTC (permalink / raw)
To: pve-devel
Otherwise, we might not run into an abort via bdrv_co_yield_to_drain()
(can at least happen when a disk with iothread is used):
> #0 0x00007fef4f5dece1 __GI_raise (libc.so.6 + 0x3bce1)
> #1 0x00007fef4f5c8537 __GI_abort (libc.so.6 + 0x25537)
> #2 0x00005641bce3c71f error_exit (qemu-system-x86_64 + 0x80371f)
> #3 0x00005641bce3d02b qemu_mutex_unlock_impl (qemu-system-x86_64 + 0x80402b)
> #4 0x00005641bcd51655 bdrv_co_yield_to_drain (qemu-system-x86_64 + 0x718655)
> #5 0x00005641bcd52de8 bdrv_do_drained_begin (qemu-system-x86_64 + 0x719de8)
> #6 0x00005641bcd47e07 blk_drain (qemu-system-x86_64 + 0x70ee07)
> #7 0x00005641bcd498cd blk_unref (qemu-system-x86_64 + 0x7108cd)
> #8 0x00005641bcd31e6f block_job_free (qemu-system-x86_64 + 0x6f8e6f)
> #9 0x00005641bcd32d65 job_unref (qemu-system-x86_64 + 0x6f9d65)
> #10 0x00005641bcd93b3d pvebackup_co_complete_stream (qemu-system-x86_64 + 0x75ab3d)
> #11 0x00005641bce4e353 coroutine_trampoline (qemu-system-x86_64 + 0x815353)
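For reference, the change wraps each job_unref() of a backup job in an
acquire/release pair on the job's AioContext, mirroring the hunks below
(a sketch only; the names are those already used in the patched
pve-backup.c):

    if (di->job) {
        /* dropping the last reference can drain the job's blk, which must
         * happen while the job's AioContext is held (see trace above) */
        AioContext *ctx = di->job->job.aio_context;
        aio_context_acquire(ctx);
        job_unref(&di->job->job);
        aio_context_release(ctx);
        di->job = NULL;
    }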
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
...ensure-jobs-in-di_list-are-referenced.patch | 18 ++++++++++++------
...id-segfault-issues-upon-backup-cancel.patch | 6 +++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch b/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
index db86632..ebb7919 100644
--- a/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
+++ b/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
@@ -17,26 +17,29 @@ freed. With unlucky timings it seems possible that:
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
- pve-backup.c | 13 +++++++++++++
- 1 file changed, 13 insertions(+)
+ pve-backup.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
-index 5bed6f4014..cd45e66a61 100644
+index 5bed6f4014..7b094e5018 100644
--- a/pve-backup.c
+++ b/pve-backup.c
-@@ -316,6 +316,11 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
+@@ -316,6 +316,14 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
}
}
+ if (di->job) {
++ AioContext *ctx = di->job->job.aio_context;
++ aio_context_acquire(ctx);
+ job_unref(&di->job->job);
++ aio_context_release(ctx);
+ di->job = NULL;
+ }
+
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
-@@ -494,6 +499,9 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -494,6 +502,9 @@ static void create_backup_jobs_bh(void *opaque) {
aio_context_release(aio_context);
di->job = job;
@@ -46,13 +49,16 @@ index 5bed6f4014..cd45e66a61 100644
if (!job || local_err) {
error_setg(errp, "backup_job_create failed: %s",
-@@ -528,6 +536,11 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -528,6 +539,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_context_release(ctx);
canceled = true;
}
+
+ if (di->job) {
++ AioContext *ctx = di->job->job.aio_context;
++ aio_context_acquire(ctx);
+ job_unref(&di->job->job);
++ aio_context_release(ctx);
+ di->job = NULL;
+ }
}
diff --git a/debian/patches/pve/0056-PVE-Backup-avoid-segfault-issues-upon-backup-cancel.patch b/debian/patches/pve/0056-PVE-Backup-avoid-segfault-issues-upon-backup-cancel.patch
index e0c91e6..a5de91d 100644
--- a/debian/patches/pve/0056-PVE-Backup-avoid-segfault-issues-upon-backup-cancel.patch
+++ b/debian/patches/pve/0056-PVE-Backup-avoid-segfault-issues-upon-backup-cancel.patch
@@ -37,10 +37,10 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
-index cd45e66a61..be21027dad 100644
+index 7b094e5018..26f9aad3d8 100644
--- a/pve-backup.c
+++ b/pve-backup.c
-@@ -352,15 +352,42 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+@@ -355,15 +355,42 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/*
* job_cancel(_sync) does not like to be called from coroutines, so defer to
@@ -89,7 +89,7 @@ index cd45e66a61..be21027dad 100644
aio_co_enter(data->ctx, data->co);
}
-@@ -381,22 +408,12 @@ void coroutine_fn qmp_backup_cancel(Error **errp)
+@@ -384,22 +411,12 @@ void coroutine_fn qmp_backup_cancel(Error **errp)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
--
2.30.2
* Re: [pve-devel] [PATCH qemu] fix #4101: acquire job's aio context before calling job_unref
2022-06-09 11:55 [pve-devel] [PATCH qemu] fix #4101: acquire job's aio context before calling job_unref Fabian Ebner
@ 2022-06-09 11:57 ` Fabian Ebner
2022-06-09 12:11 ` Wolfgang Bumiller
1 sibling, 0 replies; 3+ messages in thread
From: Fabian Ebner @ 2022-06-09 11:57 UTC (permalink / raw)
To: pve-devel
On 09.06.22 at 13:55, Fabian Ebner wrote:
> Otherwise, we might not run into an abort via bdrv_co_yield_to_drain()
Sorry, the "not" should not be here.
* Re: [pve-devel] [PATCH qemu] fix #4101: acquire job's aio context before calling job_unref
2022-06-09 11:55 [pve-devel] [PATCH qemu] fix #4101: acquire job's aio context before calling job_unref Fabian Ebner
2022-06-09 11:57 ` Fabian Ebner
@ 2022-06-09 12:11 ` Wolfgang Bumiller
1 sibling, 0 replies; 3+ messages in thread
From: Wolfgang Bumiller @ 2022-06-09 12:11 UTC (permalink / raw)
To: Fabian Ebner; +Cc: pve-devel
minor nit
but otherwise LGTM
On Thu, Jun 09, 2022 at 01:55:38PM +0200, Fabian Ebner wrote:
> Otherwise, we might not run into an abort via bdrv_co_yield_to_drain()
> (can at least happen when a disk with iothread is used):
> > #0 0x00007fef4f5dece1 __GI_raise (libc.so.6 + 0x3bce1)
> > #1 0x00007fef4f5c8537 __GI_abort (libc.so.6 + 0x25537)
> > #2 0x00005641bce3c71f error_exit (qemu-system-x86_64 + 0x80371f)
> > #3 0x00005641bce3d02b qemu_mutex_unlock_impl (qemu-system-x86_64 + 0x80402b)
> > #4 0x00005641bcd51655 bdrv_co_yield_to_drain (qemu-system-x86_64 + 0x718655)
> > #5 0x00005641bcd52de8 bdrv_do_drained_begin (qemu-system-x86_64 + 0x719de8)
> > #6 0x00005641bcd47e07 blk_drain (qemu-system-x86_64 + 0x70ee07)
> > #7 0x00005641bcd498cd blk_unref (qemu-system-x86_64 + 0x7108cd)
> > #8 0x00005641bcd31e6f block_job_free (qemu-system-x86_64 + 0x6f8e6f)
> > #9 0x00005641bcd32d65 job_unref (qemu-system-x86_64 + 0x6f9d65)
> > #10 0x00005641bcd93b3d pvebackup_co_complete_stream (qemu-system-x86_64 + 0x75ab3d)
> > #11 0x00005641bce4e353 coroutine_trampoline (qemu-system-x86_64 + 0x815353)
>
> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
> ---
> ...ensure-jobs-in-di_list-are-referenced.patch | 18 ++++++++++++------
> ...id-segfault-issues-upon-backup-cancel.patch | 6 +++---
> 2 files changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch b/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
> index db86632..ebb7919 100644
> --- a/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
> +++ b/debian/patches/pve/0055-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
> @@ -17,26 +17,29 @@ freed. With unlucky timings it seems possible that:
> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
> Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
> ---
> - pve-backup.c | 13 +++++++++++++
> - 1 file changed, 13 insertions(+)
> + pve-backup.c | 19 +++++++++++++++++++
> + 1 file changed, 19 insertions(+)
>
> diff --git a/pve-backup.c b/pve-backup.c
> -index 5bed6f4014..cd45e66a61 100644
> +index 5bed6f4014..7b094e5018 100644
> --- a/pve-backup.c
> +++ b/pve-backup.c
> -@@ -316,6 +316,11 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
> +@@ -316,6 +316,14 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
> }
> }
>
> + if (di->job) {
> ++ AioContext *ctx = di->job->job.aio_context;
> ++ aio_context_acquire(ctx);
> + job_unref(&di->job->job);
> ++ aio_context_release(ctx);
> + di->job = NULL;
(I think it might be nicer to have this assignment before the release
call)
> + }
> +
> // remove self from job list
> backup_state.di_list = g_list_remove(backup_state.di_list, di);
>
> -@@ -494,6 +499,9 @@ static void create_backup_jobs_bh(void *opaque) {
> +@@ -494,6 +502,9 @@ static void create_backup_jobs_bh(void *opaque) {
> aio_context_release(aio_context);
>
> di->job = job;
> @@ -46,13 +49,16 @@ index 5bed6f4014..cd45e66a61 100644
>
> if (!job || local_err) {
> error_setg(errp, "backup_job_create failed: %s",
> -@@ -528,6 +536,11 @@ static void create_backup_jobs_bh(void *opaque) {
> +@@ -528,6 +539,14 @@ static void create_backup_jobs_bh(void *opaque) {
> aio_context_release(ctx);
> canceled = true;
> }
> +
> + if (di->job) {
> ++ AioContext *ctx = di->job->job.aio_context;
> ++ aio_context_acquire(ctx);
Since now both the above cancellation and this unref acquire the aio
context, we could just move the cancellation down into this `if(job)` as
if (!canceled) {
job_cancel_sync(...);
canceled = true;
}
to be a bit more concise
> + job_unref(&di->job->job);
> ++ aio_context_release(ctx);
> + di->job = NULL;
^ (and also assign before release)
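Putting both suggestions together, the second hunk could end up roughly
like this (just a sketch; arguments to job_cancel_sync elided as above,
all other names taken from the patch):

    if (di->job) {
        AioContext *ctx = di->job->job.aio_context;
        aio_context_acquire(ctx);
        if (!canceled) {
            job_cancel_sync(...);   /* arguments as in the existing cancel block */
            canceled = true;
        }
        job_unref(&di->job->job);
        di->job = NULL;             /* assign before releasing the context */
        aio_context_release(ctx);
    }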