From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id A59371FF13A for ; Wed, 27 May 2026 14:38:24 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E30711B8FD; Wed, 27 May 2026 14:38:22 +0200 (CEST) From: Robert Obkircher To: pbs-devel@lists.proxmox.com Subject: [PATCH v1 proxmox-backup] api: do not block tokio worker threads during chunk inserts Date: Wed, 27 May 2026 14:37:51 +0200 Message-ID: <20260527123809.238964-1-r.obkircher@proxmox.com> X-Mailer: git-send-email 2.47.3 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1779885471874 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.055 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [proxmox.com] Message-ID-Hash: 2EEFJUF45NSSMNEVDJPW2YBJHVTFS3DD X-Message-ID-Hash: 2EEFJUF45NSSMNEVDJPW2YBJHVTFS3DD X-MailFrom: r.obkircher@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Move synchronous operations off the worker threads to prevent blocking the I/O and timer drivers of the entire runtime. This is especially important for S3 uploads, which wait up to 3 hours for the chunk lock. Also prevent worker starvation, which could happen because S3 uploads are wrapped in proxmox_async::runtime::block_on, which prevents other futures from running in the current thread. In the backtrace from the linked forum post, two workers were waiting for chunk locks (presumably due to duplicates) while the remaining 19 were stuck because block_on called std::thread::park. Fixes: https://forum.proxmox.com/threads/183705 Signed-off-by: Robert Obkircher --- src/api2/backup/upload_chunk.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/api2/backup/upload_chunk.rs b/src/api2/backup/upload_chunk.rs index 59e4caee2..eec24add2 100644 --- a/src/api2/backup/upload_chunk.rs +++ b/src/api2/backup/upload_chunk.rs @@ -1,4 +1,5 @@ use std::pin::Pin; +use std::sync::Arc; use std::task::{Context, Poll}; use anyhow::{Error, bail, format_err}; @@ -8,6 +9,7 @@ use http_body_util::{BodyDataStream, BodyExt}; use hyper::body::Incoming; use hyper::http::request::Parts; use serde_json::{Value, json}; +use tokio::task::spawn_blocking; use proxmox_router::{ApiHandler, ApiMethod, ApiResponseFuture, RpcEnvironment}; use proxmox_schema::*; @@ -232,14 +234,18 @@ async fn upload_to_backend( let (digest, size, chunk) = UploadChunk::new(BodyDataStream::new(req_body), digest, size, encoded_size).await?; + let datastore = Arc::clone(&env.datastore); + let backend = env.backend.clone(); + if env.no_cache { let (is_duplicate, chunk_size) = - env.datastore - .insert_chunk_no_cache(&chunk, &digest, &env.backend)?; + spawn_blocking(move || datastore.insert_chunk_no_cache(&chunk, &digest, &backend)) + .await??; return Ok((digest, size, chunk_size as u32, is_duplicate)); } - let (is_duplicate, chunk_size) = env.datastore.insert_chunk(&chunk, &digest, &env.backend)?; + let (is_duplicate, chunk_size) = + spawn_blocking(move || datastore.insert_chunk(&chunk, &digest, &backend)).await??; Ok((digest, size, chunk_size as u32, is_duplicate)) } -- 2.47.3