From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id A47881FF187 for ; Mon, 25 Aug 2025 15:10:49 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4EC3112ED7; Mon, 25 Aug 2025 15:10:53 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Mon, 25 Aug 2025 15:10:05 +0200 Message-ID: <20250825131007.626777-6-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.2 In-Reply-To: <20250825131007.626777-1-c.ebner@proxmox.com> References: <20250825131007.626777-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1756127415183 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.043 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox v2 5/6] s3-client: add retry logic for transient client errors X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Implement retry logic with exponentially increasing backoff time for transient client errors. For this, clone the request by destructuring it and efficiently cloning its body, leveraging Bytes::clone(). 
Retry up to 3 times, adding an exponentially increasing backoff time for each retry starting at 1 second, with the intention to reduce network congestion and remote system overload. Signed-off-by: Christian Ebner --- proxmox-s3-client/src/client.rs | 62 +++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/proxmox-s3-client/src/client.rs b/proxmox-s3-client/src/client.rs index 64d62c54..e3845111 100644 --- a/proxmox-s3-client/src/client.rs +++ b/proxmox-s3-client/src/client.rs @@ -39,6 +39,8 @@ const S3_TCP_KEEPALIVE_TIME: u32 = 120; const MAX_S3_UPLOAD_RETRY: usize = 3; // Assumed minimum upload rate of 1 KiB/s for dynamic put object request timeout calculation. const S3_MIN_ASSUMED_UPLOAD_RATE: u64 = 1024; +const MAX_S3_HTTP_REQUEST_RETRY: usize = 3; +const S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT: Duration = Duration::from_secs(1); /// S3 object key path prefix without the context prefix as defined by the client options. /// @@ -293,23 +295,53 @@ impl S3Client { timeout: Option, ) -> Result, Error> { let request = self.prepare(request).await?; - if request.method() == Method::PUT { - if let Some(limiter) = &self.put_rate_limiter { - let sleep = { - let mut limiter = limiter.lock().unwrap(); - limiter.register_traffic(Instant::now(), 1) - }; - tokio::time::sleep(sleep).await; + + let (parts, body) = request.into_parts(); + let body_bytes = body + .bytes() + .ok_or_else(|| format_err!("cannot prepare request with streaming body"))?; + + let deadline = timeout.map(|timeout| tokio::time::Instant::now() + timeout); + + for retry in 0..MAX_S3_HTTP_REQUEST_RETRY { + let request = Request::from_parts(parts.clone(), Body::from(body_bytes.clone())); + if parts.method == Method::PUT { + if let Some(limiter) = &self.put_rate_limiter { + let sleep = { + let mut limiter = limiter.lock().unwrap(); + limiter.register_traffic(Instant::now(), 1) + }; + tokio::time::sleep(sleep).await; + } + } + + if retry > 0 { + let backoff_secs = 
S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT * 3_u32.pow(retry as u32); + tokio::time::sleep(backoff_secs).await; + } + + let response = if let Some(deadline) = deadline { + tokio::time::timeout_at(deadline, self.client.request(request)).await + } else { + Ok(self.client.request(request).await) + }; + + match response { + Ok(Ok(response)) => return Ok(response), + Ok(Err(err)) => { + if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 { + return Err(err.into()); + } + } + Err(_elapsed) => { + if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 { + bail!("request timed out exceeding retries"); + } + } } } - let response = if let Some(timeout) = timeout { - tokio::time::timeout(timeout, self.client.request(request)) - .await - .context("request timeout")?? - } else { - self.client.request(request).await? - }; - Ok(response) + + bail!("failed to send request exceeding retries"); } /// Check if bucket exists and got permissions to access it. -- 2.47.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel