From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id AA4F71FF13A for ; Wed, 15 Apr 2026 10:33:55 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 5C14383E4; Wed, 15 Apr 2026 10:33:55 +0200 (CEST) Date: Wed, 15 Apr 2026 10:33:46 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= Subject: Re: [PATCH proxmox-backup 1/2] fix #6373: HTTP level reader heartbeat for proxy connection keepalive To: Christian Ebner , pbs-devel@lists.proxmox.com References: <20260129122700.448448-1-c.ebner@proxmox.com> <20260129122700.448448-3-c.ebner@proxmox.com> In-Reply-To: <20260129122700.448448-3-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.17.0 (https://github.com/astroidmail/astroid) Message-Id: <1776240724.q4ii7heihr.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1776241951976 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.054 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: XH7QMTQBXBU4VUPKY3NSJY3VUI67UZOO X-Message-ID-Hash: XH7QMTQBXBU4VUPKY3NSJY3VUI67UZOO X-MailFrom: f.gruenbichler@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On January 29, 2026 1:26 pm, Christian Ebner wrote: > Backup readers can have long periods of idle connections, e.g. if a > backup snapshot has been mounted and all relevant chunks are locally > cached or a backup session with previous metadata archive not needing > to fetch new contents while the backup is ongoing. >=20 > Proxies like e.g. HAProxy might however close idle connections for > better resource handling [0,1], even multiplexed HTTP/2 connections as > are being used for the Proxmox Backup Sever backup/reader protocol. >=20 > This mainly affects the backup reader, while the backup writer will > do indexing and chunk uploads anyways. but if the storage is slow, there might not be chunk traffic for a few seconds as well? > Therefore, perform heartbeat traffic in the backup reader http2 client > when no other requests are being send. To do so, an new `heartbeat` > API endpoint is introduced as part of the backup reader protocol, > returning empty on GET requests. By making this part of the backup > reader protocol, this is limited to active reader HTTP/2 sessions. >=20 > On the client side the heartbeat is send out periodically whenever a > timeout is being reached, the timeout however being reset if other > requests are being performed via the http2 client. >=20 > Since older servers do not provide the new API path, ignore errors > as the response is not strictly necessary for the connection to > remain established. >=20 > The timeout is currently only being performed if the timeout value > in seconds is given via the PBS_READER_HEARTBEAT_TIMEOUT. s/timeout/heartbeat/ ? >=20 > Testing was performed using HAProxy with the Proxmox Backup Server > as backend using the following 5 second connection idle timeouts > as configuration parameters in haproxy.cfg: > ``` > ... > defaults > ... > timeout connect 5000 > timeout client 5000 > timeout server 5000 > .. >=20 > frontend http-in > bind *:8007 > mode tcp > default_backend pbs >=20 > backend pbs > mode tcp > http-reuse always > server pbs :8007 verify none > ``` >=20 > As command invocation: > ``` > PBS_READER_HEARTBEAT_TIMEOUT=3D1 proxmox-backup-client mount <= archive> \ > --repository @: --verbo= se > ``` >=20 > [0] https://www.haproxy.com/documentation/haproxy-configuration-manual/la= test/#4-timeout%20client > [1] https://www.haproxy.com/documentation/haproxy-configuration-manual/la= test/#4.2-timeout%20http-keep-alive >=20 > Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=3D6373 > Signed-off-by: Christian Ebner > --- > pbs-client/src/backup_reader.rs | 69 ++++++++++++++++++++++++++++++--- > src/api2/reader/mod.rs | 9 ++++- > 2 files changed, 72 insertions(+), 6 deletions(-) >=20 > diff --git a/pbs-client/src/backup_reader.rs b/pbs-client/src/backup_read= er.rs > index 88cba599b..95236bef2 100644 > --- a/pbs-client/src/backup_reader.rs > +++ b/pbs-client/src/backup_reader.rs > @@ -1,10 +1,13 @@ > use anyhow::{format_err, Error}; > use std::fs::File; > use std::io::{Seek, SeekFrom, Write}; > +use std::str::FromStr; > use std::sync::Arc; > +use std::time::Duration; > =20 > use futures::future::AbortHandle; > use serde_json::{json, Value}; > +use tokio::sync::mpsc; > =20 > use pbs_api_types::{BackupArchiveName, BackupDir, BackupNamespace, MANIF= EST_BLOB_NAME}; > use pbs_datastore::data_blob::DataBlob; > @@ -18,26 +21,65 @@ use pbs_tools::sha::sha256; > =20 > use super::{H2Client, HttpClient}; > =20 > +enum HeartBeatMsg { > + Reset, > + Abort, > +} > + > /// Backup Reader > pub struct BackupReader { > h2: H2Client, > abort: AbortHandle, > crypt_config: Option>, > + heartbeat: Option>, > } > =20 > impl Drop for BackupReader { > fn drop(&mut self) { > + self.send_msg_heartbeat(HeartBeatMsg::Abort); > self.abort.abort(); > } > } > =20 > impl BackupReader { > fn new(h2: H2Client, abort: AbortHandle, crypt_config: Option>) -> Arc { > - Arc::new(Self { > - h2, > - abort, > - crypt_config, > - }) > + let timeout =3D match std::env::var("PBS_READER_HEARTBEAT_TIMEOU= T") { > + Ok(val) =3D> u64::from_str(&val).map(Duration::from_secs).ok= (), if the var is set but contains a bogus value, shouldn't that be an error? > + Err(_err) =3D> None, > + }; > + > + if let Some(timeout) =3D timeout { > + let (send, mut recv) =3D mpsc::channel(1); > + let backup_reader =3D Arc::new(Self { > + h2, > + abort, > + crypt_config, > + heartbeat: Some(send), > + }); > + let reader_cloned =3D Arc::clone(&backup_reader); > + > + tokio::spawn(async move { > + loop { > + match tokio::time::timeout(timeout, recv.recv()).awa= it { > + Ok(Some(HeartBeatMsg::Reset)) =3D> (), > + Ok(Some(HeartBeatMsg::Abort)) | Ok(None) =3D> br= eak, > + Err(_elapsed) =3D> { > + // connection idle timeout reached, send hea= tbeat > + let _ =3D reader_cloned.h2.get("heartbeat", = None).await; > + } > + } > + } > + }); > + > + backup_reader > + } else { > + Arc::new(Self { > + h2, > + abort, > + crypt_config, > + heartbeat: None, > + }) > + } > } > =20 > /// Create a new instance by upgrading the connection at '/api2/json= /reader' > @@ -79,21 +121,25 @@ impl BackupReader { > =20 > /// Execute a GET request > pub async fn get(&self, path: &str, param: Option) -> Result<= Value, Error> { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > self.h2.get(path, param).await > } > =20 > /// Execute a PUT request > pub async fn put(&self, path: &str, param: Option) -> Result<= Value, Error> { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > self.h2.put(path, param).await > } > =20 > /// Execute a POST request > pub async fn post(&self, path: &str, param: Option) -> Result= { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > self.h2.post(path, param).await > } > =20 > /// Execute a GET request and send output to a writer > pub async fn download(&self, file_name: &str, outpu= t: W) -> Result<(), Error> { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > let path =3D "download"; > let param =3D json!({ "file-name": file_name }); > self.h2.download(path, Some(param), output).await > @@ -103,6 +149,7 @@ impl BackupReader { > /// > /// This writes random data, and is only useful to test download spe= ed. > pub async fn speedtest(&self, output: W) -> Result<= (), Error> { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > self.h2.download("speedtest", None, output).await > } > =20 > @@ -112,12 +159,14 @@ impl BackupReader { > digest: &[u8; 32], > output: W, > ) -> Result<(), Error> { > + self.send_msg_heartbeat(HeartBeatMsg::Reset); > let path =3D "chunk"; > let param =3D json!({ "digest": hex::encode(digest) }); > self.h2.download(path, Some(param), output).await > } > =20 > pub fn force_close(self) { > + self.send_msg_heartbeat(HeartBeatMsg::Abort); > self.abort.abort(); > } > =20 > @@ -205,4 +254,14 @@ impl BackupReader { > =20 > Ok(index) > } > + > + /// Send given message to the heartbeat closure. > + /// > + /// All errors are being ignored, since they cannot be handled anywa= ys. > + fn send_msg_heartbeat(&self, msg: HeartBeatMsg) { > + let _ =3D self > + .heartbeat > + .as_ref() > + .map(|heartbeat| heartbeat.try_send(msg)); > + } > } > diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs > index f7adc366f..1c673d7a6 100644 > --- a/src/api2/reader/mod.rs > +++ b/src/api2/reader/mod.rs > @@ -18,7 +18,7 @@ use proxmox_router::{ > http_err, list_subdirs_api_method, ApiHandler, ApiMethod, ApiRespons= eFuture, Permission, > Router, RpcEnvironment, SubdirMap, > }; > -use proxmox_schema::{BooleanSchema, ObjectSchema}; > +use proxmox_schema::{api, BooleanSchema, ObjectSchema}; > use proxmox_sortable_macro::sortable; > =20 > use pbs_api_types::{ > @@ -227,6 +227,7 @@ const READER_API_SUBDIRS: SubdirMap =3D &[ > "download", > &Router::new().download(&API_METHOD_DOWNLOAD_FILE), > ), > + ("heartbeat", &Router::new().download(&API_METHOD_HEARTBEAT)), shouldn't this be `.get` > ("speedtest", &Router::new().download(&API_METHOD_SPEEDTEST)), > ]; > =20 > @@ -433,3 +434,9 @@ fn speedtest( > =20 > future::ok(response).boxed() > } > + > +#[api()] > +/// HTTP level heartbeat to avoid proxies closing long running idle back= up reader connections. > +pub async fn heartbeat(_rpcenv: &mut dyn RpcEnvironment) -> Result<(), E= rror> { and this a regular `fn`? > + Ok(()) > +} > --=20 > 2.47.3 >=20 >=20 >=20 >=20 >=20 >=20