From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id AC5441FF13C for ; Thu, 30 Apr 2026 17:06:47 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8E9DDE288; Thu, 30 Apr 2026 17:06:47 +0200 (CEST) From: Robert Obkircher To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup 06/10] chunk_store: add method to limit file system usage Date: Thu, 30 Apr 2026 17:05:47 +0200 Message-ID: <20260430150607.330413-10-r.obkircher@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260430150607.330413-1-r.obkircher@proxmox.com> References: <20260430150607.330413-1-r.obkircher@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1777561503546 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.060 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: CHROGAQOOOOZ4FGPJPEHRC7Z2SP6VMNG X-Message-ID-Hash: CHROGAQOOOOZ4FGPJPEHRC7Z2SP6VMNG X-MailFrom: r.obkircher@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Provide a way to check whether enough space is available to write new backup data to a local chunk store. 
This is especially important on copy-on-write file systems where GC and prune jobs need additional space for metadata updates. The check is not completely safe because multiple threads/processes could perform it at the same time, but with a big enough reservation it should be good enough in practice. Caching is used to avoid unnecessary syscalls, but this is likely only beneficial on NFS. Signed-off-by: Robert Obkircher <r.obkircher@proxmox.com> --- pbs-datastore/src/chunk_store.rs | 12 ++++ pbs-datastore/src/file_system_limit.rs | 87 ++++++++++++++++++++++++++ pbs-datastore/src/lib.rs | 2 + 3 files changed, 101 insertions(+) create mode 100644 pbs-datastore/src/file_system_limit.rs diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index 68db88eab..a02f437c1 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -23,6 +23,7 @@ use crate::data_blob::DataChunkBuilder; use crate::file_formats::{ COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0, }; +use crate::file_system_limit::FileSystemLimit; use crate::{DataBlob, LocalDatastoreLruCache}; const USING_MARKER_FILENAME_EXT: &str = "using"; @@ -35,6 +36,7 @@ pub struct ChunkStore { mutex: Mutex<()>, locker: Option<Arc<Mutex<ProcessLocker>>>, sync_level: DatastoreFSyncLevel, + fs_limit: FileSystemLimit, } // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ? 
@@ -82,6 +84,7 @@ impl ChunkStore { mutex: Mutex::new(()), locker: None, sync_level: Default::default(), + fs_limit: FileSystemLimit::new(None), } } @@ -206,6 +209,7 @@ impl ChunkStore { locker: Some(locker), mutex: Mutex::new(()), sync_level, + fs_limit: FileSystemLimit::new(None), }) } @@ -966,6 +970,10 @@ impl ChunkStore { } (chunk_path, counter) } + + pub(crate) fn check_space(&self, size: u64) -> Result<(), Error> { + self.fs_limit.check_available(&self.base, size) + } } #[derive(PartialEq)] @@ -1001,6 +1009,10 @@ fn test_chunk_store1() { .build() .unwrap(); + chunk_store + .check_space(chunk.raw_size()) + .expect("enough space"); + let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap(); assert!(!exists); diff --git a/pbs-datastore/src/file_system_limit.rs b/pbs-datastore/src/file_system_limit.rs new file mode 100644 index 000000000..fab62d046 --- /dev/null +++ b/pbs-datastore/src/file_system_limit.rs @@ -0,0 +1,87 @@ +use std::path::Path; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; + +use anyhow::{bail, format_err, Error}; + +/// Cached file system space availability check. +/// +/// Supports reserving a safety buffer because multiple threads +/// and processes may pass the check at the same time before they +/// write. +pub struct FileSystemLimit { + reserved: AtomicU64, + base: Instant, + elapsed_nanos: AtomicU64, + available: AtomicU64, +} + +/// Encode `None` as `MAX` because nobody has enough storage to +/// notice the difference. +fn encode_reserved(bytes: Option<u64>) -> u64 { + bytes.map_or(u64::MAX, |b| b.min(u64::MAX - 1)) +} + +impl FileSystemLimit { + /// Specify the amount of reserved space for checks, or disable them with `None`. 
+ pub fn new(reserved_space: Option<u64>) -> Self { + Self { + reserved: AtomicU64::new(encode_reserved(reserved_space)), + base: Instant::now(), + elapsed_nanos: AtomicU64::new(0), + available: AtomicU64::new(0), + } + } + + /// Specify the amount of reserved space for checks, or disable them with `None`. + pub fn set_reserved_space(&self, bytes: Option<u64>) { + self.reserved + .store(encode_reserved(bytes), Ordering::Release); + } + + /// Check if there is probably enough space to write `size` bytes. + /// + /// Repeated calls must specify paths to the same file system. + pub fn check_available(&self, path: &Path, size: u64) -> Result<(), Error> { + let reserved = self.reserved.load(Ordering::Acquire); + if reserved == u64::MAX { + return Ok(()); // disabled + } + let required = reserved.saturating_add(size); + + let since_base = self.base.elapsed().as_nanos() as u64; + let last_update = self.elapsed_nanos.load(Ordering::Acquire); + let since_update = since_base.saturating_sub(last_update); + + // Limit max age in case of unexpected changes like a manual resize of the file system. + if last_update != 0 && since_update as u128 <= Duration::from_secs(1).as_nanos() { + // Assume at most 100 GB/s (1 GB/s = 1 B/ns) + let max_written = 100 * since_update; + + let available = self.available.load(Ordering::Acquire); + if required.saturating_add(max_written) <= available { + log::trace!( "file_system_limit: cached, path={path:?}, available={available}, requested={size}, reserved={reserved}"); + return Ok(()); + } + } + + // Repeated calls on a local file system take less than 2 microseconds, + // so it should be fine if multiple threads get here at the same time + // and race on the stores below. 
+ let info = proxmox_sys::fs::fs_info(path) + .map_err(|e| format_err!("failed to read file system info for {path:?} - {e}"))?; + + let available = info.available; + self.available.store(available, Ordering::Release); + self.elapsed_nanos.store(since_base, Ordering::Release); + + log::trace!( "file_system_limit: uncached, path={path:?}, available={available}, requested={size}, reserved={reserved}"); + if required > available { + // The UI also shows this instead of `info.total` + let total = info.used + info.available; + + bail!("Not enough space: path={path:?}, available={available}/{total}, requested={size}, reserved={reserved}"); + } + Ok(()) + } +} diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs index 6647ee2b6..6a0c58a91 100644 --- a/pbs-datastore/src/lib.rs +++ b/pbs-datastore/src/lib.rs @@ -222,6 +222,8 @@ pub use datastore::{ S3_CLIENT_REQUEST_COUNTER_BASE_PATH, S3_DATASTORE_IN_USE_MARKER, }; +mod file_system_limit; + mod hierarchy; pub use hierarchy::{ ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive, ListSnapshots, -- 2.47.3