From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 328B31FF13D for ; Thu, 08 Jan 2026 12:25:16 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id DAFD3209AC; Thu, 8 Jan 2026 11:44:42 +0100 (CET) Message-ID: <311156c7-9bf8-4d2f-9707-a1e88edcaaf0@proxmox.com> Date: Thu, 8 Jan 2026 11:44:08 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird From: Christian Ebner To: Proxmox Backup Server development discussion , Robert Obkircher References: <20251219161850.244154-1-r.obkircher@proxmox.com> <20251219161850.244154-2-r.obkircher@proxmox.com> Content-Language: en-US, de-DE In-Reply-To: <20251219161850.244154-2-r.obkircher@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1767869010703 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.046 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH v2 proxmox-backup 1/5] fix #3847: datastore: support writing fidx files of unknown size X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" some nits inline On 12/19/25 5:19 PM, Robert Obkircher wrote: > Use mremap and ftruncate to support growable FixedIndexWriters. Grow > exponentially from a small initial index size for efficiency. Truncate > excessive capacity after encountering a non-full block or on close. > > Signed-off-by: Robert Obkircher > --- > pbs-datastore/src/datastore.rs | 2 +- > pbs-datastore/src/fixed_index.rs | 98 ++++++++++++++++++++++++++++++-- > 2 files changed, 93 insertions(+), 7 deletions(-) > > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index 9c57aaac..af712726 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -591,7 +591,7 @@ impl DataStore { > pub fn create_fixed_writer>( > &self, > filename: P, > - size: usize, > + size: Option, > chunk_size: usize, > ) -> Result { > let index = FixedIndexWriter::create( > diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs > index 6c3be2d4..42b97464 100644 > --- a/pbs-datastore/src/fixed_index.rs > +++ b/pbs-datastore/src/fixed_index.rs > @@ -1,6 +1,7 @@ > use std::fs::File; > use std::io::Write; > use std::io::{Seek, SeekFrom}; > +use std::os::unix::fs::FileExt; > use std::os::unix::io::AsRawFd; > use std::path::{Path, PathBuf}; > use std::ptr::NonNull; > @@ -222,6 +223,8 @@ pub struct FixedIndexWriter { > index: *mut u8, > pub uuid: [u8; 16], > pub ctime: i64, > + growable_size: bool, > + write_size_on_close: bool, > } > > // `index` is mmap()ed which cannot be thread-local so should be sendable > @@ -237,12 +240,15 @@ impl Drop for FixedIndexWriter { > } > > impl FixedIndexWriter { > + // TODO: this is deliberately small at the moment to test resizing > + const INITIAL_CHUNKS_IF_UNKNOWN: usize = 4; nit: this is actually the initial index length, so maybe INITIAL_INDEX_LENGTH or FALLBACK_DEFAULT_INDEX_LENGTH? > + > #[allow(clippy::cast_ptr_alignment)] > // Requires obtaining a shared chunk store lock beforehand > pub fn create( > store: Arc, > path: &Path, > - size: usize, > + known_size: Option, > chunk_size: usize, > ) -> Result { > let full_path = store.relative_path(path); > @@ -264,6 +270,7 @@ impl FixedIndexWriter { > } > > let ctime = proxmox_time::epoch_i64(); > + let size = known_size.unwrap_or(0); > > let uuid = Uuid::generate(); > > @@ -280,7 +287,9 @@ impl FixedIndexWriter { > > file.write_all(&buffer)?; > > - let index_length = size.div_ceil(chunk_size); > + let index_length = known_size > + .map(|s| s.div_ceil(chunk_size)) > + .unwrap_or(Self::INITIAL_CHUNKS_IF_UNKNOWN); > let index_size = index_length * 32; > nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?; > > @@ -308,11 +317,69 @@ impl FixedIndexWriter { > index: data, > ctime, > uuid: *uuid.as_bytes(), > + growable_size: known_size.is_none(), > + write_size_on_close: known_size.is_none(), > }) > } > > + fn resize_index(&mut self, new_index_length: usize) -> Result<(), Error> { > + let old_index_size = self.index_length * 32; > + > + let header_size = std::mem::size_of::(); > + let new_index_size = new_index_length * 32; > + let new_file_size = (header_size + new_index_size) as i64; > + > + let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| { > + format_err!("Can't resize FixedIndexWriter index because the mmap pointer is null.") > + })?; > + > + nix::unistd::ftruncate(&self.file, new_file_size)?; > + > + let new_index = unsafe { > + nix::sys::mman::mremap( > + index_addr, > + old_index_size, > + new_index_size, > + nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE, > + None, > + ) > + }? > + .as_ptr() > + .cast::(); > + > + self.index = new_index; > + self.index_length = new_index_length; > + > + Ok(()) > + } > + nit: include a docstring for this method, although not present for the pub methods we should aim to add them. > + pub fn grow_to_size(&mut self, requested: usize) -> Result<(), Error> { > + if self.size < requested { > + if !self.growable_size { > + bail!("refusing to resize from {} to {}", self.size, requested); > + } > + let len = requested.div_ceil(self.chunk_size); > + if len * self.chunk_size != requested { > + self.growable_size = false; // ensures only the last chunk can be smaller > + self.resize_index(len)?; > + } else { question: what is the reason for the 1.5 factor, why not e.g. doubling the length? Is max virtual memory a concern? > + // grow by 1.5x > + let mut new_len = self.index_length.max(2); > + while new_len < len { > + new_len += new_len / 2; > + } > + debug_assert!(new_len * self.chunk_size >= requested); > + self.resize_index(new_len)?; > + } > + self.size = requested; > + } > + Ok(()) > + } > + nit: this now is the current index length and will change when growing, so the method name should reflect that > pub fn index_length(&self) -> usize { > - self.index_length > + let len = self.size.div_ceil(self.chunk_size); nit: I think we should avoid the possible panic here, and return an error instead. Although it is clear that this should never happen under normal operations. > + assert!((self.write_size_on_close && len <= self.index_length) || len == self.index_length); > + len > } > > fn unmap(&mut self) -> Result<(), Error> { > @@ -336,15 +403,26 @@ impl FixedIndexWriter { > bail!("cannot close already closed index file."); > } > > - let index_size = self.index_length * 32; > + let used_index_length = self.index_length(); > + let index_size = used_index_length * 32; > let data = unsafe { std::slice::from_raw_parts(self.index, index_size) }; > let index_csum = openssl::sha::sha256(data); > > self.unmap()?; > > + if used_index_length < self.index_length { > + let header_size = std::mem::size_of::(); > + nix::unistd::ftruncate(&self.file, (header_size + index_size) as i64)?; > + self.index_length = used_index_length; > + } > + > let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum); > - self.file.seek(SeekFrom::Start(csum_offset as u64))?; > - self.file.write_all(&index_csum)?; > + self.file.write_all_at(&index_csum, csum_offset as u64)?; nit: the changes above are a bit independent and might be pulled out into their own patch > + if self.write_size_on_close { > + let size_offset = std::mem::offset_of!(FixedIndexHeader, size); > + self.file > + .write_all_at(&(self.size as u64).to_le_bytes(), size_offset as u64)?; > + } > self.file.flush()?; > > if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { > @@ -407,6 +485,14 @@ impl FixedIndexWriter { > } > > pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> { > + if self.growable_size { nit: this error might be misunderstood, as the backup is using fixed size chunking. So maybe better to reword this to e.g. "reusing the fixed index is only supported with known input size" Further, this might be > + bail!("reusing the index is only supported with a fixed size"); > + } > + > + if self.chunk_size != reader.chunk_size { > + bail!("chunk size mismatch"); > + } > + > if self.index_length != reader.index_count() { > bail!("clone_data_from failed - index sizes not equal"); > } _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel