From: Robert Obkircher <r.obkircher@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [PATCH v6 proxmox-backup 08/18] datastore: support writing fidx files of unknown size
Date: Tue, 10 Feb 2026 16:06:24 +0100 [thread overview]
Message-ID: <20260210150642.469670-9-r.obkircher@proxmox.com> (raw)
In-Reply-To: <20260210150642.469670-1-r.obkircher@proxmox.com>
Use mremap and ftruncate to support growable FixedIndexWriters. Grow
exponentially from a small initial index size and truncate excess
capacity after encountering a non-full block or on close.
Signed-off-by: Robert Obkircher <r.obkircher@proxmox.com>
---
pbs-datastore/src/datastore.rs | 2 +-
pbs-datastore/src/fixed_index.rs | 135 ++++++++++++++++++++++++++++++-
src/api2/backup/mod.rs | 6 +-
3 files changed, 136 insertions(+), 7 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 3f9c222d..631bdc30 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -695,7 +695,7 @@ impl DataStore {
pub fn create_fixed_writer<P: AsRef<Path>>(
&self,
filename: P,
- size: u64,
+ size: Option<u64>,
chunk_size: u32,
) -> Result<FixedIndexWriter, Error> {
let full_path = self.inner.chunk_store.relative_path(filename.as_ref());
diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
index 953f4a20..3807ba35 100644
--- a/pbs-datastore/src/fixed_index.rs
+++ b/pbs-datastore/src/fixed_index.rs
@@ -225,9 +225,12 @@ pub struct FixedIndexWriter {
chunk_size: u64,
size: u64,
index_length: usize,
+ index_capacity: usize,
index: *mut u8,
pub uuid: [u8; 16],
pub ctime: i64,
+ growable_size: bool,
+ write_size_on_close: bool,
}
// `index` is mmap()ed which cannot be thread-local so should be sendable
@@ -243,11 +246,21 @@ impl Drop for FixedIndexWriter {
}
impl FixedIndexWriter {
+ /// The initial capacity, if the total size is unknown.
+ ///
+ /// This capacity takes up the same amount of space as the header
+ /// and can refer to 128 Blocks * 4 MiB/Block = 512 MiB of content.
+ ///
+ /// On systems with 4 KiB page size this value ensures that the
+ /// mapped length is a multiple of the page size, but this is not
+ /// strictly necessary.
+ const INITIAL_CAPACITY: usize = 4096 / 32;
+
#[allow(clippy::cast_ptr_alignment)]
// Requires obtaining a shared chunk store lock beforehand
pub fn create(
full_path: impl Into<PathBuf>,
- size: u64,
+ known_size: Option<u64>,
chunk_size: u32,
) -> Result<Self, Error> {
let full_path = full_path.into();
@@ -274,6 +287,7 @@ impl FixedIndexWriter {
}
let ctime = proxmox_time::epoch_i64();
+ let size = known_size.unwrap_or(0);
let uuid = Uuid::generate();
@@ -290,8 +304,15 @@ impl FixedIndexWriter {
file.write_all(&buffer)?;
- let index_length = size.div_ceil(chunk_size).try_into()?;
- let index_size = index_length * 32;
+ let (index_length, index_capacity) = match known_size {
+ Some(s) => {
+ let len = s.div_ceil(chunk_size).try_into()?;
+ (len, len)
+ }
+ None => (0, Self::INITIAL_CAPACITY),
+ };
+
+ let index_size = index_capacity * 32;
nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?;
let data = unsafe {
@@ -315,12 +336,90 @@ impl FixedIndexWriter {
chunk_size,
size,
index_length,
+ index_capacity,
index: data,
ctime,
uuid: *uuid.as_bytes(),
+ growable_size: known_size.is_none(),
+ write_size_on_close: known_size.is_none(),
+ })
+ }
+
+ /// If this returns an error, the sizes may be out of sync,
+ /// which is especially bad if the capacity was reduced.
+ fn set_index_capacity(&mut self, new_capacity: usize) -> Result<(), Error> {
+ if new_capacity == self.index_capacity {
+ return Ok(());
+ }
+ let old_index_size = self.index_capacity * 32;
+ let new_index_size = new_capacity * 32;
+ let new_file_size = (size_of::<FixedIndexHeader>() + new_index_size) as i64;
+
+ let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| {
+ format_err!("Can't resize FixedIndexWriter index because the index pointer is null.")
+ })?;
+
+ nix::unistd::ftruncate(&self.file, new_file_size)?;
+
+ let new_index = unsafe {
+ nix::sys::mman::mremap(
+ index_addr,
+ old_index_size,
+ new_index_size,
+ nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE,
+ None,
+ )
+ }?;
+
+ self.index = new_index.as_ptr().cast::<u8>();
+ self.index_capacity = new_capacity;
+ Ok(())
+ }
+
+ /// Unmapping ensures future add and close operations fail.
+ fn set_index_capacity_or_unmap(&mut self, new_capacity: usize) -> Result<(), Error> {
+ self.set_index_capacity(new_capacity).map_err(|e| {
+ let unmap_result = self.unmap();
+ let message = format!(
+ "failed to resize index capacity from {} to {new_capacity} with backing file: {:?}",
+ self.index_capacity, self.tmp_filename
+ );
+ assert!(self.index.is_null(), "{message} {unmap_result:?}");
+ e.context(message)
})
}
+ /// Increase the content size to be at least `requested_size` and
+ /// ensure there is enough capacity.
+ ///
+ /// Only writers that were created without a known size can grow.
+ /// The size also becomes fixed as soon as it is no longer divisible
+ /// by the block size, to ensure that only the last block can be
+ /// smaller.
+ pub fn grow_to_size(&mut self, requested_size: u64) -> Result<(), Error> {
+ if self.size < requested_size {
+ if !self.growable_size {
+ bail!("refusing to resize from {} to {requested_size}", self.size);
+ }
+ let new_len = requested_size.div_ceil(self.chunk_size).try_into()?;
+ if new_len as u64 * self.chunk_size != requested_size {
+ // not a full chunk, so this must be the last one
+ self.growable_size = false;
+ self.set_index_capacity_or_unmap(new_len)?;
+ } else if new_len > self.index_capacity {
+ let new_capacity = new_len
+ .checked_next_power_of_two()
+ .ok_or_else(|| format_err!("capacity overflow"))?;
+ self.set_index_capacity_or_unmap(new_capacity)?;
+ }
+ assert!(new_len <= self.index_capacity);
+ self.index_length = new_len;
+ self.size = requested_size;
+ }
+ Ok(())
+ }
+
+ /// The current length of the index.
pub fn index_length(&self) -> usize {
self.index_length
}
@@ -330,7 +429,7 @@ impl FixedIndexWriter {
return Ok(());
};
- let index_size = self.index_length * 32;
+ let index_size = self.index_capacity * 32;
if let Err(err) = unsafe { nix::sys::mman::munmap(index, index_size) } {
bail!("unmap file {:?} failed - {}", self.tmp_filename, err);
@@ -352,9 +451,24 @@ impl FixedIndexWriter {
self.unmap()?;
+ if self.index_length == 0 {
+ bail!("refusing to close empty fidx file {:?}", self.tmp_filename);
+ } else if self.index_length < self.index_capacity {
+ let file_size = size_of::<FixedIndexHeader>() + index_size;
+ nix::unistd::ftruncate(&self.file, file_size as i64)?;
+ self.index_capacity = self.index_length;
+ }
+
let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum);
self.file.seek(SeekFrom::Start(csum_offset as u64))?;
self.file.write_all(&index_csum)?;
+
+ if self.write_size_on_close {
+ let size_offset = std::mem::offset_of!(FixedIndexHeader, size);
+ self.file.seek(SeekFrom::Start(size_offset as u64))?;
+ self.file.write_all(&self.size.to_le_bytes())?;
+ }
+
self.file.flush()?;
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@@ -419,16 +533,29 @@ impl FixedIndexWriter {
/// The `start` and `size` parameters encode the range of
/// content that is backed up. It is verified that `start` is
/// aligned and that only the last chunk may be smaller.
+ ///
+ /// If this writer has been created without a fixed size, the
+ /// index capacity and content size are increased automatically
+ /// until an incomplete chunk is encountered.
pub fn add_chunk(&mut self, start: u64, size: u32, digest: &[u8; 32]) -> Result<(), Error> {
let size = u64::from(size);
let Some(end) = start.checked_add(size) else {
bail!("add_chunk: start and size are too large: {start}+{size}");
};
+ self.grow_to_size(end)?;
let idx = self.check_chunk_alignment(end, size)?;
self.add_digest(idx, digest)
}
pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> {
+ if self.growable_size {
+ bail!("reusing the index is only supported with known input size");
+ }
+
+ if self.chunk_size != reader.chunk_size as u64 {
+ bail!("can't reuse file with different chunk size");
+ }
+
if self.index_length != reader.index_count() {
bail!("clone_data_from failed - index sizes not equal");
}
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 6e3b46c2..b8f34dfa 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -528,13 +528,15 @@ fn create_fixed_index(
reader = Some(index);
}
- let mut writer = env.datastore.create_fixed_writer(&path, size, chunk_size)?;
+ let mut writer = env
+ .datastore
+ .create_fixed_writer(&path, Some(size), chunk_size)?;
if let Some(reader) = reader {
writer.clone_data_from(&reader)?;
}
- let wid = env.register_fixed_writer(writer, name, size, chunk_size as u32, incremental)?;
+ let wid = env.register_fixed_writer(writer, name, size, chunk_size, incremental)?;
env.log(format!("created new fixed index {wid} ({path:?})"));
--
2.47.3
next prev parent reply other threads:[~2026-02-10 15:07 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-10 15:06 [PATCH v6 proxmox-backup 00/18] fix: #3847 pipe from STDIN to proxmox-backup-client Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 01/18] datastore: remove Arc<ChunkStore> from FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 02/18] datastore: remove Arc<ChunkStore> from DynamicIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 03/18] datastore: add TempDir that is automatically deleted on drop Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 04/18] datastore: use temporary directory for chunk store test Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 05/18] datastore: combine public FixedIndexWriter methods into add_chunk Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 06/18] datastore: use fixed size types for FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 07/18] datastore: verify that chunk_size is a power of two Robert Obkircher
2026-02-17 9:13 ` Robert Obkircher
2026-02-17 9:40 ` Christian Ebner
2026-02-10 15:06 ` Robert Obkircher [this message]
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 09/18] datastore: test FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 10/18] api: backup: make fixed index file size optional Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 11/18] api: verify fixed index writer size on close Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 12/18] client: don't poll terminated source in FixedChunkStream Robert Obkircher
2026-02-17 10:01 ` Christian Ebner
2026-02-17 10:06 ` Christian Ebner
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 13/18] client: don't poll terminated source in ChunkStream Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 14/18] fix #3847: client: support fifo pipe inputs for image backups Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 15/18] client: Fail early if the same pipe is specified for multiple inputs Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 16/18] datastore: compute fidx file size with overflow checks Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 17/18] datastore: support writing fidx files on systems with larger page size Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 18/18] datastore: support incremental fidx uploads with different size Robert Obkircher
2026-02-17 12:42 ` [PATCH v6 proxmox-backup 00/18] fix: #3847 pipe from STDIN to proxmox-backup-client Christian Ebner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260210150642.469670-9-r.obkircher@proxmox.com \
--to=r.obkircher@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.