all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Robert Obkircher <r.obkircher@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [PATCH v6 proxmox-backup 08/18] datastore: support writing fidx files of unknown size
Date: Tue, 10 Feb 2026 16:06:24 +0100	[thread overview]
Message-ID: <20260210150642.469670-9-r.obkircher@proxmox.com> (raw)
In-Reply-To: <20260210150642.469670-1-r.obkircher@proxmox.com>

Use mremap and ftruncate to support growable FixedIndexWriters. The
index capacity grows exponentially from a small initial size, and any
excess capacity is truncated after a non-full block or on close.

Signed-off-by: Robert Obkircher <r.obkircher@proxmox.com>
---
 pbs-datastore/src/datastore.rs   |   2 +-
 pbs-datastore/src/fixed_index.rs | 135 ++++++++++++++++++++++++++++++-
 src/api2/backup/mod.rs           |   6 +-
 3 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 3f9c222d..631bdc30 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -695,7 +695,7 @@ impl DataStore {
     pub fn create_fixed_writer<P: AsRef<Path>>(
         &self,
         filename: P,
-        size: u64,
+        size: Option<u64>,
         chunk_size: u32,
     ) -> Result<FixedIndexWriter, Error> {
         let full_path = self.inner.chunk_store.relative_path(filename.as_ref());
diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
index 953f4a20..3807ba35 100644
--- a/pbs-datastore/src/fixed_index.rs
+++ b/pbs-datastore/src/fixed_index.rs
@@ -225,9 +225,12 @@ pub struct FixedIndexWriter {
     chunk_size: u64,
     size: u64,
     index_length: usize,
+    index_capacity: usize,
     index: *mut u8,
     pub uuid: [u8; 16],
     pub ctime: i64,
+    growable_size: bool,
+    write_size_on_close: bool,
 }
 
 // `index` is mmap()ed which cannot be thread-local so should be sendable
@@ -243,11 +246,21 @@ impl Drop for FixedIndexWriter {
 }
 
 impl FixedIndexWriter {
+    /// The initial capacity, if the total size is unknown.
+    ///
+    /// This capacity takes up the same amount of space as the header
+    /// and can refer to 128 Blocks * 4 MiB/Block = 512 MiB of content.
+    ///
+    /// On systems with 4 KiB page size this value ensures that the
+    /// mapped length is a multiple of the page size, but this is not
+    /// strictly necessary.
+    const INITIAL_CAPACITY: usize = 4096 / 32;
+
     #[allow(clippy::cast_ptr_alignment)]
     // Requires obtaining a shared chunk store lock beforehand
     pub fn create(
         full_path: impl Into<PathBuf>,
-        size: u64,
+        known_size: Option<u64>,
         chunk_size: u32,
     ) -> Result<Self, Error> {
         let full_path = full_path.into();
@@ -274,6 +287,7 @@ impl FixedIndexWriter {
         }
 
         let ctime = proxmox_time::epoch_i64();
+        let size = known_size.unwrap_or(0);
 
         let uuid = Uuid::generate();
 
@@ -290,8 +304,15 @@ impl FixedIndexWriter {
 
         file.write_all(&buffer)?;
 
-        let index_length = size.div_ceil(chunk_size).try_into()?;
-        let index_size = index_length * 32;
+        let (index_length, index_capacity) = match known_size {
+            Some(s) => {
+                let len = s.div_ceil(chunk_size).try_into()?;
+                (len, len)
+            }
+            None => (0, Self::INITIAL_CAPACITY),
+        };
+
+        let index_size = index_capacity * 32;
         nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?;
 
         let data = unsafe {
@@ -315,12 +336,90 @@ impl FixedIndexWriter {
             chunk_size,
             size,
             index_length,
+            index_capacity,
             index: data,
             ctime,
             uuid: *uuid.as_bytes(),
+            growable_size: known_size.is_none(),
+            write_size_on_close: known_size.is_none(),
+        })
+    }
+
+    /// If this returns an error, the sizes may be out of sync,
+    /// which is especially bad if the capacity was reduced.
+    fn set_index_capacity(&mut self, new_capacity: usize) -> Result<(), Error> {
+        if new_capacity == self.index_capacity {
+            return Ok(());
+        }
+        let old_index_size = self.index_capacity * 32;
+        let new_index_size = new_capacity * 32;
+        let new_file_size = (size_of::<FixedIndexHeader>() + new_index_size) as i64;
+
+        let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| {
+            format_err!("Can't resize FixedIndexWriter index because the index pointer is null.")
+        })?;
+
+        nix::unistd::ftruncate(&self.file, new_file_size)?;
+
+        let new_index = unsafe {
+            nix::sys::mman::mremap(
+                index_addr,
+                old_index_size,
+                new_index_size,
+                nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE,
+                None,
+            )
+        }?;
+
+        self.index = new_index.as_ptr().cast::<u8>();
+        self.index_capacity = new_capacity;
+        Ok(())
+    }
+
+    /// Unmapping ensures future add and close operations fail.
+    fn set_index_capacity_or_unmap(&mut self, new_capacity: usize) -> Result<(), Error> {
+        self.set_index_capacity(new_capacity).map_err(|e| {
+            let unmap_result = self.unmap();
+            let message = format!(
+                "failed to resize index capacity from {} to {new_capacity} with backing file: {:?}",
+                self.index_capacity, self.tmp_filename
+            );
+            assert!(self.index.is_null(), "{message} {unmap_result:?}");
+            e.context(message)
         })
     }
 
+    /// Increase the content size to be at least `requested_size` and
+    /// ensure there is enough capacity.
+    ///
+    /// Only writers that were created without a known size can grow.
+    /// The size also becomes fixed as soon as it is no longer divisible
+    /// by the block size, to ensure that only the last block can be
+    /// smaller.
+    pub fn grow_to_size(&mut self, requested_size: u64) -> Result<(), Error> {
+        if self.size < requested_size {
+            if !self.growable_size {
+                bail!("refusing to resize from {} to {requested_size}", self.size);
+            }
+            let new_len = requested_size.div_ceil(self.chunk_size).try_into()?;
+            if new_len as u64 * self.chunk_size != requested_size {
+                // not a full chunk, so this must be the last one
+                self.growable_size = false;
+                self.set_index_capacity_or_unmap(new_len)?;
+            } else if new_len > self.index_capacity {
+                let new_capacity = new_len
+                    .checked_next_power_of_two()
+                    .ok_or_else(|| format_err!("capacity overflow"))?;
+                self.set_index_capacity_or_unmap(new_capacity)?;
+            }
+            assert!(new_len <= self.index_capacity);
+            self.index_length = new_len;
+            self.size = requested_size;
+        }
+        Ok(())
+    }
+
+    /// The current length of the index.
     pub fn index_length(&self) -> usize {
         self.index_length
     }
@@ -330,7 +429,7 @@ impl FixedIndexWriter {
             return Ok(());
         };
 
-        let index_size = self.index_length * 32;
+        let index_size = self.index_capacity * 32;
 
         if let Err(err) = unsafe { nix::sys::mman::munmap(index, index_size) } {
             bail!("unmap file {:?} failed - {}", self.tmp_filename, err);
@@ -352,9 +451,24 @@ impl FixedIndexWriter {
 
         self.unmap()?;
 
+        if self.index_length == 0 {
+            bail!("refusing to close empty fidx file {:?}", self.tmp_filename);
+        } else if self.index_length < self.index_capacity {
+            let file_size = size_of::<FixedIndexHeader>() + index_size;
+            nix::unistd::ftruncate(&self.file, file_size as i64)?;
+            self.index_capacity = self.index_length;
+        }
+
         let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum);
         self.file.seek(SeekFrom::Start(csum_offset as u64))?;
         self.file.write_all(&index_csum)?;
+
+        if self.write_size_on_close {
+            let size_offset = std::mem::offset_of!(FixedIndexHeader, size);
+            self.file.seek(SeekFrom::Start(size_offset as u64))?;
+            self.file.write_all(&self.size.to_le_bytes())?;
+        }
+
         self.file.flush()?;
 
         if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@@ -419,16 +533,29 @@ impl FixedIndexWriter {
     /// The `start` and `size` parameters encode the range of
     /// content that is backed up. It is verified that `start` is
     /// aligned and that only the last chunk may be smaller.
+    ///
+    /// If this writer has been created without a fixed size, the
+    /// index capacity and content size are increased automatically
+    /// until an incomplete chunk is encountered.
     pub fn add_chunk(&mut self, start: u64, size: u32, digest: &[u8; 32]) -> Result<(), Error> {
         let size = u64::from(size);
         let Some(end) = start.checked_add(size) else {
             bail!("add_chunk: start and size are too large: {start}+{size}");
         };
+        self.grow_to_size(end)?;
         let idx = self.check_chunk_alignment(end, size)?;
         self.add_digest(idx, digest)
     }
 
     pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> {
+        if self.growable_size {
+            bail!("reusing the index is only supported with known input size");
+        }
+
+        if self.chunk_size != reader.chunk_size as u64 {
+            bail!("can't reuse file with different chunk size");
+        }
+
         if self.index_length != reader.index_count() {
             bail!("clone_data_from failed - index sizes not equal");
         }
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 6e3b46c2..b8f34dfa 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -528,13 +528,15 @@ fn create_fixed_index(
         reader = Some(index);
     }
 
-    let mut writer = env.datastore.create_fixed_writer(&path, size, chunk_size)?;
+    let mut writer = env
+        .datastore
+        .create_fixed_writer(&path, Some(size), chunk_size)?;
 
     if let Some(reader) = reader {
         writer.clone_data_from(&reader)?;
     }
 
-    let wid = env.register_fixed_writer(writer, name, size, chunk_size as u32, incremental)?;
+    let wid = env.register_fixed_writer(writer, name, size, chunk_size, incremental)?;
 
     env.log(format!("created new fixed index {wid} ({path:?})"));
 
-- 
2.47.3





  parent reply	other threads:[~2026-02-10 15:07 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-10 15:06 [PATCH v6 proxmox-backup 00/18] fix: #3847 pipe from STDIN to proxmox-backup-client Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 01/18] datastore: remove Arc<ChunkStore> from FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 02/18] datastore: remove Arc<ChunkStore> from DynamicIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 03/18] datastore: add TempDir that is automatically deleted on drop Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 04/18] datastore: use temporary directory for chunk store test Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 05/18] datastore: combine public FixedIndexWriter methods into add_chunk Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 06/18] datastore: use fixed size types for FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 07/18] datastore: verify that chunk_size is a power of two Robert Obkircher
2026-02-17  9:13   ` Robert Obkircher
2026-02-17  9:40     ` Christian Ebner
2026-02-10 15:06 ` Robert Obkircher [this message]
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 09/18] datastore: test FixedIndexWriter Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 10/18] api: backup: make fixed index file size optional Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 11/18] api: verify fixed index writer size on close Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 12/18] client: don't poll terminated source in FixedChunkStream Robert Obkircher
2026-02-17 10:01   ` Christian Ebner
2026-02-17 10:06     ` Christian Ebner
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 13/18] client: don't poll terminated source in ChunkStream Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 14/18] fix #3847: client: support fifo pipe inputs for image backups Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 15/18] client: Fail early if the same pipe is specified for multiple inputs Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 16/18] datastore: compute fidx file size with overflow checks Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 17/18] datastore: support writing fidx files on systems with larger page size Robert Obkircher
2026-02-10 15:06 ` [PATCH v6 proxmox-backup 18/18] datastore: support incremental fidx uploads with different size Robert Obkircher
2026-02-17 12:42 ` [PATCH v6 proxmox-backup 00/18] fix: #3847 pipe from STDIN to proxmox-backup-client Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260210150642.469670-9-r.obkircher@proxmox.com \
    --to=r.obkircher@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal