From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
Date: Mon, 27 Oct 2025 14:52:40 +0100
Message-ID: <20251027135435.488551-1-f.gruenbichler@proxmox.com>

the current code base doesn't use the index writers for inserting chunks into a
chunk store, and it probably shouldn't, given the intricate requirements around
interacting with S3.

all of this code seems to be dead code; remove it to make reasoning about
chunk insertion code paths easier.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
stumbled upon this while thinking about S3 synchronization issues..
AFAICT this is leftover from the very initial phase of PBS development
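
for context, the insertion path that remains keeps the chunk store and the
index writers separate: the caller inserts the chunk into the store and only
records the resulting digest in the index. below is a minimal sketch of that
call pattern, not code from the tree -- the helper name store_chunk() and the
exact import paths are assumptions, while insert_chunk(), add_chunk() and
DataChunkBuilder are the calls visible in the diff below:

    use std::sync::Arc;

    use anyhow::Error;

    use pbs_datastore::chunk_store::ChunkStore;
    use pbs_datastore::data_blob::DataChunkBuilder;
    use pbs_datastore::dynamic_index::DynamicIndexWriter;

    /// Hypothetical helper: build a chunk, insert it into the store directly,
    /// and let the index writer only record the (offset, digest) pair.
    fn store_chunk(
        store: &Arc<ChunkStore>,
        writer: &mut DynamicIndexWriter,
        data: &[u8],
        end_offset: u64,
    ) -> Result<(), Error> {
        // build a compressed chunk plus its digest, as the removed code did
        let (chunk, digest) = DataChunkBuilder::new(data).compress(true).build()?;

        // insert into the chunk store - no detour via the index writer anymore
        let (_is_duplicate, _compressed_size) = store.insert_chunk(&chunk, &digest)?;

        // the index writer only tracks which digest ends at which offset
        writer.add_chunk(end_offset, &digest)?;

        Ok(())
    }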

 pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
 pbs-datastore/src/fixed_index.rs   |  38 --------
 2 files changed, 180 deletions(-)

diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
index ff6c36782..624df0119 100644
--- a/pbs-datastore/src/dynamic_index.rs
+++ b/pbs-datastore/src/dynamic_index.rs
@@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
 
 use pbs_tools::lru_cache::LruCache;
 
-use crate::chunk_stat::ChunkStat;
 use crate::chunk_store::ChunkStore;
-use crate::data_blob::{DataBlob, DataChunkBuilder};
 use crate::file_formats;
 use crate::index::{ChunkReadInfo, IndexFile};
 use crate::read_chunk::ReadChunk;
-use crate::{Chunker, ChunkerImpl};
 
 /// Header format definition for dynamic index files (`.dixd`)
 #[repr(C)]
@@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
 
 /// Create dynamic index files (`.dixd`)
 pub struct DynamicIndexWriter {
-    store: Arc<ChunkStore>,
     writer: BufWriter<File>,
     closed: bool,
     filename: PathBuf,
@@ -321,7 +317,6 @@ impl DynamicIndexWriter {
         let csum = Some(openssl::sha::Sha256::new());
 
         Ok(Self {
-            store,
             writer,
             closed: false,
             filename: full_path,
@@ -332,11 +327,6 @@ impl DynamicIndexWriter {
         })
     }
 
-    // fixme: use add_chunk instead?
-    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
-        self.store.insert_chunk(chunk, digest)
-    }
-
     pub fn close(&mut self) -> Result<[u8; 32], Error> {
         if self.closed {
             bail!(
@@ -387,138 +377,6 @@ impl DynamicIndexWriter {
     }
 }
 
-/// Writer which splits a binary stream into dynamic sized chunks
-///
-/// And store the resulting chunk list into the index file.
-pub struct DynamicChunkWriter {
-    index: DynamicIndexWriter,
-    closed: bool,
-    chunker: ChunkerImpl,
-    stat: ChunkStat,
-    chunk_offset: usize,
-    last_chunk: usize,
-    chunk_buffer: Vec<u8>,
-}
-
-impl DynamicChunkWriter {
-    pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
-        Self {
-            index,
-            closed: false,
-            chunker: ChunkerImpl::new(chunk_size),
-            stat: ChunkStat::new(0),
-            chunk_offset: 0,
-            last_chunk: 0,
-            chunk_buffer: Vec::with_capacity(chunk_size * 4),
-        }
-    }
-
-    pub fn stat(&self) -> &ChunkStat {
-        &self.stat
-    }
-
-    pub fn close(&mut self) -> Result<(), Error> {
-        if self.closed {
-            return Ok(());
-        }
-
-        self.closed = true;
-
-        self.write_chunk_buffer()?;
-
-        self.index.close()?;
-
-        self.stat.size = self.chunk_offset as u64;
-
-        // add size of index file
-        self.stat.size +=
-            (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
-
-        Ok(())
-    }
-
-    fn write_chunk_buffer(&mut self) -> Result<(), Error> {
-        let chunk_size = self.chunk_buffer.len();
-
-        if chunk_size == 0 {
-            return Ok(());
-        }
-
-        let expected_chunk_size = self.chunk_offset - self.last_chunk;
-        if expected_chunk_size != self.chunk_buffer.len() {
-            bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
-        }
-
-        self.stat.chunk_count += 1;
-
-        self.last_chunk = self.chunk_offset;
-
-        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
-            .compress(true)
-            .build()?;
-
-        match self.index.insert_chunk(&chunk, &digest) {
-            Ok((is_duplicate, compressed_size)) => {
-                self.stat.compressed_size += compressed_size;
-                if is_duplicate {
-                    self.stat.duplicate_chunks += 1;
-                } else {
-                    self.stat.disk_size += compressed_size;
-                }
-
-                log::info!(
-                    "ADD CHUNK {:016x} {} {}% {} {}",
-                    self.chunk_offset,
-                    chunk_size,
-                    (compressed_size * 100) / (chunk_size as u64),
-                    is_duplicate,
-                    hex::encode(digest)
-                );
-                self.index.add_chunk(self.chunk_offset as u64, &digest)?;
-                self.chunk_buffer.truncate(0);
-                Ok(())
-            }
-            Err(err) => {
-                self.chunk_buffer.truncate(0);
-                Err(err)
-            }
-        }
-    }
-}
-
-impl Write for DynamicChunkWriter {
-    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
-        let chunker = &mut self.chunker;
-
-        let ctx = crate::chunker::Context::default();
-        let pos = chunker.scan(data, &ctx);
-
-        if pos > 0 {
-            self.chunk_buffer.extend_from_slice(&data[0..pos]);
-            self.chunk_offset += pos;
-
-            if let Err(err) = self.write_chunk_buffer() {
-                return Err(std::io::Error::new(
-                    std::io::ErrorKind::Other,
-                    err.to_string(),
-                ));
-            }
-            Ok(pos)
-        } else {
-            self.chunk_offset += data.len();
-            self.chunk_buffer.extend_from_slice(data);
-            Ok(data.len())
-        }
-    }
-
-    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
-        Err(std::io::Error::new(
-            std::io::ErrorKind::Other,
-            "please use close() instead of flush()",
-        ))
-    }
-}
-
 struct CachedChunk {
     range: Range<u64>,
     data: Vec<u8>,
diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
index 0f289543f..6c3be2d49 100644
--- a/pbs-datastore/src/fixed_index.rs
+++ b/pbs-datastore/src/fixed_index.rs
@@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
 use proxmox_io::ReadExt;
 use proxmox_uuid::Uuid;
 
-use crate::chunk_stat::ChunkStat;
 use crate::chunk_store::ChunkStore;
-use crate::data_blob::ChunkInfo;
 use crate::file_formats;
 use crate::index::{ChunkReadInfo, IndexFile};
 
@@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
 }
 
 pub struct FixedIndexWriter {
-    store: Arc<ChunkStore>,
     file: File,
     filename: PathBuf,
     tmp_filename: PathBuf,
@@ -302,7 +299,6 @@ impl FixedIndexWriter {
         .cast::<u8>();
 
         Ok(Self {
-            store,
             file,
             filename: full_path,
             tmp_filename: tmp_path,
@@ -388,40 +384,6 @@ impl FixedIndexWriter {
         Ok(pos / self.chunk_size)
     }
 
-    // Note: We want to add data out of order, so do not assume any order here.
-    pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
-        let chunk_len = chunk_info.chunk_len as usize;
-        let offset = chunk_info.offset as usize; // end of chunk
-
-        let idx = self.check_chunk_alignment(offset, chunk_len)?;
-
-        let (is_duplicate, compressed_size) = self
-            .store
-            .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
-
-        stat.chunk_count += 1;
-        stat.compressed_size += compressed_size;
-
-        let digest = &chunk_info.digest;
-
-        log::info!(
-            "ADD CHUNK {} {} {}% {} {}",
-            idx,
-            chunk_len,
-            (compressed_size * 100) / (chunk_len as u64),
-            is_duplicate,
-            hex::encode(digest)
-        );
-
-        if is_duplicate {
-            stat.duplicate_chunks += 1;
-        } else {
-            stat.disk_size += compressed_size;
-        }
-
-        self.add_digest(idx, digest)
-    }
-
     pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
         if index >= self.index_length {
             bail!(
-- 
2.47.3


