* [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
@ 2025-10-27 13:52 Fabian Grünbichler
  2025-10-29  8:04 ` Christian Ebner
  0 siblings, 1 reply; 6+ messages in thread
From: Fabian Grünbichler @ 2025-10-27 13:52 UTC (permalink / raw)
  To: pbs-devel

the current code base doesn't use the index writers for inserting chunks into a
chunk store, and it probably shouldn't given the intricate requirements around
interacting with S3.

all of this code seems to be dead code; remove it to make reasoning about
chunk insertion code paths easier.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
stumbled upon this while thinking about S3 synchronization issues..
AFAICT this is leftover from the very initial phase of PBS development

 pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
 pbs-datastore/src/fixed_index.rs   |  38 --------
 2 files changed, 180 deletions(-)

diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
index ff6c36782..624df0119 100644
--- a/pbs-datastore/src/dynamic_index.rs
+++ b/pbs-datastore/src/dynamic_index.rs
@@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
 
 use pbs_tools::lru_cache::LruCache;
 
-use crate::chunk_stat::ChunkStat;
 use crate::chunk_store::ChunkStore;
-use crate::data_blob::{DataBlob, DataChunkBuilder};
 use crate::file_formats;
 use crate::index::{ChunkReadInfo, IndexFile};
 use crate::read_chunk::ReadChunk;
-use crate::{Chunker, ChunkerImpl};
 
 /// Header format definition for dynamic index files (`.dixd`)
 #[repr(C)]
@@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
 
 /// Create dynamic index files (`.dixd`)
 pub struct DynamicIndexWriter {
-    store: Arc<ChunkStore>,
     writer: BufWriter<File>,
     closed: bool,
     filename: PathBuf,
@@ -321,7 +317,6 @@ impl DynamicIndexWriter {
         let csum = Some(openssl::sha::Sha256::new());
 
         Ok(Self {
-            store,
             writer,
             closed: false,
             filename: full_path,
@@ -332,11 +327,6 @@ impl DynamicIndexWriter {
         })
     }
 
-    // fixme: use add_chunk instead?
-    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
-        self.store.insert_chunk(chunk, digest)
-    }
-
     pub fn close(&mut self) -> Result<[u8; 32], Error> {
         if self.closed {
             bail!(
@@ -387,138 +377,6 @@ impl DynamicIndexWriter {
     }
 }
 
-/// Writer which splits a binary stream into dynamic sized chunks
-///
-/// And store the resulting chunk list into the index file.
-pub struct DynamicChunkWriter {
-    index: DynamicIndexWriter,
-    closed: bool,
-    chunker: ChunkerImpl,
-    stat: ChunkStat,
-    chunk_offset: usize,
-    last_chunk: usize,
-    chunk_buffer: Vec<u8>,
-}
-
-impl DynamicChunkWriter {
-    pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
-        Self {
-            index,
-            closed: false,
-            chunker: ChunkerImpl::new(chunk_size),
-            stat: ChunkStat::new(0),
-            chunk_offset: 0,
-            last_chunk: 0,
-            chunk_buffer: Vec::with_capacity(chunk_size * 4),
-        }
-    }
-
-    pub fn stat(&self) -> &ChunkStat {
-        &self.stat
-    }
-
-    pub fn close(&mut self) -> Result<(), Error> {
-        if self.closed {
-            return Ok(());
-        }
-
-        self.closed = true;
-
-        self.write_chunk_buffer()?;
-
-        self.index.close()?;
-
-        self.stat.size = self.chunk_offset as u64;
-
-        // add size of index file
-        self.stat.size +=
-            (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
-
-        Ok(())
-    }
-
-    fn write_chunk_buffer(&mut self) -> Result<(), Error> {
-        let chunk_size = self.chunk_buffer.len();
-
-        if chunk_size == 0 {
-            return Ok(());
-        }
-
-        let expected_chunk_size = self.chunk_offset - self.last_chunk;
-        if expected_chunk_size != self.chunk_buffer.len() {
-            bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
-        }
-
-        self.stat.chunk_count += 1;
-
-        self.last_chunk = self.chunk_offset;
-
-        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
-            .compress(true)
-            .build()?;
-
-        match self.index.insert_chunk(&chunk, &digest) {
-            Ok((is_duplicate, compressed_size)) => {
-                self.stat.compressed_size += compressed_size;
-                if is_duplicate {
-                    self.stat.duplicate_chunks += 1;
-                } else {
-                    self.stat.disk_size += compressed_size;
-                }
-
-                log::info!(
-                    "ADD CHUNK {:016x} {} {}% {} {}",
-                    self.chunk_offset,
-                    chunk_size,
-                    (compressed_size * 100) / (chunk_size as u64),
-                    is_duplicate,
-                    hex::encode(digest)
-                );
-                self.index.add_chunk(self.chunk_offset as u64, &digest)?;
-                self.chunk_buffer.truncate(0);
-                Ok(())
-            }
-            Err(err) => {
-                self.chunk_buffer.truncate(0);
-                Err(err)
-            }
-        }
-    }
-}
-
-impl Write for DynamicChunkWriter {
-    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
-        let chunker = &mut self.chunker;
-
-        let ctx = crate::chunker::Context::default();
-        let pos = chunker.scan(data, &ctx);
-
-        if pos > 0 {
-            self.chunk_buffer.extend_from_slice(&data[0..pos]);
-            self.chunk_offset += pos;
-
-            if let Err(err) = self.write_chunk_buffer() {
-                return Err(std::io::Error::new(
-                    std::io::ErrorKind::Other,
-                    err.to_string(),
-                ));
-            }
-            Ok(pos)
-        } else {
-            self.chunk_offset += data.len();
-            self.chunk_buffer.extend_from_slice(data);
-            Ok(data.len())
-        }
-    }
-
-    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
-        Err(std::io::Error::new(
-            std::io::ErrorKind::Other,
-            "please use close() instead of flush()",
-        ))
-    }
-}
-
 struct CachedChunk {
     range: Range<u64>,
     data: Vec<u8>,
diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
index 0f289543f..6c3be2d49 100644
--- a/pbs-datastore/src/fixed_index.rs
+++ b/pbs-datastore/src/fixed_index.rs
@@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
 use proxmox_io::ReadExt;
 use proxmox_uuid::Uuid;
 
-use crate::chunk_stat::ChunkStat;
 use crate::chunk_store::ChunkStore;
-use crate::data_blob::ChunkInfo;
 use crate::file_formats;
 use crate::index::{ChunkReadInfo, IndexFile};
 
@@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
 }
 
 pub struct FixedIndexWriter {
-    store: Arc<ChunkStore>,
     file: File,
     filename: PathBuf,
     tmp_filename: PathBuf,
@@ -302,7 +299,6 @@ impl FixedIndexWriter {
         .cast::<u8>();
 
         Ok(Self {
-            store,
             file,
             filename: full_path,
             tmp_filename: tmp_path,
@@ -388,40 +384,6 @@ impl FixedIndexWriter {
         Ok(pos / self.chunk_size)
     }
 
-    // Note: We want to add data out of order, so do not assume any order here.
-    pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
-        let chunk_len = chunk_info.chunk_len as usize;
-        let offset = chunk_info.offset as usize; // end of chunk
-
-        let idx = self.check_chunk_alignment(offset, chunk_len)?;
-
-        let (is_duplicate, compressed_size) = self
-            .store
-            .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
-
-        stat.chunk_count += 1;
-        stat.compressed_size += compressed_size;
-
-        let digest = &chunk_info.digest;
-
-        log::info!(
-            "ADD CHUNK {} {} {}% {} {}",
-            idx,
-            chunk_len,
-            (compressed_size * 100) / (chunk_len as u64),
-            is_duplicate,
-            hex::encode(digest)
-        );
-
-        if is_duplicate {
-            stat.duplicate_chunks += 1;
-        } else {
-            stat.disk_size += compressed_size;
-        }
-
-        self.add_digest(idx, digest)
-    }
-
     pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
         if index >= self.index_length {
             bail!(
-- 
2.47.3



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
  2025-10-27 13:52 [pbs-devel] [RFC proxmox-backup] index writers: remove dead code Fabian Grünbichler
@ 2025-10-29  8:04 ` Christian Ebner
  2025-10-30 10:25   ` Fabian Grünbichler
  0 siblings, 1 reply; 6+ messages in thread
From: Christian Ebner @ 2025-10-29  8:04 UTC (permalink / raw)
  To: Proxmox Backup Server development discussion, Fabian Grünbichler

Thanks for the cleanup; two questions inline.

On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
> the current code base doesn't use the index writers for inserting chunks into a
> chunk store, and it probably shouldn't given the intricate requirements around
> interacting with S3.
> 
> all of this code seems to be dead code; remove it to make reasoning about
> chunk insertion code paths easier.
> 
> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> ---
> stumbled upon this while thinking about S3 synchronization issues..
> AFAICT this is leftover from the very initial phase of PBS development
> 
>   pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
>   pbs-datastore/src/fixed_index.rs   |  38 --------
>   2 files changed, 180 deletions(-)
> 
> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
> index ff6c36782..624df0119 100644
> --- a/pbs-datastore/src/dynamic_index.rs
> +++ b/pbs-datastore/src/dynamic_index.rs
> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>   
>   use pbs_tools::lru_cache::LruCache;
>   
> -use crate::chunk_stat::ChunkStat;
>   use crate::chunk_store::ChunkStore;
> -use crate::data_blob::{DataBlob, DataChunkBuilder};
>   use crate::file_formats;
>   use crate::index::{ChunkReadInfo, IndexFile};
>   use crate::read_chunk::ReadChunk;
> -use crate::{Chunker, ChunkerImpl};
>   
>   /// Header format definition for dynamic index files (`.dixd`)
>   #[repr(C)]
> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>   
>   /// Create dynamic index files (`.dixd`)
>   pub struct DynamicIndexWriter {
> -    store: Arc<ChunkStore>,
>       writer: BufWriter<File>,
>       closed: bool,
>       filename: PathBuf,
> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>           let csum = Some(openssl::sha::Sha256::new());
>   
>           Ok(Self {
> -            store,

question: not sure if it is fine to drop the chunk store here? The chunk 
store holds the process locker, which should outlive the writer ...

>               writer,
>               closed: false,
>               filename: full_path,
> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
>           })
>       }
>   
> -    // fixme: use add_chunk instead?
> -    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
> -        self.store.insert_chunk(chunk, digest)
> -    }
> -
>       pub fn close(&mut self) -> Result<[u8; 32], Error> {
>           if self.closed {
>               bail!(
> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
>       }
>   }
>   
> -/// Writer which splits a binary stream into dynamic sized chunks
> -///
> -/// And store the resulting chunk list into the index file.
> -pub struct DynamicChunkWriter {
> -    index: DynamicIndexWriter,
> -    closed: bool,
> -    chunker: ChunkerImpl,
> -    stat: ChunkStat,
> -    chunk_offset: usize,
> -    last_chunk: usize,
> -    chunk_buffer: Vec<u8>,
> -}
> -
> -impl DynamicChunkWriter {
> -    pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
> -        Self {
> -            index,
> -            closed: false,
> -            chunker: ChunkerImpl::new(chunk_size),
> -            stat: ChunkStat::new(0),
> -            chunk_offset: 0,
> -            last_chunk: 0,
> -            chunk_buffer: Vec::with_capacity(chunk_size * 4),
> -        }
> -    }
> -
> -    pub fn stat(&self) -> &ChunkStat {
> -        &self.stat
> -    }
> -
> -    pub fn close(&mut self) -> Result<(), Error> {
> -        if self.closed {
> -            return Ok(());
> -        }
> -
> -        self.closed = true;
> -
> -        self.write_chunk_buffer()?;
> -
> -        self.index.close()?;
> -
> -        self.stat.size = self.chunk_offset as u64;
> -
> -        // add size of index file
> -        self.stat.size +=
> -            (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
> -
> -        Ok(())
> -    }
> -
> -    fn write_chunk_buffer(&mut self) -> Result<(), Error> {
> -        let chunk_size = self.chunk_buffer.len();
> -
> -        if chunk_size == 0 {
> -            return Ok(());
> -        }
> -
> -        let expected_chunk_size = self.chunk_offset - self.last_chunk;
> -        if expected_chunk_size != self.chunk_buffer.len() {
> -            bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
> -        }
> -
> -        self.stat.chunk_count += 1;
> -
> -        self.last_chunk = self.chunk_offset;
> -
> -        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
> -            .compress(true)
> -            .build()?;
> -
> -        match self.index.insert_chunk(&chunk, &digest) {
> -            Ok((is_duplicate, compressed_size)) => {
> -                self.stat.compressed_size += compressed_size;
> -                if is_duplicate {
> -                    self.stat.duplicate_chunks += 1;
> -                } else {
> -                    self.stat.disk_size += compressed_size;
> -                }
> -
> -                log::info!(
> -                    "ADD CHUNK {:016x} {} {}% {} {}",
> -                    self.chunk_offset,
> -                    chunk_size,
> -                    (compressed_size * 100) / (chunk_size as u64),
> -                    is_duplicate,
> -                    hex::encode(digest)
> -                );
> -                self.index.add_chunk(self.chunk_offset as u64, &digest)?;
> -                self.chunk_buffer.truncate(0);
> -                Ok(())
> -            }
> -            Err(err) => {
> -                self.chunk_buffer.truncate(0);
> -                Err(err)
> -            }
> -        }
> -    }
> -}
> -
> -impl Write for DynamicChunkWriter {
> -    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
> -        let chunker = &mut self.chunker;
> -
> -        let ctx = crate::chunker::Context::default();
> -        let pos = chunker.scan(data, &ctx);
> -
> -        if pos > 0 {
> -            self.chunk_buffer.extend_from_slice(&data[0..pos]);
> -            self.chunk_offset += pos;
> -
> -            if let Err(err) = self.write_chunk_buffer() {
> -                return Err(std::io::Error::new(
> -                    std::io::ErrorKind::Other,
> -                    err.to_string(),
> -                ));
> -            }
> -            Ok(pos)
> -        } else {
> -            self.chunk_offset += data.len();
> -            self.chunk_buffer.extend_from_slice(data);
> -            Ok(data.len())
> -        }
> -    }
> -
> -    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
> -        Err(std::io::Error::new(
> -            std::io::ErrorKind::Other,
> -            "please use close() instead of flush()",
> -        ))
> -    }
> -}
> -
>   struct CachedChunk {
>       range: Range<u64>,
>       data: Vec<u8>,
> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
> index 0f289543f..6c3be2d49 100644
> --- a/pbs-datastore/src/fixed_index.rs
> +++ b/pbs-datastore/src/fixed_index.rs
> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
>   use proxmox_io::ReadExt;
>   use proxmox_uuid::Uuid;
>   
> -use crate::chunk_stat::ChunkStat;
>   use crate::chunk_store::ChunkStore;
> -use crate::data_blob::ChunkInfo;
>   use crate::file_formats;
>   use crate::index::{ChunkReadInfo, IndexFile};
>   
> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
>   }
>   
>   pub struct FixedIndexWriter {
> -    store: Arc<ChunkStore>,
>       file: File,
>       filename: PathBuf,
>       tmp_filename: PathBuf,
> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
>           .cast::<u8>();
>   
>           Ok(Self {
> -            store,

question: ... same as above?

>               file,
>               filename: full_path,
>               tmp_filename: tmp_path,
> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
>           Ok(pos / self.chunk_size)
>       }
>   
> -    // Note: We want to add data out of order, so do not assume any order here.
> -    pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
> -        let chunk_len = chunk_info.chunk_len as usize;
> -        let offset = chunk_info.offset as usize; // end of chunk
> -
> -        let idx = self.check_chunk_alignment(offset, chunk_len)?;
> -
> -        let (is_duplicate, compressed_size) = self
> -            .store
> -            .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
> -
> -        stat.chunk_count += 1;
> -        stat.compressed_size += compressed_size;
> -
> -        let digest = &chunk_info.digest;
> -
> -        log::info!(
> -            "ADD CHUNK {} {} {}% {} {}",
> -            idx,
> -            chunk_len,
> -            (compressed_size * 100) / (chunk_len as u64),
> -            is_duplicate,
> -            hex::encode(digest)
> -        );
> -
> -        if is_duplicate {
> -            stat.duplicate_chunks += 1;
> -        } else {
> -            stat.disk_size += compressed_size;
> -        }
> -
> -        self.add_digest(idx, digest)
> -    }
> -
>       pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
>           if index >= self.index_length {
>               bail!(




* Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
  2025-10-29  8:04 ` Christian Ebner
@ 2025-10-30 10:25   ` Fabian Grünbichler
  2025-10-30 10:46     ` Christian Ebner
  0 siblings, 1 reply; 6+ messages in thread
From: Fabian Grünbichler @ 2025-10-30 10:25 UTC (permalink / raw)
  To: Christian Ebner, Proxmox Backup Server development discussion

On October 29, 2025 9:04 am, Christian Ebner wrote:
> Thanks for the cleanup, two questions inline.
> 
> On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
>> the current code base doesn't use the index writers for inserting chunks into a
>> chunk store, and it probably shouldn't given the intricate requirements around
>> interacting with S3.
>> 
>> all of this code seems to be dead code; remove it to make reasoning about
>> chunk insertion code paths easier.
>> 
>> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
>> ---
>> stumbled upon this while thinking about S3 synchronization issues..
>> AFAICT this is leftover from the very initial phase of PBS development
>> 
>>   pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
>>   pbs-datastore/src/fixed_index.rs   |  38 --------
>>   2 files changed, 180 deletions(-)
>> 
>> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
>> index ff6c36782..624df0119 100644
>> --- a/pbs-datastore/src/dynamic_index.rs
>> +++ b/pbs-datastore/src/dynamic_index.rs
>> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>>   
>>   use pbs_tools::lru_cache::LruCache;
>>   
>> -use crate::chunk_stat::ChunkStat;
>>   use crate::chunk_store::ChunkStore;
>> -use crate::data_blob::{DataBlob, DataChunkBuilder};
>>   use crate::file_formats;
>>   use crate::index::{ChunkReadInfo, IndexFile};
>>   use crate::read_chunk::ReadChunk;
>> -use crate::{Chunker, ChunkerImpl};
>>   
>>   /// Header format definition for dynamic index files (`.dixd`)
>>   #[repr(C)]
>> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>>   
>>   /// Create dynamic index files (`.dixd`)
>>   pub struct DynamicIndexWriter {
>> -    store: Arc<ChunkStore>,
>>       writer: BufWriter<File>,
>>       closed: bool,
>>       filename: PathBuf,
>> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>>           let csum = Some(openssl::sha::Sha256::new());
>>   
>>           Ok(Self {
>> -            store,
> 
> question: not sure if it is fine to drop the chunk store here? The chunk 
> store holds the process locker, which should outlive the writer ...

technically true, but

the writer can already outlive the lock; this is only protected by how
we are calling things, no matter whether a reference to the chunk store
is stored inside the writer or not. the actual lock guard (which
releases the lock when dropped) is what counts, and that is stored in
the backup writer env, not in the index writer here.. (the env also
contains a reference to the chunk store via the reference to the
datastore).

or am I missing something?
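The distinction drawn above — a reference keeping the chunk store *alive* vs. a guard keeping its lock *held* — can be sketched with `std::sync::Mutex`. The types below are illustrative stand-ins, not the actual PBS `ChunkStore`/writer types, and assume the process locker follows the same guard-drop semantics:

```rust
use std::sync::{Arc, Mutex};

// Hypothetical stand-ins for the chunk store and an index writer
// holding a reference to it (names are illustrative only).
struct Store {
    lock: Mutex<()>,
}

struct Writer {
    // Keeping this Arc alive does NOT keep any lock held -- it only
    // keeps the Store (and its Mutex) from being deallocated.
    _store: Arc<Store>,
}

// Returns (locked_while_guard_lives, free_after_guard_dropped).
fn guard_controls_lock() -> (bool, bool) {
    let store = Arc::new(Store { lock: Mutex::new(()) });
    let writer = Writer { _store: Arc::clone(&store) };

    let locked_during = {
        // Only this guard holds the lock; it is released when the guard
        // is dropped, regardless of how long `writer` and its Arc live.
        let _guard = store.lock.lock().unwrap();
        store.lock.try_lock().is_err()
    };

    // Guard dropped: the lock is free even though `writer` still holds
    // a reference to the store.
    let free_after = store.lock.try_lock().is_ok();
    drop(writer);
    (locked_during, free_after)
}

fn main() {
    let (during, after) = guard_controls_lock();
    println!("locked while guard alive: {during}, free after drop: {after}");
}
```

In PBS terms, the `MutexGuard` plays the role of the process-locker guard stored in the backup writer env; the `Arc<Store>` field being removed by this patch only pinned the store's memory, not the lock.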

> 
>>               writer,
>>               closed: false,
>>               filename: full_path,
>> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
>>           })
>>       }
>>   
>> -    // fixme: use add_chunk instead?
>> -    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>> -        self.store.insert_chunk(chunk, digest)
>> -    }
>> -
>>       pub fn close(&mut self) -> Result<[u8; 32], Error> {
>>           if self.closed {
>>               bail!(
>> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
>>       }
>>   }
>>   
>> -/// Writer which splits a binary stream into dynamic sized chunks
>> -///
>> -/// And store the resulting chunk list into the index file.
>> -pub struct DynamicChunkWriter {
>> -    index: DynamicIndexWriter,
>> -    closed: bool,
>> -    chunker: ChunkerImpl,
>> -    stat: ChunkStat,
>> -    chunk_offset: usize,
>> -    last_chunk: usize,
>> -    chunk_buffer: Vec<u8>,
>> -}
>> -
>> -impl DynamicChunkWriter {
>> -    pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
>> -        Self {
>> -            index,
>> -            closed: false,
>> -            chunker: ChunkerImpl::new(chunk_size),
>> -            stat: ChunkStat::new(0),
>> -            chunk_offset: 0,
>> -            last_chunk: 0,
>> -            chunk_buffer: Vec::with_capacity(chunk_size * 4),
>> -        }
>> -    }
>> -
>> -    pub fn stat(&self) -> &ChunkStat {
>> -        &self.stat
>> -    }
>> -
>> -    pub fn close(&mut self) -> Result<(), Error> {
>> -        if self.closed {
>> -            return Ok(());
>> -        }
>> -
>> -        self.closed = true;
>> -
>> -        self.write_chunk_buffer()?;
>> -
>> -        self.index.close()?;
>> -
>> -        self.stat.size = self.chunk_offset as u64;
>> -
>> -        // add size of index file
>> -        self.stat.size +=
>> -            (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
>> -
>> -        Ok(())
>> -    }
>> -
>> -    fn write_chunk_buffer(&mut self) -> Result<(), Error> {
>> -        let chunk_size = self.chunk_buffer.len();
>> -
>> -        if chunk_size == 0 {
>> -            return Ok(());
>> -        }
>> -
>> -        let expected_chunk_size = self.chunk_offset - self.last_chunk;
>> -        if expected_chunk_size != self.chunk_buffer.len() {
>> -            bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
>> -        }
>> -
>> -        self.stat.chunk_count += 1;
>> -
>> -        self.last_chunk = self.chunk_offset;
>> -
>> -        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
>> -            .compress(true)
>> -            .build()?;
>> -
>> -        match self.index.insert_chunk(&chunk, &digest) {
>> -            Ok((is_duplicate, compressed_size)) => {
>> -                self.stat.compressed_size += compressed_size;
>> -                if is_duplicate {
>> -                    self.stat.duplicate_chunks += 1;
>> -                } else {
>> -                    self.stat.disk_size += compressed_size;
>> -                }
>> -
>> -                log::info!(
>> -                    "ADD CHUNK {:016x} {} {}% {} {}",
>> -                    self.chunk_offset,
>> -                    chunk_size,
>> -                    (compressed_size * 100) / (chunk_size as u64),
>> -                    is_duplicate,
>> -                    hex::encode(digest)
>> -                );
>> -                self.index.add_chunk(self.chunk_offset as u64, &digest)?;
>> -                self.chunk_buffer.truncate(0);
>> -                Ok(())
>> -            }
>> -            Err(err) => {
>> -                self.chunk_buffer.truncate(0);
>> -                Err(err)
>> -            }
>> -        }
>> -    }
>> -}
>> -
>> -impl Write for DynamicChunkWriter {
>> -    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
>> -        let chunker = &mut self.chunker;
>> -
>> -        let ctx = crate::chunker::Context::default();
>> -        let pos = chunker.scan(data, &ctx);
>> -
>> -        if pos > 0 {
>> -            self.chunk_buffer.extend_from_slice(&data[0..pos]);
>> -            self.chunk_offset += pos;
>> -
>> -            if let Err(err) = self.write_chunk_buffer() {
>> -                return Err(std::io::Error::new(
>> -                    std::io::ErrorKind::Other,
>> -                    err.to_string(),
>> -                ));
>> -            }
>> -            Ok(pos)
>> -        } else {
>> -            self.chunk_offset += data.len();
>> -            self.chunk_buffer.extend_from_slice(data);
>> -            Ok(data.len())
>> -        }
>> -    }
>> -
>> -    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
>> -        Err(std::io::Error::new(
>> -            std::io::ErrorKind::Other,
>> -            "please use close() instead of flush()",
>> -        ))
>> -    }
>> -}
>> -
>>   struct CachedChunk {
>>       range: Range<u64>,
>>       data: Vec<u8>,
>> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
>> index 0f289543f..6c3be2d49 100644
>> --- a/pbs-datastore/src/fixed_index.rs
>> +++ b/pbs-datastore/src/fixed_index.rs
>> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
>>   use proxmox_io::ReadExt;
>>   use proxmox_uuid::Uuid;
>>   
>> -use crate::chunk_stat::ChunkStat;
>>   use crate::chunk_store::ChunkStore;
>> -use crate::data_blob::ChunkInfo;
>>   use crate::file_formats;
>>   use crate::index::{ChunkReadInfo, IndexFile};
>>   
>> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
>>   }
>>   
>>   pub struct FixedIndexWriter {
>> -    store: Arc<ChunkStore>,
>>       file: File,
>>       filename: PathBuf,
>>       tmp_filename: PathBuf,
>> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
>>           .cast::<u8>();
>>   
>>           Ok(Self {
>> -            store,
> 
> question: ... same as above?
> 
>>               file,
>>               filename: full_path,
>>               tmp_filename: tmp_path,
>> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
>>           Ok(pos / self.chunk_size)
>>       }
>>   
>> -    // Note: We want to add data out of order, so do not assume any order here.
>> -    pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
>> -        let chunk_len = chunk_info.chunk_len as usize;
>> -        let offset = chunk_info.offset as usize; // end of chunk
>> -
>> -        let idx = self.check_chunk_alignment(offset, chunk_len)?;
>> -
>> -        let (is_duplicate, compressed_size) = self
>> -            .store
>> -            .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
>> -
>> -        stat.chunk_count += 1;
>> -        stat.compressed_size += compressed_size;
>> -
>> -        let digest = &chunk_info.digest;
>> -
>> -        log::info!(
>> -            "ADD CHUNK {} {} {}% {} {}",
>> -            idx,
>> -            chunk_len,
>> -            (compressed_size * 100) / (chunk_len as u64),
>> -            is_duplicate,
>> -            hex::encode(digest)
>> -        );
>> -
>> -        if is_duplicate {
>> -            stat.duplicate_chunks += 1;
>> -        } else {
>> -            stat.disk_size += compressed_size;
>> -        }
>> -
>> -        self.add_digest(idx, digest)
>> -    }
>> -
>>       pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
>>           if index >= self.index_length {
>>               bail!(
> 
> 



* Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
  2025-10-30 10:25   ` Fabian Grünbichler
@ 2025-10-30 10:46     ` Christian Ebner
  2025-10-30 11:01       ` Fabian Grünbichler
  0 siblings, 1 reply; 6+ messages in thread
From: Christian Ebner @ 2025-10-30 10:46 UTC (permalink / raw)
  To: Fabian Grünbichler, Proxmox Backup Server development discussion

On 10/30/25 11:25 AM, Fabian Grünbichler wrote:
> On October 29, 2025 9:04 am, Christian Ebner wrote:
>> Thanks for the cleanup, two questions inline.
>>
>> On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
>>> the current code base doesn't use the index writers for inserting chunks into a
>>> chunk store, and it probably shouldn't given the intricate requirements around
>>> interacting with S3.
>>>
>>> all of this code seems to be dead code; remove it to make reasoning about
>>> chunk insertion code paths easier.
>>>
>>> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
>>> ---
>>> stumbled upon this while thinking about S3 synchronization issues..
>>> AFAICT this is leftover from the very initial phase of PBS development
>>>
>>>    pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
>>>    pbs-datastore/src/fixed_index.rs   |  38 --------
>>>    2 files changed, 180 deletions(-)
>>>
>>> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
>>> index ff6c36782..624df0119 100644
>>> --- a/pbs-datastore/src/dynamic_index.rs
>>> +++ b/pbs-datastore/src/dynamic_index.rs
>>> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>>>    
>>>    use pbs_tools::lru_cache::LruCache;
>>>    
>>> -use crate::chunk_stat::ChunkStat;
>>>    use crate::chunk_store::ChunkStore;
>>> -use crate::data_blob::{DataBlob, DataChunkBuilder};
>>>    use crate::file_formats;
>>>    use crate::index::{ChunkReadInfo, IndexFile};
>>>    use crate::read_chunk::ReadChunk;
>>> -use crate::{Chunker, ChunkerImpl};
>>>    
>>>    /// Header format definition for dynamic index files (`.dixd`)
>>>    #[repr(C)]
>>> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>>>    
>>>    /// Create dynamic index files (`.dixd`)
>>>    pub struct DynamicIndexWriter {
>>> -    store: Arc<ChunkStore>,
>>>        writer: BufWriter<File>,
>>>        closed: bool,
>>>        filename: PathBuf,
>>> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>>>            let csum = Some(openssl::sha::Sha256::new());
>>>    
>>>            Ok(Self {
>>> -            store,
>>
>> question: not sure if it is fine to drop the chunk store here? The chunk
>> store holds the process locker, which should outlive the writer ...
> 
> technically true, but
> 
> the writer can already outlive the lock, this is only protected by how
> we are calling things, no matter whether a reference to the chunk store
> is stored inside the writer or not. the actual lock guard (which
> releases the lock when dropped) is what counts, and that is stored in
> the backup writer env, not in the index writer here.. (the env also
> contains a reference to the chunk store via the reference to the
> datastore).
> 
> or am I missing something?

No, this was exactly my question here... Thanks for the clarification; 
with that information this now looks good to me.
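For illustration, the lifetime argument above can be sketched with a minimal
Rust example. The types here are hypothetical stand-ins, not the actual PBS
`ChunkStore`/`ProcessLocker`: the point is only that a writer holding an
`Arc` keeps the store alive, but not any lock on it — the RAII guard does.

```rust
use std::sync::{Arc, Mutex};

// Hypothetical stand-ins for the PBS types, only to illustrate the
// lifetime argument from the thread above.
pub struct ChunkStore {
    pub locker: Mutex<()>, // stands in for the process locker
}

pub struct IndexWriter {
    pub _store: Arc<ChunkStore>, // a plain reference, not a lock guard
}

/// Returns true if the lock is free again after the guard scope ends,
/// even though the writer (and its Arc) is still alive.
pub fn lock_outlived_by_writer() -> bool {
    let store = Arc::new(ChunkStore {
        locker: Mutex::new(()),
    });
    let writer = IndexWriter {
        _store: Arc::clone(&store),
    };

    {
        // The guard is what actually holds the lock (in PBS it lives in
        // the backup writer env, not in the index writer).
        let _guard = store.locker.lock().unwrap();
        assert!(store.locker.try_lock().is_err()); // lock is taken here
    } // guard dropped -> lock released

    let lock_is_free = store.locker.try_lock().is_ok();
    drop(writer); // the writer outlived the lock the whole time
    lock_is_free
}
```

So whether or not the writer stores an `Arc<ChunkStore>` makes no difference
to when the lock is released; only the guard's drop point does.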

Consider:

Reviewed-by: Christian Ebner <c.ebner@proxmox.com>



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
  2025-10-30 10:46     ` Christian Ebner
@ 2025-10-30 11:01       ` Fabian Grünbichler
  2025-10-30 11:04         ` Christian Ebner
  0 siblings, 1 reply; 6+ messages in thread
From: Fabian Grünbichler @ 2025-10-30 11:01 UTC (permalink / raw)
  To: Christian Ebner, Proxmox Backup Server development discussion

On October 30, 2025 11:46 am, Christian Ebner wrote:
> On 10/30/25 11:25 AM, Fabian Grünbichler wrote:
>> On October 29, 2025 9:04 am, Christian Ebner wrote:
>>> Thanks for the cleanup, two questions inline.
>>>
>>> On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
>>>> the current code base doesn't use the index writers for inserting chunks into a
>>>> chunk store, and it probably shouldn't given the intricate requirements around
>>>> interacting with S3.
>>>>
> >>>> all of this code seems to be dead code, remove it to make reasoning about
>>>> chunk insertion code paths easier.
>>>>
>>>> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
>>>> ---
>>>> stumbled upon this while thinking about S3 synchronization issues..
>>>> AFAICT this is leftover from the very initial phase of PBS development
>>>>
>>>> [...]
>>>>    /// Create dynamic index files (`.dixd`)
>>>>    pub struct DynamicIndexWriter {
>>>> -    store: Arc<ChunkStore>,
>>>>        writer: BufWriter<File>,
>>>>        closed: bool,
>>>>        filename: PathBuf,
>>>> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>>>>            let csum = Some(openssl::sha::Sha256::new());
>>>>    
>>>>            Ok(Self {
>>>> -            store,
>>>
>>> question: not sure if it is fine to drop the chunk store here? The chunk
>>> store holds the process locker, which should outlive the writer ...
>> 
>> technically true, but
>> 
>> the writer can already outlive the lock, this is only protected by how
>> we are calling things, no matter whether a reference to the chunk store
>> is stored inside the writer or not. the actual lock guard (which
>> releases the lock when dropped) is what counts, and that is stored in
>> the backup writer env, not in the index writer here.. (the env also
>> contains a reference to the chunk store via the reference to the
>> datastore).
>> 
>> or am I missing something?
> 
> No, this was exactly my question here... Thanks for clarification, with 
> that information this now looks good to me.
> 
> Consider:
> 
> Reviewed-by: Christian Ebner <c.ebner@proxmox.com>

I think we could consider moving the path handling outside of the writer
to get rid of all the chunk store references here, if we wanted to?




* Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
  2025-10-30 11:01       ` Fabian Grünbichler
@ 2025-10-30 11:04         ` Christian Ebner
  0 siblings, 0 replies; 6+ messages in thread
From: Christian Ebner @ 2025-10-30 11:04 UTC (permalink / raw)
  To: Fabian Grünbichler, Proxmox Backup Server development discussion

On 10/30/25 12:01 PM, Fabian Grünbichler wrote:
> On October 30, 2025 11:46 am, Christian Ebner wrote:
>> On 10/30/25 11:25 AM, Fabian Grünbichler wrote:
>>> On October 29, 2025 9:04 am, Christian Ebner wrote:
>>>> Thanks for the cleanup, two questions inline.
>>>>
>>>> On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
>>>>> the current code base doesn't use the index writers for inserting chunks into a
>>>>> chunk store, and it probably shouldn't given the intricate requirements around
>>>>> interacting with S3.
>>>>>
> >>>>> all of this code seems to be dead code, remove it to make reasoning about
>>>>> chunk insertion code paths easier.
>>>>>
>>>>> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
>>>>> ---
>>>>> stumbled upon this while thinking about S3 synchronization issues..
>>>>> AFAICT this is leftover from the very initial phase of PBS development
>>>>>
>>>>> [...]
>>>>>     /// Create dynamic index files (`.dixd`)
>>>>>     pub struct DynamicIndexWriter {
>>>>> -    store: Arc<ChunkStore>,
>>>>>         writer: BufWriter<File>,
>>>>>         closed: bool,
>>>>>         filename: PathBuf,
>>>>> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>>>>>             let csum = Some(openssl::sha::Sha256::new());
>>>>>     
>>>>>             Ok(Self {
>>>>> -            store,
>>>>
>>>> question: not sure if it is fine to drop the chunk store here? The chunk
>>>> store holds the process locker, which should outlive the writer ...
>>>
>>> technically true, but
>>>
>>> the writer can already outlive the lock, this is only protected by how
>>> we are calling things, no matter whether a reference to the chunk store
>>> is stored inside the writer or not. the actual lock guard (which
>>> releases the lock when dropped) is what counts, and that is stored in
>>> the backup writer env, not in the index writer here.. (the env also
>>> contains a reference to the chunk store via the reference to the
>>> datastore).
>>>
>>> or am I missing something?
>>
>> No, this was exactly my question here... Thanks for clarification, with
>> that information this now looks good to me.
>>
>> Consider:
>>
>> Reviewed-by: Christian Ebner <c.ebner@proxmox.com>
> 
> I think we could consider moving the path handling outside of the writer
> to get rid of all the chunk store references here, if we wanted to?

Yeah, I was pondering that for a bit as well, but then did not mention 
it: as it is now, creation of the index file is limited to a valid 
datastore location, which should protect against ever creating strange 
paths for index files.
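For what it's worth, the alternative could still keep that protection. A
sketch of what moving the path handling out of the writer might look like
(hypothetical helper, not actual PBS code): the caller resolves the index
file name against the datastore base once, and rejects names that could
escape it, so the writer itself needs no chunk store reference.

```rust
use std::path::{Component, Path, PathBuf};

// Hypothetical helper sketching the alternative discussed above: resolve
// an index file name against the datastore base directory, refusing
// absolute names and '..' components so the resulting path cannot land
// outside the datastore.
pub fn index_file_path(base_dir: &Path, relative_name: &Path) -> Result<PathBuf, String> {
    if relative_name.is_absolute() {
        return Err("index file name must be relative".to_string());
    }
    // reject parent-directory components so the path cannot leave base_dir
    if relative_name
        .components()
        .any(|c| matches!(c, Component::ParentDir))
    {
        return Err("index file name must not contain '..'".to_string());
    }
    Ok(base_dir.join(relative_name))
}
```

The writer would then take the already-validated `PathBuf`, keeping index
file creation confined to the datastore without holding the store itself.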




end of thread, other threads:[~2025-10-30 11:04 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-27 13:52 [pbs-devel] [RFC proxmox-backup] index writers: remove dead code Fabian Grünbichler
2025-10-29  8:04 ` Christian Ebner
2025-10-30 10:25   ` Fabian Grünbichler
2025-10-30 10:46     ` Christian Ebner
2025-10-30 11:01       ` Fabian Grünbichler
2025-10-30 11:04         ` Christian Ebner
