From: Christian Ebner <c.ebner@proxmox.com>
To: "Proxmox Backup Server development discussion"
<pbs-devel@lists.proxmox.com>,
"Fabian Grünbichler" <f.gruenbichler@proxmox.com>
Subject: Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
Date: Wed, 29 Oct 2025 09:04:38 +0100
Message-ID: <61b23f90-8fd5-4ac4-a34f-f4ed854a9e6e@proxmox.com>
In-Reply-To: <20251027135435.488551-1-f.gruenbichler@proxmox.com>

Thanks for the cleanup, two questions inline.

On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
> the current code base doesn't use the index writers for inserting chunks into a
> chunk store, and it probably shouldn't, given the intricate requirements around
> interacting with S3.
>
> all of this code seems to be dead code, remove it to make reasoning about
> chunk insertion code paths easier.
>
> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> ---
> stumbled upon this while thinking about S3 synchronization issues..
> AFAICT this is leftover from the very initial phase of PBS development
>
> pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
> pbs-datastore/src/fixed_index.rs | 38 --------
> 2 files changed, 180 deletions(-)
>
> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
> index ff6c36782..624df0119 100644
> --- a/pbs-datastore/src/dynamic_index.rs
> +++ b/pbs-datastore/src/dynamic_index.rs
> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>
> use pbs_tools::lru_cache::LruCache;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::{DataBlob, DataChunkBuilder};
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
> use crate::read_chunk::ReadChunk;
> -use crate::{Chunker, ChunkerImpl};
>
> /// Header format definition for dynamic index files (`.didx`)
> #[repr(C)]
> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>
> /// Create dynamic index files (`.didx`)
> pub struct DynamicIndexWriter {
> - store: Arc<ChunkStore>,
> writer: BufWriter<File>,
> closed: bool,
> filename: PathBuf,
> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
> let csum = Some(openssl::sha::Sha256::new());
>
> Ok(Self {
> - store,
question: is it fine to drop the chunk store reference here? The chunk
store holds the process locker, which should outlive the writer ...
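
To illustrate what I mean (simplified stand-in types, not the real API,
just the ownership relationship as I understand it): if the writer no
longer holds an Arc<ChunkStore>, dropping the last store reference while
the writer is still open would also drop the process locker:

    use std::sync::Arc;

    struct ProcessLocker; // stand-in for the real process locker

    struct ChunkStore {
        // lives exactly as long as the last Arc<ChunkStore> reference
        _locker: Arc<ProcessLocker>,
    }

    // after this patch: no Arc<ChunkStore> field keeping the store alive
    struct DynamicIndexWriter;

    fn open_writer(store: Arc<ChunkStore>) -> DynamicIndexWriter {
        let writer = DynamicIndexWriter;
        // if this was the last reference, the process locker is released
        // here, while the writer is still in use
        drop(store);
        writer
    }
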
> writer,
> closed: false,
> filename: full_path,
> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
> })
> }
>
> - // fixme: use add_chunk instead?
> - pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
> - self.store.insert_chunk(chunk, digest)
> - }
> -
> pub fn close(&mut self) -> Result<[u8; 32], Error> {
> if self.closed {
> bail!(
> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
> }
> }
>
> -/// Writer which splits a binary stream into dynamic sized chunks
> -///
> -/// And store the resulting chunk list into the index file.
> -pub struct DynamicChunkWriter {
> - index: DynamicIndexWriter,
> - closed: bool,
> - chunker: ChunkerImpl,
> - stat: ChunkStat,
> - chunk_offset: usize,
> - last_chunk: usize,
> - chunk_buffer: Vec<u8>,
> -}
> -
> -impl DynamicChunkWriter {
> - pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
> - Self {
> - index,
> - closed: false,
> - chunker: ChunkerImpl::new(chunk_size),
> - stat: ChunkStat::new(0),
> - chunk_offset: 0,
> - last_chunk: 0,
> - chunk_buffer: Vec::with_capacity(chunk_size * 4),
> - }
> - }
> -
> - pub fn stat(&self) -> &ChunkStat {
> - &self.stat
> - }
> -
> - pub fn close(&mut self) -> Result<(), Error> {
> - if self.closed {
> - return Ok(());
> - }
> -
> - self.closed = true;
> -
> - self.write_chunk_buffer()?;
> -
> - self.index.close()?;
> -
> - self.stat.size = self.chunk_offset as u64;
> -
> - // add size of index file
> - self.stat.size +=
> - (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
> -
> - Ok(())
> - }
> -
> - fn write_chunk_buffer(&mut self) -> Result<(), Error> {
> - let chunk_size = self.chunk_buffer.len();
> -
> - if chunk_size == 0 {
> - return Ok(());
> - }
> -
> - let expected_chunk_size = self.chunk_offset - self.last_chunk;
> - if expected_chunk_size != self.chunk_buffer.len() {
> - bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
> - }
> -
> - self.stat.chunk_count += 1;
> -
> - self.last_chunk = self.chunk_offset;
> -
> - let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
> - .compress(true)
> - .build()?;
> -
> - match self.index.insert_chunk(&chunk, &digest) {
> - Ok((is_duplicate, compressed_size)) => {
> - self.stat.compressed_size += compressed_size;
> - if is_duplicate {
> - self.stat.duplicate_chunks += 1;
> - } else {
> - self.stat.disk_size += compressed_size;
> - }
> -
> - log::info!(
> - "ADD CHUNK {:016x} {} {}% {} {}",
> - self.chunk_offset,
> - chunk_size,
> - (compressed_size * 100) / (chunk_size as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> - self.index.add_chunk(self.chunk_offset as u64, &digest)?;
> - self.chunk_buffer.truncate(0);
> - Ok(())
> - }
> - Err(err) => {
> - self.chunk_buffer.truncate(0);
> - Err(err)
> - }
> - }
> - }
> -}
> -
> -impl Write for DynamicChunkWriter {
> - fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
> - let chunker = &mut self.chunker;
> -
> - let ctx = crate::chunker::Context::default();
> - let pos = chunker.scan(data, &ctx);
> -
> - if pos > 0 {
> - self.chunk_buffer.extend_from_slice(&data[0..pos]);
> - self.chunk_offset += pos;
> -
> - if let Err(err) = self.write_chunk_buffer() {
> - return Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - err.to_string(),
> - ));
> - }
> - Ok(pos)
> - } else {
> - self.chunk_offset += data.len();
> - self.chunk_buffer.extend_from_slice(data);
> - Ok(data.len())
> - }
> - }
> -
> - fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
> - Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - "please use close() instead of flush()",
> - ))
> - }
> -}
> -
> struct CachedChunk {
> range: Range<u64>,
> data: Vec<u8>,
> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
> index 0f289543f..6c3be2d49 100644
> --- a/pbs-datastore/src/fixed_index.rs
> +++ b/pbs-datastore/src/fixed_index.rs
> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
> use proxmox_io::ReadExt;
> use proxmox_uuid::Uuid;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::ChunkInfo;
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
>
> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
> }
>
> pub struct FixedIndexWriter {
> - store: Arc<ChunkStore>,
> file: File,
> filename: PathBuf,
> tmp_filename: PathBuf,
> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
> .cast::<u8>();
>
> Ok(Self {
> - store,
question: ... same as above?
> file,
> filename: full_path,
> tmp_filename: tmp_path,
> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
> Ok(pos / self.chunk_size)
> }
>
> - // Note: We want to add data out of order, so do not assume any order here.
> - pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
> - let chunk_len = chunk_info.chunk_len as usize;
> - let offset = chunk_info.offset as usize; // end of chunk
> -
> - let idx = self.check_chunk_alignment(offset, chunk_len)?;
> -
> - let (is_duplicate, compressed_size) = self
> - .store
> - .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
> -
> - stat.chunk_count += 1;
> - stat.compressed_size += compressed_size;
> -
> - let digest = &chunk_info.digest;
> -
> - log::info!(
> - "ADD CHUNK {} {} {}% {} {}",
> - idx,
> - chunk_len,
> - (compressed_size * 100) / (chunk_len as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> -
> - if is_duplicate {
> - stat.duplicate_chunks += 1;
> - } else {
> - stat.disk_size += compressed_size;
> - }
> -
> - self.add_digest(idx, digest)
> - }
> -
> pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
> if index >= self.index_length {
> bail!(