From: Christian Ebner <c.ebner@proxmox.com>
To: "Proxmox Backup Server development discussion"
<pbs-devel@lists.proxmox.com>,
"Fabian Grünbichler" <f.gruenbichler@proxmox.com>
Subject: Re: [pbs-devel] [RFC proxmox-backup] index writers: remove dead code
Date: Wed, 29 Oct 2025 09:04:38 +0100
Message-ID: <61b23f90-8fd5-4ac4-a34f-f4ed854a9e6e@proxmox.com>
In-Reply-To: <20251027135435.488551-1-f.gruenbichler@proxmox.com>

Thanks for the cleanup, two questions inline.

On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
> the current code base doesn't use the index writers for inserting chunks into a
> chunk store, and it probably shouldn't, given the intricate requirements around
> interacting with S3.
>
> all of this code seems to be dead code, remove it to make reasoning about
> chunk insertion code paths easier.
>
> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> ---
> stumbled upon this while thinking about S3 synchronization issues..
> AFAICT this is leftover from the very initial phase of PBS development
>
> pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
> pbs-datastore/src/fixed_index.rs | 38 --------
> 2 files changed, 180 deletions(-)
>
> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
> index ff6c36782..624df0119 100644
> --- a/pbs-datastore/src/dynamic_index.rs
> +++ b/pbs-datastore/src/dynamic_index.rs
> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>
> use pbs_tools::lru_cache::LruCache;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::{DataBlob, DataChunkBuilder};
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
> use crate::read_chunk::ReadChunk;
> -use crate::{Chunker, ChunkerImpl};
>
> /// Header format definition for dynamic index files (`.didx`)
> #[repr(C)]
> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>
> /// Create dynamic index files (`.didx`)
> pub struct DynamicIndexWriter {
> - store: Arc<ChunkStore>,
> writer: BufWriter<File>,
> closed: bool,
> filename: PathBuf,
> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
> let csum = Some(openssl::sha::Sha256::new());
>
> Ok(Self {
> - store,
question: is it fine to drop the chunk store reference here? The chunk
store holds the process locker, which should outlive the writer ...
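
To illustrate what I mean (simplified stand-in types, not the real API,
just the ownership relationship as I understand it): if the writer no
longer holds an Arc<ChunkStore>, dropping the last store reference while
the writer is still open would also drop the process locker:

    use std::sync::Arc;

    struct ProcessLocker; // stand-in for the real process locker

    struct ChunkStore {
        // lives exactly as long as the last Arc<ChunkStore> reference
        _locker: Arc<ProcessLocker>,
    }

    // after this patch: no Arc<ChunkStore> field keeping the store alive
    struct DynamicIndexWriter;

    fn open_writer(store: Arc<ChunkStore>) -> DynamicIndexWriter {
        let writer = DynamicIndexWriter;
        // if this was the last reference, the process locker is released
        // here, while the writer is still in use
        drop(store);
        writer
    }
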
> writer,
> closed: false,
> filename: full_path,
> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
> })
> }
>
> - // fixme: use add_chunk instead?
> - pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
> - self.store.insert_chunk(chunk, digest)
> - }
> -
> pub fn close(&mut self) -> Result<[u8; 32], Error> {
> if self.closed {
> bail!(
> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
> }
> }
>
> -/// Writer which splits a binary stream into dynamic sized chunks
> -///
> -/// And store the resulting chunk list into the index file.
> -pub struct DynamicChunkWriter {
> - index: DynamicIndexWriter,
> - closed: bool,
> - chunker: ChunkerImpl,
> - stat: ChunkStat,
> - chunk_offset: usize,
> - last_chunk: usize,
> - chunk_buffer: Vec<u8>,
> -}
> -
> -impl DynamicChunkWriter {
> - pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
> - Self {
> - index,
> - closed: false,
> - chunker: ChunkerImpl::new(chunk_size),
> - stat: ChunkStat::new(0),
> - chunk_offset: 0,
> - last_chunk: 0,
> - chunk_buffer: Vec::with_capacity(chunk_size * 4),
> - }
> - }
> -
> - pub fn stat(&self) -> &ChunkStat {
> - &self.stat
> - }
> -
> - pub fn close(&mut self) -> Result<(), Error> {
> - if self.closed {
> - return Ok(());
> - }
> -
> - self.closed = true;
> -
> - self.write_chunk_buffer()?;
> -
> - self.index.close()?;
> -
> - self.stat.size = self.chunk_offset as u64;
> -
> - // add size of index file
> - self.stat.size +=
> - (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
> -
> - Ok(())
> - }
> -
> - fn write_chunk_buffer(&mut self) -> Result<(), Error> {
> - let chunk_size = self.chunk_buffer.len();
> -
> - if chunk_size == 0 {
> - return Ok(());
> - }
> -
> - let expected_chunk_size = self.chunk_offset - self.last_chunk;
> - if expected_chunk_size != self.chunk_buffer.len() {
> - bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
> - }
> -
> - self.stat.chunk_count += 1;
> -
> - self.last_chunk = self.chunk_offset;
> -
> - let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
> - .compress(true)
> - .build()?;
> -
> - match self.index.insert_chunk(&chunk, &digest) {
> - Ok((is_duplicate, compressed_size)) => {
> - self.stat.compressed_size += compressed_size;
> - if is_duplicate {
> - self.stat.duplicate_chunks += 1;
> - } else {
> - self.stat.disk_size += compressed_size;
> - }
> -
> - log::info!(
> - "ADD CHUNK {:016x} {} {}% {} {}",
> - self.chunk_offset,
> - chunk_size,
> - (compressed_size * 100) / (chunk_size as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> - self.index.add_chunk(self.chunk_offset as u64, &digest)?;
> - self.chunk_buffer.truncate(0);
> - Ok(())
> - }
> - Err(err) => {
> - self.chunk_buffer.truncate(0);
> - Err(err)
> - }
> - }
> - }
> -}
> -
> -impl Write for DynamicChunkWriter {
> - fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
> - let chunker = &mut self.chunker;
> -
> - let ctx = crate::chunker::Context::default();
> - let pos = chunker.scan(data, &ctx);
> -
> - if pos > 0 {
> - self.chunk_buffer.extend_from_slice(&data[0..pos]);
> - self.chunk_offset += pos;
> -
> - if let Err(err) = self.write_chunk_buffer() {
> - return Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - err.to_string(),
> - ));
> - }
> - Ok(pos)
> - } else {
> - self.chunk_offset += data.len();
> - self.chunk_buffer.extend_from_slice(data);
> - Ok(data.len())
> - }
> - }
> -
> - fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
> - Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - "please use close() instead of flush()",
> - ))
> - }
> -}
> -
> struct CachedChunk {
> range: Range<u64>,
> data: Vec<u8>,
> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
> index 0f289543f..6c3be2d49 100644
> --- a/pbs-datastore/src/fixed_index.rs
> +++ b/pbs-datastore/src/fixed_index.rs
> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
> use proxmox_io::ReadExt;
> use proxmox_uuid::Uuid;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::ChunkInfo;
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
>
> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
> }
>
> pub struct FixedIndexWriter {
> - store: Arc<ChunkStore>,
> file: File,
> filename: PathBuf,
> tmp_filename: PathBuf,
> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
> .cast::<u8>();
>
> Ok(Self {
> - store,
question: ... same as above?
> file,
> filename: full_path,
> tmp_filename: tmp_path,
> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
> Ok(pos / self.chunk_size)
> }
>
> - // Note: We want to add data out of order, so do not assume any order here.
> - pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
> - let chunk_len = chunk_info.chunk_len as usize;
> - let offset = chunk_info.offset as usize; // end of chunk
> -
> - let idx = self.check_chunk_alignment(offset, chunk_len)?;
> -
> - let (is_duplicate, compressed_size) = self
> - .store
> - .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
> -
> - stat.chunk_count += 1;
> - stat.compressed_size += compressed_size;
> -
> - let digest = &chunk_info.digest;
> -
> - log::info!(
> - "ADD CHUNK {} {} {}% {} {}",
> - idx,
> - chunk_len,
> - (compressed_size * 100) / (chunk_len as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> -
> - if is_duplicate {
> - stat.duplicate_chunks += 1;
> - } else {
> - stat.disk_size += compressed_size;
> - }
> -
> - self.add_digest(idx, digest)
> - }
> -
> pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
> if index >= self.index_length {
> bail!(