public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup] tape/pool_writer: skip already backed up chunks in iterator
Date: Thu, 17 Feb 2022 14:50:40 +0100	[thread overview]
Message-ID: <20220217135040.512359-1-d.csapak@proxmox.com> (raw)

currently, the iterator goes over *all* chunks of the index, even
those already backed up by a previous snapshots in the same tape
backup. this is bad since for each iterator, we stat each chunk to
sort by inode number. so to avoid stat'ing the same chunks over
and over for consecutive snapshots, add a 'skip_fn' to the iterator
and in the pool writer and check the catalog_set if we can skip it

this means we can drop the later check for the catalog_set
(since we don't modify that here)

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
i am not completely sure about the 'catalog_set' check removal in the
loop itself, but i believe that we do not modify that in parallel, since
on tape backup, we only ever backup a single snapshot concurrently.

 pbs-datastore/src/snapshot_reader.rs        | 19 ++++++++++---------
 src/tape/pool_writer/new_chunks_iterator.rs | 14 +++++++++-----
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 18bc0d83..1bbf57e7 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -87,9 +87,9 @@ impl SnapshotReader {
         Ok(file)
     }
 
-    /// Returns an iterator for all used chunks.
-    pub fn chunk_iterator(&self) -> Result<SnapshotChunkIterator, Error> {
-        SnapshotChunkIterator::new(self)
+    /// Returns an iterator for all chunks not skipped by `skip_fn`.
+    pub fn chunk_iterator<F: Fn(&[u8;32]) -> bool>(&self, skip_fn: F) -> Result<SnapshotChunkIterator<F>, Error> {
+        SnapshotChunkIterator::new(self, skip_fn)
     }
 }
 
@@ -98,13 +98,14 @@ impl SnapshotReader {
 /// Note: The iterator returns a `Result`, and the iterator state is
 /// undefined after the first error. So it make no sense to continue
 /// iteration after the first error.
-pub struct SnapshotChunkIterator<'a> {
+pub struct SnapshotChunkIterator<'a, F: Fn(&[u8;32]) -> bool> {
     snapshot_reader: &'a SnapshotReader,
     todo_list: Vec<String>,
+    skip_fn: F,
     current_index: Option<(Arc<Box<dyn IndexFile + Send>>, usize, Vec<(usize, u64)>)>,
 }
 
-impl <'a> Iterator for SnapshotChunkIterator<'a> {
+impl <'a, F: Fn(&[u8;32]) -> bool> Iterator for SnapshotChunkIterator<'a, F> {
     type Item = Result<[u8; 32], Error>;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -121,7 +122,7 @@ impl <'a> Iterator for SnapshotChunkIterator<'a> {
 
                         let datastore =
                             DataStore::lookup_datastore(self.snapshot_reader.datastore_name())?;
-                        let order = datastore.get_chunks_in_order(&index, |_| false, |_| Ok(()))?;
+                        let order = datastore.get_chunks_in_order(&index, &self.skip_fn, |_| Ok(()))?;
 
                         self.current_index = Some((Arc::new(index), 0, order));
                     } else {
@@ -142,9 +143,9 @@ impl <'a> Iterator for SnapshotChunkIterator<'a> {
     }
 }
 
-impl <'a> SnapshotChunkIterator<'a> {
+impl <'a, F: Fn(&[u8;32]) -> bool> SnapshotChunkIterator<'a, F> {
 
-    pub fn new(snapshot_reader: &'a SnapshotReader) -> Result<Self, Error> {
+    pub fn new(snapshot_reader: &'a SnapshotReader, skip_fn: F) -> Result<Self, Error> {
 
         let mut todo_list = Vec::new();
 
@@ -157,6 +158,6 @@ impl <'a> SnapshotChunkIterator<'a> {
             }
         }
 
-        Ok(Self { snapshot_reader, todo_list, current_index: None })
+        Ok(Self { snapshot_reader, todo_list, current_index: None, skip_fn })
     }
 }
diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
index 381b51d3..73cf58ee 100644
--- a/src/tape/pool_writer/new_chunks_iterator.rs
+++ b/src/tape/pool_writer/new_chunks_iterator.rs
@@ -38,7 +38,15 @@ impl NewChunksIterator {
 
             let result: Result<(), Error> = proxmox_lang::try_block!({
 
-                let mut chunk_iter = snapshot_reader.chunk_iterator()?;
+                let mut chunk_iter = {
+                    let datastore_name = datastore_name.to_string();
+                    snapshot_reader.chunk_iterator(move |digest| {
+                        catalog_set
+                            .lock()
+                            .unwrap()
+                            .contains_chunk(&datastore_name, digest)
+                    })
+                }?;
 
                 loop {
                     let digest = match chunk_iter.next() {
@@ -53,10 +61,6 @@ impl NewChunksIterator {
                         continue;
                     }
 
-                    if catalog_set.lock().unwrap().contains_chunk(datastore_name, &digest) {
-                        continue;
-                    };
-
                     let blob = datastore.load_chunk(&digest)?;
                     //println!("LOAD CHUNK {}", hex::encode(&digest));
                     match tx.send(Ok(Some((digest, blob)))) {
-- 
2.30.2





             reply	other threads:[~2022-02-17 13:51 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-17 13:50 Dominik Csapak [this message]
2022-02-18  9:45 ` [pbs-devel] applied: " Dietmar Maurer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220217135040.512359-1-d.csapak@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal