From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 2DE5060D76 for ; Thu, 17 Feb 2022 14:51:12 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 24BEE1FF45 for ; Thu, 17 Feb 2022 14:50:42 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 676961FF3C for ; Thu, 17 Feb 2022 14:50:41 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 3CBEB46D07 for ; Thu, 17 Feb 2022 14:50:41 +0100 (CET) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Thu, 17 Feb 2022 14:50:40 +0100 Message-Id: <20220217135040.512359-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.30.2 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.007 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_4 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH proxmox-backup] tape/pool_writer: skip already backed up chunks in iterator X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 
2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 17 Feb 2022 13:51:12 -0000 Currently, the iterator goes over *all* chunks of the index, even those already backed up by previous snapshots in the same tape backup. This is bad since, for each iterator, we stat each chunk to sort by inode number. So, to avoid stat'ing the same chunks over and over for consecutive snapshots, add a 'skip_fn' to the iterator and, in the pool writer, use it to check the catalog_set to decide whether a chunk can be skipped. This means we can drop the later check for the catalog_set (since we don't modify that here). Signed-off-by: Dominik Csapak --- I am not completely sure about the 'catalog_set' check removal in the loop itself, but I believe that we do not modify that in parallel, since on tape backup, we only ever back up a single snapshot at a time. pbs-datastore/src/snapshot_reader.rs | 19 ++++++++++--------- src/tape/pool_writer/new_chunks_iterator.rs | 14 +++++++++----- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs index 18bc0d83..1bbf57e7 100644 --- a/pbs-datastore/src/snapshot_reader.rs +++ b/pbs-datastore/src/snapshot_reader.rs @@ -87,9 +87,9 @@ impl SnapshotReader { Ok(file) } - /// Returns an iterator for all used chunks. - pub fn chunk_iterator(&self) -> Result { - SnapshotChunkIterator::new(self) + /// Returns an iterator for all chunks not skipped by `skip_fn`. + pub fn chunk_iterator bool>(&self, skip_fn: F) -> Result, Error> { + SnapshotChunkIterator::new(self, skip_fn) } } @@ -98,13 +98,14 @@ impl SnapshotReader { /// Note: The iterator returns a `Result`, and the iterator state is /// undefined after the first error. So it make no sense to continue /// iteration after the first error. 
-pub struct SnapshotChunkIterator<'a> { +pub struct SnapshotChunkIterator<'a, F: Fn(&[u8;32]) -> bool> { snapshot_reader: &'a SnapshotReader, todo_list: Vec, + skip_fn: F, current_index: Option<(Arc>, usize, Vec<(usize, u64)>)>, } -impl <'a> Iterator for SnapshotChunkIterator<'a> { +impl <'a, F: Fn(&[u8;32]) -> bool> Iterator for SnapshotChunkIterator<'a, F> { type Item = Result<[u8; 32], Error>; fn next(&mut self) -> Option { @@ -121,7 +122,7 @@ impl <'a> Iterator for SnapshotChunkIterator<'a> { let datastore = DataStore::lookup_datastore(self.snapshot_reader.datastore_name())?; - let order = datastore.get_chunks_in_order(&index, |_| false, |_| Ok(()))?; + let order = datastore.get_chunks_in_order(&index, &self.skip_fn, |_| Ok(()))?; self.current_index = Some((Arc::new(index), 0, order)); } else { @@ -142,9 +143,9 @@ impl <'a> Iterator for SnapshotChunkIterator<'a> { } } -impl <'a> SnapshotChunkIterator<'a> { +impl <'a, F: Fn(&[u8;32]) -> bool> SnapshotChunkIterator<'a, F> { - pub fn new(snapshot_reader: &'a SnapshotReader) -> Result { + pub fn new(snapshot_reader: &'a SnapshotReader, skip_fn: F) -> Result { let mut todo_list = Vec::new(); @@ -157,6 +158,6 @@ impl <'a> SnapshotChunkIterator<'a> { } } - Ok(Self { snapshot_reader, todo_list, current_index: None }) + Ok(Self { snapshot_reader, todo_list, current_index: None, skip_fn }) } } diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs index 381b51d3..73cf58ee 100644 --- a/src/tape/pool_writer/new_chunks_iterator.rs +++ b/src/tape/pool_writer/new_chunks_iterator.rs @@ -38,7 +38,15 @@ impl NewChunksIterator { let result: Result<(), Error> = proxmox_lang::try_block!({ - let mut chunk_iter = snapshot_reader.chunk_iterator()?; + let mut chunk_iter = { + let datastore_name = datastore_name.to_string(); + snapshot_reader.chunk_iterator(move |digest| { + catalog_set + .lock() + .unwrap() + .contains_chunk(&datastore_name, digest) + }) + }?; loop { let digest = match 
chunk_iter.next() { @@ -53,10 +61,6 @@ impl NewChunksIterator { continue; } - if catalog_set.lock().unwrap().contains_chunk(datastore_name, &digest) { - continue; - }; - let blob = datastore.load_chunk(&digest)?; //println!("LOAD CHUNK {}", hex::encode(&digest)); match tx.send(Ok(Some((digest, blob)))) { -- 2.30.2