From mboxrd@z Thu Jan  1 00:00:00 1970
From: Dominik Csapak
To: pbs-devel@lists.proxmox.com
Date: Mon, 7 Jul 2025 15:27:06 +0200
Message-Id: <20250707132706.2854973-4-d.csapak@proxmox.com>
In-Reply-To: <20250707132706.2854973-1-d.csapak@proxmox.com>
References: <20250707132706.2854973-1-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.39.5
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
List-Id: Proxmox Backup Server development discussion
Subject: [pbs-devel] [RFC PATCH proxmox-backup 3/3] verify: use separate read pool for reading chunks

Instead of having each worker thread read and then verify its chunk, use a
separate reader pool that reads chunks in parallel but independently of
verification.

While this does introduce 4 new threads, they should be mostly busy reading
from disk and not doing anything CPU intensive.

The advantage over the current system is that the reader threads can start
loading the next chunks while the previous ones are still being verified.
Due to the nature of the ParallelHandler, the channel is bounded to the
number of threads, so no more than 4 chunks are read in advance.

In my local tests I measured the following speed difference when verifying a
single snapshot of ~64 GiB (4x the RAM size) with 12 cores:

  current:                                      ~550 MiB/s
  previous patch (moving loading into threads): ~950 MiB/s
  this patch:                                  ~1150 MiB/s

Obviously, IO and CPU load increased in line with the throughput.
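For illustration only, here is a minimal, self-contained sketch of the bounded
read-ahead idea using plain std primitives (a sync_channel standing in for the
ParallelHandler); load_chunk() and verify_chunk() are made-up placeholders and
this is not the PBS implementation:

use std::sync::mpsc::sync_channel;
use std::thread;

// placeholder for loading a chunk from disk (hypothetical helper)
fn load_chunk(id: u64) -> Vec<u8> {
    vec![(id % 251) as u8; 4096]
}

// placeholder for digest verification (hypothetical helper)
fn verify_chunk(chunk: &[u8]) -> bool {
    !chunk.is_empty()
}

fn main() {
    // bounded channel: the reader can run at most 4 chunks ahead of the
    // verifier, mirroring the read-ahead limit described above
    let (tx, rx) = sync_channel::<Vec<u8>>(4);

    // reader thread: loads chunks and hands them over; it blocks once the
    // channel is full, so memory use stays bounded
    let reader = thread::spawn(move || {
        for id in 0..16 {
            if tx.send(load_chunk(id)).is_err() {
                break; // receiver hung up
            }
        }
    });

    // verifier: consumes chunks as they arrive, so reading the next chunk
    // overlaps with verifying the current one
    for chunk in rx {
        assert!(verify_chunk(&chunk));
    }

    reader.join().unwrap();
}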
Signed-off-by: Dominik Csapak
---
 src/backup/verify.rs | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 83dd0d9a3..b139819a6 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -17,7 +17,7 @@ use pbs_api_types::{
 use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo};
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{BackupManifest, FileInfo};
-use pbs_datastore::{DataStore, StoreProgress};
+use pbs_datastore::{DataBlob, DataStore, StoreProgress};
 
 use crate::tools::parallel_handler::ParallelHandler;
 
@@ -114,24 +114,8 @@ fn verify_index_chunks(
         let corrupt_chunks = Arc::clone(&verify_worker.corrupt_chunks);
         let verified_chunks = Arc::clone(&verify_worker.verified_chunks);
         let errors = Arc::clone(&errors);
-        let read_bytes = Arc::clone(&read_bytes);
-        let decoded_bytes = Arc::clone(&decoded_bytes);
 
-        move |(digest, size): ([u8; 32], u64)| {
-            let chunk = match datastore.load_chunk(&digest) {
-                Err(err) => {
-                    corrupt_chunks.lock().unwrap().insert(digest);
-                    error!("can't verify chunk, load failed - {err}");
-                    errors.fetch_add(1, Ordering::SeqCst);
-                    rename_corrupted_chunk(datastore.clone(), &digest);
-                    return Ok(());
-                }
-                Ok(chunk) => {
-                    read_bytes.fetch_add(chunk.raw_size(), Ordering::SeqCst);
-                    decoded_bytes.fetch_add(size, Ordering::SeqCst);
-                    chunk
-                }
-            };
+        move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| {
             let chunk_crypt_mode = match chunk.crypt_mode() {
                 Err(err) => {
                     corrupt_chunks.lock().unwrap().insert(digest);
@@ -162,6 +146,32 @@
         }
     });
 
+    let reader_pool = ParallelHandler::new("read chunks", 4, {
+        let datastore = Arc::clone(&verify_worker.datastore);
+        let corrupt_chunks = Arc::clone(&verify_worker.corrupt_chunks);
+        let errors = Arc::clone(&errors);
+        let read_bytes = Arc::clone(&read_bytes);
+        let decoded_bytes = Arc::clone(&decoded_bytes);
+        let decoder_pool = decoder_pool.channel();
+
+        move |(digest, size): ([u8; 32], u64)| {
+            match datastore.load_chunk(&digest) {
+                Err(err) => {
+                    corrupt_chunks.lock().unwrap().insert(digest);
+                    error!("can't verify chunk, load failed - {err}");
+                    errors.fetch_add(1, Ordering::SeqCst);
+                    rename_corrupted_chunk(datastore.clone(), &digest);
+                }
+                Ok(chunk) => {
+                    read_bytes.fetch_add(chunk.raw_size(), Ordering::SeqCst);
+                    decoded_bytes.fetch_add(size, Ordering::SeqCst);
+                    decoder_pool.send((chunk, digest, size))?;
+                }
+            }
+            Ok(())
+        }
+    });
+
     let skip_chunk = |digest: &[u8; 32]| -> bool {
         if verify_worker
             .verified_chunks
@@ -209,9 +219,10 @@
             continue; // already verified or marked corrupt
         }
 
-        decoder_pool.send((info.digest, info.size()))?;
+        reader_pool.send((info.digest, info.size()))?;
     }
 
+    reader_pool.complete()?;
     decoder_pool.complete()?;
 
     let elapsed = start_time.elapsed().as_secs_f64();
-- 
2.39.5

_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel