From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 25F2820EC91 for ; Tue, 30 Apr 2024 11:39:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 81A2B1FFE8; Tue, 30 Apr 2024 11:39:42 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Tue, 30 Apr 2024 11:39:39 +0200 Message-Id: <20240430093939.1318786-2-d.csapak@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240430093939.1318786-1-d.csapak@proxmox.com> References: <20240430093939.1318786-1-d.csapak@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.134 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_4 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[datastore.rs] Subject: [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Using a single thread for reading is not optimal in some cases, e.g. when the underlying storage can handle more reads in parallel than with a single thread. This depends largely on the storage and cpu. We use the ParallelHandler to handle the actual reads. Make the sync_channel buffer size depend on the number of threads so we have space for two chunks per thread. Did some benchmarks on my (virtual) pbs with a real tape drive (lto8 tape in an lto9 drive): For my NVME datastore it did not matter much how many threads were used so I guess the bottleneck was either in the hba/drive or cable rather than the disks/cpu. (Always got around ~300MB/s from the task log) For a datastore on a single HDD, the results are much more interesting: 1 Thread: ~55MB/s 2 Threads: ~70MB/s 4 Threads: ~80MB/s 8 Threads: ~95MB/s So the fact that multiple IO requests are done in parallel does speed up the tape backup in general. 
Signed-off-by: Dominik Csapak --- although I did benchmark, I would be very grateful if other people could test this (and the previous) change in their varying disk setups, so we can verify that it really makes a difference and is worth it to have it configurable pbs-api-types/src/datastore.rs | 2 +- src/tape/pool_writer/new_chunks_iterator.rs | 42 +++++++++++++-------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs index 2ad2ae063..243c4759f 100644 --- a/pbs-api-types/src/datastore.rs +++ b/pbs-api-types/src/datastore.rs @@ -210,7 +210,7 @@ pub enum DatastoreFSyncLevel { optional: true, }, "read-threads": { - description: "Controls how many threads are used for reading from the datastore for verification.", + description: "Controls how many threads are used for reading from the datastore for verify and tape backup.", type: usize, optional: true, minimum: 1, diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs index 1454b33d2..63b10c9f8 100644 --- a/src/tape/pool_writer/new_chunks_iterator.rs +++ b/src/tape/pool_writer/new_chunks_iterator.rs @@ -6,8 +6,9 @@ use anyhow::{format_err, Error}; use pbs_datastore::{DataBlob, DataStore, SnapshotReader}; use crate::tape::CatalogSet; +use crate::tools::parallel_handler::ParallelHandler; -/// Chunk iterator which use a separate thread to read chunks +/// Chunk iterator which use separate threads to read chunks /// /// The iterator skips duplicate chunks and chunks already in the /// catalog. 
@@ -25,7 +26,8 @@ impl NewChunksIterator { snapshot_reader: Arc>, catalog_set: Arc>, ) -> Result<(std::thread::JoinHandle<()>, Self), Error> { - let (tx, rx) = std::sync::mpsc::sync_channel(3); + let read_threads = datastore.get_read_threads(); + let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2); let reader_thread = std::thread::spawn(move || { let snapshot_reader = snapshot_reader.lock().unwrap(); @@ -35,36 +37,44 @@ impl NewChunksIterator { let datastore_name = snapshot_reader.datastore_name().to_string(); let result: Result<(), Error> = proxmox_lang::try_block!({ - let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| { + let chunk_iter = snapshot_reader.chunk_iterator(move |digest| { catalog_set .lock() .unwrap() .contains_chunk(&datastore_name, digest) })?; - loop { - let digest = match chunk_iter.next() { - None => { - let _ = tx.send(Ok(None)); // ignore send error - break; + let reader_pool = + ParallelHandler::new("tape backup chunk reader pool", read_threads, { + let tx = tx.clone(); + move |digest| { + let blob = datastore.load_chunk(&digest)?; + //println!("LOAD CHUNK {}", hex::encode(&digest)); + + tx.send(Ok(Some((digest, blob)))).map_err(|err| { + format_err!("error sending result from reader thread: {err}") + })?; + + Ok(()) } - Some(digest) => digest?, - }; + }); + + for digest in chunk_iter { + let digest = digest?; if chunk_index.contains(&digest) { continue; } - let blob = datastore.load_chunk(&digest)?; - //println!("LOAD CHUNK {}", hex::encode(&digest)); - if let Err(err) = tx.send(Ok(Some((digest, blob)))) { - eprintln!("could not send chunk to reader thread: {err}"); - break; - } + reader_pool.send(digest)?; chunk_index.insert(digest); } + reader_pool.complete()?; + + let _ = tx.send(Ok(None)); // ignore send error + Ok(()) }); if let Err(err) = result { -- 2.39.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com 
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel