From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Tue, 7 May 2024 09:29:53 +0200
Message-Id: <20240507072955.364206-2-d.csapak@proxmox.com>
In-Reply-To: <20240507072955.364206-1-d.csapak@proxmox.com>
References: <20240507072955.364206-1-d.csapak@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup v2 1/3] tape: introduce a tape backup read thread tuning option

Using a single thread for reading is not optimal in some cases, e.g.
when the underlying storage can handle more reads in parallel than a
single thread can issue.

Use the ParallelHandler to handle the actual reads, and make the
sync_channel buffer size depend on the number of threads, so we have
space for two chunks per thread.

How this impacts the backup speed largely depends on the underlying
storage and how the backup is laid out on it. I benchmarked the
following setups:

* Setup A: relatively spread-out backup on a virtualized PBS on single HDDs
* Setup B: mostly sequential chunks on a virtualized PBS on single HDDs
* Setup C: backup on a virtualized PBS on a fast NVMe
* Setup D: backup on a bare-metal PBS with ZFS in a RAID10 with 6 HDDs
  and 2 fast special devices in a mirror

(values are in MB/s as reported in the task log; caches were cleared
between runs, and the backups were bigger than the available memory)

setup  1 thread  2 threads  4 threads  8 threads
A      55        70         80         95
B      110       89         100        108
C      294       294        294        294
D      118       180        300        300

So there are cases where multiple read threads speed up the tape backup
(dramatically). On the other hand, there are situations where reading
from a single thread is actually faster, probably because we can then
read from the HDD sequentially.

I used a new default value of '4' here since that gave good performance
on all setups (not necessarily the best), and we also have a default
value of '4' for verification threads.

We use a struct for the datastore's thread settings since we want to
introduce other thread tuning options too.
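
To illustrate the channel sizing: below is a minimal, std-only sketch of
the bounded handoff described above (illustrative types and timings, not
the PBS code). Several readers fill a sync_channel that has two slots per
thread, and a slower consumer, standing in for the tape writer, drains it,
so read-ahead memory stays bounded:

use std::sync::mpsc::sync_channel;
use std::thread;
use std::time::Duration;

fn main() {
    let read_threads: usize = 4;
    // Two in-flight chunks per reader thread, as in the patch.
    let (tx, rx) = sync_channel::<Vec<u8>>(read_threads * 2);

    let readers: Vec<_> = (0..read_threads)
        .map(|id| {
            let tx = tx.clone();
            thread::spawn(move || {
                for _ in 0..4 {
                    // send() blocks once the buffer is full, so backpressure
                    // keeps memory bounded no matter how fast we read.
                    tx.send(vec![id as u8; 1024]).unwrap();
                }
            })
        })
        .collect();
    drop(tx); // the consumer sees the channel close once all readers are done

    // The "tape writer" side: deliberately slower than the readers.
    for chunk in rx {
        thread::sleep(Duration::from_millis(10));
        println!("wrote {} bytes to tape", chunk.len());
    }
    for reader in readers {
        reader.join().unwrap();
    }
}

If the writer is the bottleneck, the readers simply block in send(); if
the readers are the bottleneck (the HDD setups above), the writer rarely
waits on an empty buffer.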
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 pbs-api-types/src/datastore.rs              |  8 ++++
 pbs-datastore/src/datastore.rs              | 26 ++++++++++++
 src/tape/pool_writer/new_chunks_iterator.rs | 45 +++++++++++++--------
 www/Utils.js                                |  5 +++
 www/datastore/OptionView.js                 |  8 ++++
 5 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
index 31767417a..1dae3867f 100644
--- a/pbs-api-types/src/datastore.rs
+++ b/pbs-api-types/src/datastore.rs
@@ -209,6 +209,11 @@ pub enum DatastoreFSyncLevel {
             type: ChunkOrder,
             optional: true,
         },
+        "tape-backup-read-threads": {
+            type: usize,
+            optional: true,
+            minimum: 1,
+        },
     },
 )]
 #[derive(Serialize, Deserialize, Default)]
@@ -220,6 +225,9 @@ pub struct DatastoreTuning {
     pub chunk_order: Option<ChunkOrder>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub sync_level: Option<DatastoreFSyncLevel>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    /// Configures how many threads to use to read from the datastore while backing up to tape.
+    pub tape_backup_read_threads: Option<usize>,
 }
 
 pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options")
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index f95da7615..73a1cfa39 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -49,6 +49,19 @@ pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error>
     Ok(())
 }
 
+/// Contains the configuration of how many threads to use in various situations
+pub struct ThreadConfiguration {
+    pub tape_backup_read_threads: usize,
+}
+
+impl Default for ThreadConfiguration {
+    fn default() -> Self {
+        Self {
+            tape_backup_read_threads: 4,
+        }
+    }
+}
+
 /// Datastore Management
 ///
 /// A Datastore can store several backups, and provides the
@@ -61,6 +74,7 @@ pub struct DataStoreImpl {
     chunk_order: ChunkOrder,
     last_digest: Option<[u8; 32]>,
     sync_level: DatastoreFSyncLevel,
+    thread_config: ThreadConfiguration,
 }
 
 impl DataStoreImpl {
@@ -75,6 +89,7 @@ impl DataStoreImpl {
             chunk_order: Default::default(),
             last_digest: None,
             sync_level: Default::default(),
+            thread_config: Default::default(),
         })
     }
 }
@@ -305,6 +320,11 @@ impl DataStore {
                 .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
         )?;
 
+        let mut thread_config = ThreadConfiguration::default();
+        if let Some(value) = tuning.tape_backup_read_threads {
+            thread_config.tape_backup_read_threads = value;
+        }
+
         Ok(DataStoreImpl {
             chunk_store,
             gc_mutex: Mutex::new(()),
@@ -313,6 +333,7 @@ impl DataStore {
             chunk_order: tuning.chunk_order.unwrap_or_default(),
             last_digest,
             sync_level: tuning.sync_level.unwrap_or_default(),
+            thread_config,
         })
     }
 
@@ -1377,6 +1398,11 @@ impl DataStore {
         Ok(())
     }
 
+    /// returns the datastore thread configuration
+    pub fn get_thread_configuration(&self) -> &ThreadConfiguration {
+        &self.inner.thread_config
+    }
+
     /// Destroy a datastore. This requires that there are no active operations on the datastore.
     ///
     /// This is a synchronous operation and should be run in a worker-thread.
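
For context, the new knob travels in the datastore config's tuning
property string and is parsed against the DATASTORE_TUNING_STRING_SCHEMA
shown above. The following is a simplified, hypothetical stand-in for that
schema-driven parsing (parse_tuning is an invented name; the real code
uses proxmox-schema's parse_property_string, as in the hunk above), just
to show the fallback behaviour:

// Hypothetical, simplified stand-in for the schema-driven property-string
// parsing; only the new key is handled, other tuning keys are ignored.
struct ThreadConfiguration {
    tape_backup_read_threads: usize,
}

impl Default for ThreadConfiguration {
    fn default() -> Self {
        Self {
            tape_backup_read_threads: 4,
        }
    }
}

fn parse_tuning(tuning: &str) -> Result<ThreadConfiguration, String> {
    let mut config = ThreadConfiguration::default();
    // Property strings look like "sync-level=filesystem,tape-backup-read-threads=8".
    for pair in tuning.split(',').filter(|p| !p.is_empty()) {
        let (key, value) = pair
            .split_once('=')
            .ok_or_else(|| format!("invalid property '{pair}'"))?;
        if key == "tape-backup-read-threads" {
            let threads: usize = value
                .parse()
                .map_err(|err| format!("invalid thread count: {err}"))?;
            if threads < 1 {
                return Err("tape-backup-read-threads: minimum is 1".into());
            }
            config.tape_backup_read_threads = threads;
        }
        // chunk-order, sync-level, ... elided here
    }
    Ok(config)
}

fn main() {
    let config = parse_tuning("tape-backup-read-threads=8").unwrap();
    assert_eq!(config.tape_backup_read_threads, 8);
    // Unset keys fall back to the default of 4, matching the patch.
    let config = parse_tuning("").unwrap();
    assert_eq!(config.tape_backup_read_threads, 4);
}

In the real patch the schema enforces the minimum of 1, and an unset value
falls back to ThreadConfiguration::default() with its 4 threads.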
diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
index 1454b33d2..a2a8091f6 100644
--- a/src/tape/pool_writer/new_chunks_iterator.rs
+++ b/src/tape/pool_writer/new_chunks_iterator.rs
@@ -6,8 +6,9 @@ use anyhow::{format_err, Error};
 use pbs_datastore::{DataBlob, DataStore, SnapshotReader};
 
 use crate::tape::CatalogSet;
+use crate::tools::parallel_handler::ParallelHandler;
 
-/// Chunk iterator which use a separate thread to read chunks
+/// Chunk iterator which uses separate threads to read chunks
 ///
 /// The iterator skips duplicate chunks and chunks already in the
 /// catalog.
@@ -25,7 +26,11 @@ impl NewChunksIterator {
         snapshot_reader: Arc<Mutex<SnapshotReader>>,
         catalog_set: Arc<Mutex<CatalogSet>>,
     ) -> Result<(std::thread::JoinHandle<()>, Self), Error> {
-        let (tx, rx) = std::sync::mpsc::sync_channel(3);
+        let read_threads = datastore
+            .get_thread_configuration()
+            .tape_backup_read_threads;
+
+        let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2);
 
         let reader_thread = std::thread::spawn(move || {
             let snapshot_reader = snapshot_reader.lock().unwrap();
@@ -35,36 +40,44 @@ impl NewChunksIterator {
             let datastore_name = snapshot_reader.datastore_name().to_string();
 
             let result: Result<(), Error> = proxmox_lang::try_block!({
-                let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
+                let chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
                     catalog_set
                         .lock()
                         .unwrap()
                         .contains_chunk(&datastore_name, digest)
                 })?;
 
-                loop {
-                    let digest = match chunk_iter.next() {
-                        None => {
-                            let _ = tx.send(Ok(None)); // ignore send error
-                            break;
+                let reader_pool =
+                    ParallelHandler::new("tape backup chunk reader pool", read_threads, {
+                        let tx = tx.clone();
+                        move |digest| {
+                            let blob = datastore.load_chunk(&digest)?;
+                            //println!("LOAD CHUNK {}", hex::encode(&digest));
+
+                            tx.send(Ok(Some((digest, blob)))).map_err(|err| {
+                                format_err!("error sending result from reader thread: {err}")
+                            })?;
+
+                            Ok(())
                         }
-                        Some(digest) => digest?,
-                    };
+                    });
+
+                for digest in chunk_iter {
+                    let digest = digest?;
 
                     if chunk_index.contains(&digest) {
                         continue;
                     }
 
-                    let blob = datastore.load_chunk(&digest)?;
-                    //println!("LOAD CHUNK {}", hex::encode(&digest));
-                    if let Err(err) = tx.send(Ok(Some((digest, blob)))) {
-                        eprintln!("could not send chunk to reader thread: {err}");
-                        break;
-                    }
+                    reader_pool.send(digest)?;
 
                     chunk_index.insert(digest);
                 }
 
+                reader_pool.complete()?;
+
+                let _ = tx.send(Ok(None)); // ignore send error
+
                 Ok(())
             });
 
             if let Err(err) = result {
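
The ParallelHandler used above already exists in
src/tools/parallel_handler.rs; its shape, condensed from the call sites in
this hunk for illustration, is roughly the following. This is a sketch,
not the real implementation, which collects worker errors properly instead
of printing them; names and error types here are simplified:

use std::sync::mpsc::{sync_channel, SyncSender};
use std::sync::{Arc, Mutex};
use std::thread::JoinHandle;

struct ParallelHandler<I: Send + 'static> {
    input: Option<SyncSender<I>>,
    handles: Vec<JoinHandle<()>>,
}

impl<I: Send + 'static> ParallelHandler<I> {
    fn new<F>(name: &str, threads: usize, handler: F) -> Self
    where
        F: Fn(I) -> Result<(), String> + Send + Sync + 'static,
    {
        let (tx, rx) = sync_channel::<I>(threads);
        let rx = Arc::new(Mutex::new(rx));
        let handler = Arc::new(handler);
        let handles = (0..threads)
            .map(|i| {
                let rx = Arc::clone(&rx);
                let handler = Arc::clone(&handler);
                std::thread::Builder::new()
                    .name(format!("{name} ({i})"))
                    .spawn(move || loop {
                        // Lock only to receive; the handler itself runs in
                        // parallel with the other workers.
                        let item = match rx.lock().unwrap().recv() {
                            Ok(item) => item,
                            Err(_) => return, // channel closed, no more work
                        };
                        if let Err(err) = (*handler)(item) {
                            eprintln!("worker error: {err}");
                        }
                    })
                    .expect("failed to spawn worker")
            })
            .collect();
        Self {
            input: Some(tx),
            handles,
        }
    }

    /// Queue one item; blocks when all workers are busy and the buffer is full.
    fn send(&self, item: I) -> Result<(), String> {
        self.input
            .as_ref()
            .expect("send after complete")
            .send(item)
            .map_err(|err| err.to_string())
    }

    /// Close the input channel and wait until the workers have drained it.
    fn complete(mut self) -> Result<(), String> {
        drop(self.input.take());
        for handle in self.handles.drain(..) {
            handle.join().map_err(|_| "worker panicked".to_string())?;
        }
        Ok(())
    }
}

fn main() {
    let pool = ParallelHandler::new("demo reader", 4, |n: u32| {
        println!("handled item {n}");
        Ok(())
    });
    for n in 0..8 {
        pool.send(n).unwrap();
    }
    pool.complete().unwrap();
}

Note that with more than one worker, chunks can reach the tape writer in a
different order than the snapshot reader produced them; that is fine here,
since duplicates are filtered before reader_pool.send() and each chunk is
written and cataloged independently.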
diff --git a/www/Utils.js b/www/Utils.js
index 1d7351a32..4d224cd4a 100644
--- a/www/Utils.js
+++ b/www/Utils.js
@@ -790,6 +790,11 @@ Ext.define('PBS.Utils', {
         sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__'];
         options.push(`${gettext('Sync Level')}: ${sync}`);
 
+        let tapeBackupRT = tuning['tape-backup-read-threads'];
+        delete tuning['tape-backup-read-threads'];
+        tapeBackupRT ??= Proxmox.Utils.defaultText + ` (4)`;
+        options.push(`${gettext('Tape Backup Read Threads')}: ${tapeBackupRT}`);
+
         for (const [k, v] of Object.entries(tuning)) {
             options.push(`${k}: ${v}`);
         }
diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js
index e1f38af6f..cfbb89151 100644
--- a/www/datastore/OptionView.js
+++ b/www/datastore/OptionView.js
@@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', {
                 deleteEmpty: true,
                 value: '__default__',
             },
+            {
+                xtype: 'proxmoxintegerfield',
+                name: 'tape-backup-read-threads',
+                fieldLabel: gettext('Tape Backup Read Threads'),
+                min: 1,
+                emptyText: '4',
+                deleteEmpty: true,
+            },
         ],
     },
 },
-- 
2.39.2

_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel