From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Tue, 30 Apr 2024 11:39:39 +0200
Message-Id: <20240430093939.1318786-2-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20240430093939.1318786-1-d.csapak@proxmox.com>
References: <20240430093939.1318786-1-d.csapak@proxmox.com>
MIME-Version: 1.0
Subject: [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores
 'read-threads' for tape backup

Using a single thread for reading is not optimal in some cases, e.g.
when the underlying storage can handle more parallel reads than a
single thread can issue.

How much this helps depends largely on the storage and the CPU.

Use the ParallelHandler to handle the actual reads, and make the
sync_channel buffer size depend on the number of threads so there is
space for two chunks per thread.
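
To illustrate the pattern outside of our codebase, here is a minimal,
self-contained sketch using only std (the real code uses our
ParallelHandler instead): a pool of reader threads pulls digests from a
shared work queue and pushes the loaded chunks into a bounded
sync_channel sized to two chunks per thread. Digest and load_chunk()
are stand-ins for the real datastore types:

use std::sync::{mpsc, Arc, Mutex};
use std::thread;

// Stand-ins for the real types: a digest is a 32-byte chunk hash and
// load_chunk() represents the blocking read from the datastore.
type Digest = [u8; 32];
fn load_chunk(digest: &Digest) -> Vec<u8> {
    digest.to_vec()
}

fn main() {
    let read_threads = 4;

    // Result channel sized like in the patch: up to two chunks per
    // reader thread may be buffered before the readers block.
    let (tx, rx) = mpsc::sync_channel::<(Digest, Vec<u8>)>(read_threads * 2);

    // Shared work queue feeding the pool; this plays the role of
    // ParallelHandler's input side.
    let (work_tx, work_rx) = mpsc::channel::<Digest>();
    let work_rx = Arc::new(Mutex::new(work_rx));

    let workers: Vec<_> = (0..read_threads)
        .map(|_| {
            let tx = tx.clone();
            let work_rx = Arc::clone(&work_rx);
            thread::spawn(move || loop {
                // Hold the lock only while fetching the next digest,
                // so the load_chunk() calls below run in parallel.
                let next = work_rx.lock().unwrap().recv();
                match next {
                    Ok(digest) => {
                        let blob = load_chunk(&digest);
                        if tx.send((digest, blob)).is_err() {
                            break; // consumer hung up
                        }
                    }
                    Err(_) => break, // queue closed and drained
                }
            })
        })
        .collect();
    drop(tx); // keep only the workers' senders alive

    // Producer: enqueue some fake digests, then close the queue.
    for i in 0..16u8 {
        let mut digest = [0u8; 32];
        digest[0] = i;
        work_tx.send(digest).unwrap();
    }
    drop(work_tx);

    // Consumer: corresponds to the tape writer draining `rx`; the
    // iterator ends once all workers have exited.
    for (digest, blob) in rx {
        println!("got chunk {:02x}.. ({} bytes)", digest[0], blob.len());
    }

    for worker in workers {
        worker.join().unwrap();
    }
}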

I ran some benchmarks on my (virtual) PBS with a real tape drive (an
LTO-8 tape in an LTO-9 drive):

For my NVMe datastore the number of threads did not matter much, so I
assume the bottleneck was the HBA, the drive, or the cabling rather
than the disks/CPU. (The task log consistently reported ~300 MB/s.)

For a datastore on a single HDD, the results are much more interesting:

1 thread:  ~55 MB/s
2 threads: ~70 MB/s
4 threads: ~80 MB/s
8 threads: ~95 MB/s

So issuing multiple I/O requests in parallel does speed up tape backup
in general.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---

Although I did benchmark this, I would be very grateful if other people
could test this (and the previous) change on their various disk setups,
so we can verify that it really makes a difference and is worth making
configurable.
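
For testing: assuming 'read-threads' ends up in the datastore tuning
options as patch 1/2 suggests, it should be adjustable per datastore
with something like (hypothetical invocation, adjust the value to your
setup):

  proxmox-backup-manager datastore update <name> --tuning 'read-threads=4'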

 pbs-api-types/src/datastore.rs              |  2 +-
 src/tape/pool_writer/new_chunks_iterator.rs | 42 +++++++++++++--------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
index 2ad2ae063..243c4759f 100644
--- a/pbs-api-types/src/datastore.rs
+++ b/pbs-api-types/src/datastore.rs
@@ -210,7 +210,7 @@ pub enum DatastoreFSyncLevel {
             optional: true,
         },
         "read-threads": {
-            description: "Controls how many threads are used for reading from the datastore for verification.",
+            description: "Controls how many threads are used for reading from the datastore for verify and tape backup.",
             type: usize,
             optional: true,
             minimum: 1,
diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
index 1454b33d2..63b10c9f8 100644
--- a/src/tape/pool_writer/new_chunks_iterator.rs
+++ b/src/tape/pool_writer/new_chunks_iterator.rs
@@ -6,8 +6,9 @@ use anyhow::{format_err, Error};
 use pbs_datastore::{DataBlob, DataStore, SnapshotReader};
 
 use crate::tape::CatalogSet;
+use crate::tools::parallel_handler::ParallelHandler;
 
-/// Chunk iterator which use a separate thread to read chunks
+/// Chunk iterator which uses separate threads to read chunks
 ///
 /// The iterator skips duplicate chunks and chunks already in the
 /// catalog.
@@ -25,7 +26,8 @@ impl NewChunksIterator {
         snapshot_reader: Arc<Mutex<SnapshotReader>>,
         catalog_set: Arc<Mutex<CatalogSet>>,
     ) -> Result<(std::thread::JoinHandle<()>, Self), Error> {
-        let (tx, rx) = std::sync::mpsc::sync_channel(3);
+        let read_threads = datastore.get_read_threads();
+        let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2);
 
         let reader_thread = std::thread::spawn(move || {
             let snapshot_reader = snapshot_reader.lock().unwrap();
@@ -35,36 +37,44 @@ impl NewChunksIterator {
             let datastore_name = snapshot_reader.datastore_name().to_string();
 
             let result: Result<(), Error> = proxmox_lang::try_block!({
-                let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
+                let chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
                     catalog_set
                         .lock()
                         .unwrap()
                         .contains_chunk(&datastore_name, digest)
                 })?;
 
-                loop {
-                    let digest = match chunk_iter.next() {
-                        None => {
-                            let _ = tx.send(Ok(None)); // ignore send error
-                            break;
+                let reader_pool =
+                    ParallelHandler::new("tape backup chunk reader pool", read_threads, {
+                        let tx = tx.clone();
+                        move |digest| {
+                            let blob = datastore.load_chunk(&digest)?;
+                            //println!("LOAD CHUNK {}", hex::encode(&digest));
+
+                            tx.send(Ok(Some((digest, blob)))).map_err(|err| {
+                                format_err!("error sending result from reader thread: {err}")
+                            })?;
+
+                            Ok(())
                         }
-                        Some(digest) => digest?,
-                    };
+                    });
+
+                for digest in chunk_iter {
+                    let digest = digest?;
 
                     if chunk_index.contains(&digest) {
                         continue;
                     }
 
-                    let blob = datastore.load_chunk(&digest)?;
-                    //println!("LOAD CHUNK {}", hex::encode(&digest));
-                    if let Err(err) = tx.send(Ok(Some((digest, blob)))) {
-                        eprintln!("could not send chunk to reader thread: {err}");
-                        break;
-                    }
+                    reader_pool.send(digest)?;
 
                     chunk_index.insert(digest);
                 }
 
+                reader_pool.complete()?;
+
+                let _ = tx.send(Ok(None)); // ignore send error
+
                 Ok(())
             });
             if let Err(err) = result {
-- 
2.39.2