* [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads @ 2024-04-30 9:39 Dominik Csapak 2024-04-30 9:39 ` [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup Dominik Csapak 2024-05-02 12:22 ` [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Thomas Lamprecht 0 siblings, 2 replies; 5+ messages in thread From: Dominik Csapak @ 2024-04-30 9:39 UTC (permalink / raw) To: pbs-devel adds a new 'read-threads' tuning options to datastores that control how many threads are used for verification operations depending on the underlying storage and the used cpu, this can make a big difference in verification time. the default remains at the current 4 threads. Signed-off-by: Dominik Csapak <d.csapak@proxmox.com> --- pbs-api-types/src/datastore.rs | 9 +++++++++ pbs-datastore/src/datastore.rs | 14 ++++++++++++++ src/backup/verify.rs | 2 +- www/Utils.js | 5 +++++ www/datastore/OptionView.js | 8 ++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs index 31767417a..2ad2ae063 100644 --- a/pbs-api-types/src/datastore.rs +++ b/pbs-api-types/src/datastore.rs @@ -209,6 +209,13 @@ pub enum DatastoreFSyncLevel { type: ChunkOrder, optional: true, }, + "read-threads": { + description: "Controls how many threads are used for reading from the datastore for verification.", + type: usize, + optional: true, + minimum: 1, + default: 4, + }, }, )] #[derive(Serialize, Deserialize, Default)] @@ -220,6 +227,8 @@ pub struct DatastoreTuning { pub chunk_order: Option<ChunkOrder>, #[serde(skip_serializing_if = "Option::is_none")] pub sync_level: Option<DatastoreFSyncLevel>, + #[serde(skip_serializing_if = "Option::is_none")] + pub read_threads: Option<usize>, } pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options") diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index f95da7615..2c23bd1f1 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -14,6 +14,7 @@ use proxmox_schema::ApiType; use proxmox_sys::error::SysError; use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard}; +use proxmox_sys::linux::procfs::read_proc_stat; use proxmox_sys::process_locker::ProcessLockSharedGuard; use proxmox_sys::WorkerTaskContext; use proxmox_sys::{task_log, task_warn}; @@ -61,6 +62,7 @@ pub struct DataStoreImpl { chunk_order: ChunkOrder, last_digest: Option<[u8; 32]>, sync_level: DatastoreFSyncLevel, + read_threads: usize, } impl DataStoreImpl { @@ -75,6 +77,7 @@ impl DataStoreImpl { chunk_order: Default::default(), last_digest: None, sync_level: Default::default(), + read_threads: 0, }) } } @@ -313,6 +316,7 @@ impl DataStore { chunk_order: tuning.chunk_order.unwrap_or_default(), last_digest, sync_level: tuning.sync_level.unwrap_or_default(), + read_threads: tuning.read_threads.unwrap_or(4), }) } @@ -1377,6 +1381,16 @@ impl DataStore { Ok(()) } + /// returns the number of read thread that should be used for operations + /// limits to the number of available cores (if reading from /proc/stat succeeds) + pub fn get_read_threads(&self) -> usize { + // if we cannot get the real cpu count, don't limit us + let core_count = read_proc_stat() + .map(|proc| proc.cpu_count as usize) + .unwrap_or(usize::max_value()); + self.inner.read_threads.clamp(1, core_count) + } + /// Destroy a datastore. This requires that there are no active operations on the datastore. /// /// This is a synchronous operation and should be run in a worker-thread. diff --git a/src/backup/verify.rs b/src/backup/verify.rs index c972e5328..4537dbdc9 100644 --- a/src/backup/verify.rs +++ b/src/backup/verify.rs @@ -125,7 +125,7 @@ fn verify_index_chunks( let decoder_pool = ParallelHandler::new( "verify chunk decoder", - 4, + datastore2.get_read_threads(), move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| { let chunk_crypt_mode = match chunk.crypt_mode() { Err(err) => { diff --git a/www/Utils.js b/www/Utils.js index 1d7351a32..8fd102486 100644 --- a/www/Utils.js +++ b/www/Utils.js @@ -790,6 +790,11 @@ Ext.define('PBS.Utils', { sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__']; options.push(`${gettext('Sync Level')}: ${sync}`); + let readThreads = tuning['read-threads']; + delete tuning['read-threads']; + readThreads ??= Proxmox.Utils.defaultText + ` (4)`; + options.push(`${gettext('Read Threads')}: ${readThreads}`); + for (const [k, v] of Object.entries(tuning)) { options.push(`${k}: ${v}`); } diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js index e1f38af6f..ee90f1dca 100644 --- a/www/datastore/OptionView.js +++ b/www/datastore/OptionView.js @@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', { deleteEmpty: true, value: '__default__', }, + { + xtype: 'proxmoxintegerfield', + name: 'read-threads', + fieldLabel: gettext('Read Threads'), + min: 0, + emptyText: '4', + deleteEmpty: true, + }, ], }, }, -- 2.39.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel ^ permalink raw reply [flat|nested] 5+ messages in thread
* [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup 2024-04-30 9:39 [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Dominik Csapak @ 2024-04-30 9:39 ` Dominik Csapak 2024-04-30 11:00 ` Dominik Csapak 2024-05-02 12:22 ` [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Thomas Lamprecht 1 sibling, 1 reply; 5+ messages in thread From: Dominik Csapak @ 2024-04-30 9:39 UTC (permalink / raw) To: pbs-devel using a single thread for reading is not optimal in some cases, e.g. when the underlying storage can handle more reads in parallel than with a single thread. This depends largely on the storage and cpu. We use the ParallelHandler to handle the actual reads. Make the sync_channel buffer size depending on the number of threads so we have space for two chunks per thread. Did some benchmarks on my (virtual) pbs with a real tape drive (lto8 tape in an lto9 drive): For my NVME datastore it did not matter much how many threads were used so i guess the bottleneck was either in the hba/drive or cable rather than the disks/cpu. (Always got around ~300MB/s from the task log) For a datastore on a single HDD, the results are much more interesting: 1 Thread: ~55MB/s 2 Threads: ~70MB/s 4 Threads: ~80MB/s 8 Threads: ~95MB/s So the fact that multiple IO request are done in parallel does speed up the tape backup in general. Signed-off-by: Dominik Csapak <d.csapak@proxmox.com> --- altough i did benchmark, i would be very grateful if other people could test this (and the previous) change in their varying disk setups, so we can verify that it really makes a difference and is worth it to have it configurable pbs-api-types/src/datastore.rs | 2 +- src/tape/pool_writer/new_chunks_iterator.rs | 42 +++++++++++++-------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs index 2ad2ae063..243c4759f 100644 --- a/pbs-api-types/src/datastore.rs +++ b/pbs-api-types/src/datastore.rs @@ -210,7 +210,7 @@ pub enum DatastoreFSyncLevel { optional: true, }, "read-threads": { - description: "Controls how many threads are used for reading from the datastore for verification.", + description: "Controls how many threads are used for reading from the datastore for verify and tape backup.", type: usize, optional: true, minimum: 1, diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs index 1454b33d2..63b10c9f8 100644 --- a/src/tape/pool_writer/new_chunks_iterator.rs +++ b/src/tape/pool_writer/new_chunks_iterator.rs @@ -6,8 +6,9 @@ use anyhow::{format_err, Error}; use pbs_datastore::{DataBlob, DataStore, SnapshotReader}; use crate::tape::CatalogSet; +use crate::tools::parallel_handler::ParallelHandler; -/// Chunk iterator which use a separate thread to read chunks +/// Chunk iterator which use separate threads to read chunks /// /// The iterator skips duplicate chunks and chunks already in the /// catalog. @@ -25,7 +26,8 @@ impl NewChunksIterator { snapshot_reader: Arc<Mutex<SnapshotReader>>, catalog_set: Arc<Mutex<CatalogSet>>, ) -> Result<(std::thread::JoinHandle<()>, Self), Error> { - let (tx, rx) = std::sync::mpsc::sync_channel(3); + let read_threads = datastore.get_read_threads(); + let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2); let reader_thread = std::thread::spawn(move || { let snapshot_reader = snapshot_reader.lock().unwrap(); @@ -35,36 +37,44 @@ impl NewChunksIterator { let datastore_name = snapshot_reader.datastore_name().to_string(); let result: Result<(), Error> = proxmox_lang::try_block!({ - let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| { + let chunk_iter = snapshot_reader.chunk_iterator(move |digest| { catalog_set .lock() .unwrap() .contains_chunk(&datastore_name, digest) })?; - loop { - let digest = match chunk_iter.next() { - None => { - let _ = tx.send(Ok(None)); // ignore send error - break; + let reader_pool = + ParallelHandler::new("tape backup chunk reader pool", read_threads, { + let tx = tx.clone(); + move |digest| { + let blob = datastore.load_chunk(&digest)?; + //println!("LOAD CHUNK {}", hex::encode(&digest)); + + tx.send(Ok(Some((digest, blob)))).map_err(|err| { + format_err!("error sending result from reader thread: {err}") + })?; + + Ok(()) } - Some(digest) => digest?, - }; + }); + + for digest in chunk_iter { + let digest = digest?; if chunk_index.contains(&digest) { continue; } - let blob = datastore.load_chunk(&digest)?; - //println!("LOAD CHUNK {}", hex::encode(&digest)); - if let Err(err) = tx.send(Ok(Some((digest, blob)))) { - eprintln!("could not send chunk to reader thread: {err}"); - break; - } + reader_pool.send(digest)?; chunk_index.insert(digest); } + reader_pool.complete()?; + + let _ = tx.send(Ok(None)); // ignore send error + Ok(()) }); if let Err(err) = result { -- 2.39.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup 2024-04-30 9:39 ` [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup Dominik Csapak @ 2024-04-30 11:00 ` Dominik Csapak 0 siblings, 0 replies; 5+ messages in thread From: Dominik Csapak @ 2024-04-30 11:00 UTC (permalink / raw) To: pbs-devel On 4/30/24 11:39, Dominik Csapak wrote: > using a single thread for reading is not optimal in some cases, e.g. > when the underlying storage can handle more reads in parallel than > with a single thread. > > This depends largely on the storage and cpu. > > We use the ParallelHandler to handle the actual reads. > Make the sync_channel buffer size depending on the number of threads > so we have space for two chunks per thread. > > Did some benchmarks on my (virtual) pbs with a real tape drive (lto8 > tape in an lto9 drive): > > For my NVME datastore it did not matter much how many threads were used > so i guess the bottleneck was either in the hba/drive or cable rather > than the disks/cpu. (Always got around ~300MB/s from the task log) > > For a datastore on a single HDD, the results are much more interesting: > > 1 Thread: ~55MB/s > 2 Threads: ~70MB/s > 4 Threads: ~80MB/s > 8 Threads: ~95MB/s > > So the fact that multiple IO request are done in parallel does speed up > the tape backup in general. > eh that sentence might be misleading, what i meant was not 'in general' but for the case of spinning disks could be amended before applying or in a v2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads 2024-04-30 9:39 [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Dominik Csapak 2024-04-30 9:39 ` [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup Dominik Csapak @ 2024-05-02 12:22 ` Thomas Lamprecht 2024-05-02 13:14 ` Dominik Csapak 1 sibling, 1 reply; 5+ messages in thread From: Thomas Lamprecht @ 2024-05-02 12:22 UTC (permalink / raw) To: Proxmox Backup Server development discussion, Dominik Csapak subject should have s/reading/verification read/ and it's one option so singular please. On 30/04/2024 11:39, Dominik Csapak wrote: > adds a new 'read-threads' tuning options to datastores that control > how many threads are used for verification operations > > depending on the underlying storage and the used cpu, this can make a > big difference in verification time. Not that I don't believe you, but actual numbers would be nice to have here ;-) > > the default remains at the current 4 threads. > > Signed-off-by: Dominik Csapak <d.csapak@proxmox.com> > --- > pbs-api-types/src/datastore.rs | 9 +++++++++ > pbs-datastore/src/datastore.rs | 14 ++++++++++++++ > src/backup/verify.rs | 2 +- > www/Utils.js | 5 +++++ > www/datastore/OptionView.js | 8 ++++++++ > 5 files changed, 37 insertions(+), 1 deletion(-) > > diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs > index 31767417a..2ad2ae063 100644 > --- a/pbs-api-types/src/datastore.rs > +++ b/pbs-api-types/src/datastore.rs > @@ -209,6 +209,13 @@ pub enum DatastoreFSyncLevel { > type: ChunkOrder, > optional: true, > }, > + "read-threads": { the name is confusing for users reading configs and devs reading the code using this, as one would expect that this is for all read operations, e.g., backup restore or even GC. A API description can be great to clear up subtle things, but it should not be required to get the very basic understanding on what this is used for. I'd rather have two different settings for this and the tape one, e.g. `verification-read-threads` and `tape-backup-read-threads` What I'd even prefer more is not having this at all, but rather auto-determine it from the amount of cores and the disk types (and possibly its bus), e.g.: - if hdd (/sys/block/sda/queue/rotational is 1) then always use one thread - if sdd then depending on thread count use 2 to 4 – for example, as pseudo code calculate it through #threads/4.max(4).min(1) Could be even better to have a pool per backing storage to reserve threads from, which is naturally more complex but the default behaviour should ideally be OK for most users, so IMO at least worth a good amount of consideration in how to makes this smarter per default – I could imagine that one might even get some hints from the kernel (IO) scheduler. > + description: "Controls how many threads are used for reading from the datastore for verification.", > + type: usize, > + optional: true, > + minimum: 1, > + default: 4, > + }, > }, > )] > #[derive(Serialize, Deserialize, Default)] > @@ -220,6 +227,8 @@ pub struct DatastoreTuning { > pub chunk_order: Option<ChunkOrder>, > #[serde(skip_serializing_if = "Option::is_none")] > pub sync_level: Option<DatastoreFSyncLevel>, > + #[serde(skip_serializing_if = "Option::is_none")] > + pub read_threads: Option<usize>, > } > > pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options") > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index f95da7615..2c23bd1f1 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -14,6 +14,7 @@ use proxmox_schema::ApiType; > use proxmox_sys::error::SysError; > use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; > use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard}; > +use proxmox_sys::linux::procfs::read_proc_stat; > use proxmox_sys::process_locker::ProcessLockSharedGuard; > use proxmox_sys::WorkerTaskContext; > use proxmox_sys::{task_log, task_warn}; > @@ -61,6 +62,7 @@ pub struct DataStoreImpl { > chunk_order: ChunkOrder, > last_digest: Option<[u8; 32]>, > sync_level: DatastoreFSyncLevel, > + read_threads: usize, if we really add this I'd do so through a struct that impls default and added as something 'thread_limits' here, having a `verification`, `tape_read`, ... members. > } > > impl DataStoreImpl { > @@ -75,6 +77,7 @@ impl DataStoreImpl { > chunk_order: Default::default(), > last_digest: None, > sync_level: Default::default(), > + read_threads: 0, > }) > } > } > @@ -313,6 +316,7 @@ impl DataStore { > chunk_order: tuning.chunk_order.unwrap_or_default(), > last_digest, > sync_level: tuning.sync_level.unwrap_or_default(), > + read_threads: tuning.read_threads.unwrap_or(4), > }) > } > > @@ -1377,6 +1381,16 @@ impl DataStore { > Ok(()) > } > > + /// returns the number of read thread that should be used for operations > + /// limits to the number of available cores (if reading from /proc/stat succeeds) > + pub fn get_read_threads(&self) -> usize { > + // if we cannot get the real cpu count, don't limit us > + let core_count = read_proc_stat() > + .map(|proc| proc.cpu_count as usize) > + .unwrap_or(usize::max_value()); > + self.inner.read_threads.clamp(1, core_count) > + } > + > /// Destroy a datastore. This requires that there are no active operations on the datastore. > /// > /// This is a synchronous operation and should be run in a worker-thread. > diff --git a/src/backup/verify.rs b/src/backup/verify.rs > index c972e5328..4537dbdc9 100644 > --- a/src/backup/verify.rs > +++ b/src/backup/verify.rs > @@ -125,7 +125,7 @@ fn verify_index_chunks( > > let decoder_pool = ParallelHandler::new( > "verify chunk decoder", > - 4, > + datastore2.get_read_threads(), > move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| { > let chunk_crypt_mode = match chunk.crypt_mode() { > Err(err) => { > diff --git a/www/Utils.js b/www/Utils.js > index 1d7351a32..8fd102486 100644 > --- a/www/Utils.js > +++ b/www/Utils.js > @@ -790,6 +790,11 @@ Ext.define('PBS.Utils', { > sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__']; > options.push(`${gettext('Sync Level')}: ${sync}`); > > + let readThreads = tuning['read-threads']; > + delete tuning['read-threads']; > + readThreads ??= Proxmox.Utils.defaultText + ` (4)`; > + options.push(`${gettext('Read Threads')}: ${readThreads}`); > + > for (const [k, v] of Object.entries(tuning)) { > options.push(`${k}: ${v}`); > } > diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js > index e1f38af6f..ee90f1dca 100644 > --- a/www/datastore/OptionView.js > +++ b/www/datastore/OptionView.js > @@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', { > deleteEmpty: true, > value: '__default__', > }, > + { > + xtype: 'proxmoxintegerfield', > + name: 'read-threads', > + fieldLabel: gettext('Read Threads'), without any hint or tooltip this is just a completely confusingly option.. If it would be specialized for the actual different operations (now and future one) > + min: 0, > + emptyText: '4', > + deleteEmpty: true, > + }, > ], > }, > }, _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads 2024-05-02 12:22 ` [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Thomas Lamprecht @ 2024-05-02 13:14 ` Dominik Csapak 0 siblings, 0 replies; 5+ messages in thread From: Dominik Csapak @ 2024-05-02 13:14 UTC (permalink / raw) To: Thomas Lamprecht, Proxmox Backup Server development discussion On 5/2/24 14:22, Thomas Lamprecht wrote: > subject should have s/reading/verification read/ and it's one option > so singular please. sure > > On 30/04/2024 11:39, Dominik Csapak wrote: >> adds a new 'read-threads' tuning options to datastores that control >> how many threads are used for verification operations >> >> depending on the underlying storage and the used cpu, this can make a >> big difference in verification time. > > Not that I don't believe you, but actual numbers would be nice to have > here ;-) of course. i assumed that there would be a v2 anyway, so i sent it out without doing any big benchmarks and the goal that maybe some other people could test other setups besides what i have here i'd include those benchmarks then in the commit message ofc > >> >> the default remains at the current 4 threads. >> >> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com> >> --- >> pbs-api-types/src/datastore.rs | 9 +++++++++ >> pbs-datastore/src/datastore.rs | 14 ++++++++++++++ >> src/backup/verify.rs | 2 +- >> www/Utils.js | 5 +++++ >> www/datastore/OptionView.js | 8 ++++++++ >> 5 files changed, 37 insertions(+), 1 deletion(-) >> >> diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs >> index 31767417a..2ad2ae063 100644 >> --- a/pbs-api-types/src/datastore.rs >> +++ b/pbs-api-types/src/datastore.rs >> @@ -209,6 +209,13 @@ pub enum DatastoreFSyncLevel { >> type: ChunkOrder, >> optional: true, >> }, >> + "read-threads": { > > the name is confusing for users reading configs and devs reading the code > using this, as one would expect that this is for all read operations, e.g., > backup restore or even GC. A API description can be great to clear up subtle > things, but it should not be required to get the very basic understanding > on what this is used for. > > I'd rather have two different settings for this and the tape one, e.g. > `verification-read-threads` and `tape-backup-read-threads` yes that makes sense. i really should have reversed the patches as that was what i was doing: improving tape backup and then noticing that we hard coded the 4 verify threads. > > What I'd even prefer more is not having this at all, but rather auto-determine > it from the amount of cores and the disk types (and possibly its bus), e.g.: > > - if hdd (/sys/block/sda/queue/rotational is 1) then always use one thread > - if sdd then depending on thread count use 2 to 4 – for example, as pseudo > code calculate it through #threads/4.max(4).min(1) we could do such a thing, but imho only as a default. I think there are too many variables to consider to guess right for every setup. E.g. the single thread for even a single spinning disk is super bad for the tape backup (at least in my setup) Also in some setups detecting the underlying storage configuration is impossible e.g. behind a raid controller. Also fabian mentioned off-list to me, that we cannot know how much resources pbs should use of the host machine. E.g. if one regularly schedules multiple verification/tape backup jobs in parallel. > > Could be even better to have a pool per backing storage to reserve threads > from, which is naturally more complex but the default behaviour should > ideally be OK for most users, so IMO at least worth a good amount of > consideration in how to makes this smarter per default – I could imagine > that one might even get some hints from the kernel (IO) scheduler. mhmm yeah i can look into that, it should not be that hard to have some 'maximum-tape-backup-threads' where each job 'borrows' some for each run. Problem is what to do with jobs that start when all threads are used up? > >> + description: "Controls how many threads are used for reading from the datastore for verification.", >> + type: usize, >> + optional: true, >> + minimum: 1, >> + default: 4, >> + }, >> }, >> )] >> #[derive(Serialize, Deserialize, Default)] >> @@ -220,6 +227,8 @@ pub struct DatastoreTuning { >> pub chunk_order: Option<ChunkOrder>, >> #[serde(skip_serializing_if = "Option::is_none")] >> pub sync_level: Option<DatastoreFSyncLevel>, >> + #[serde(skip_serializing_if = "Option::is_none")] >> + pub read_threads: Option<usize>, >> } >> >> pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options") >> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs >> index f95da7615..2c23bd1f1 100644 >> --- a/pbs-datastore/src/datastore.rs >> +++ b/pbs-datastore/src/datastore.rs >> @@ -14,6 +14,7 @@ use proxmox_schema::ApiType; >> use proxmox_sys::error::SysError; >> use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; >> use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard}; >> +use proxmox_sys::linux::procfs::read_proc_stat; >> use proxmox_sys::process_locker::ProcessLockSharedGuard; >> use proxmox_sys::WorkerTaskContext; >> use proxmox_sys::{task_log, task_warn}; >> @@ -61,6 +62,7 @@ pub struct DataStoreImpl { >> chunk_order: ChunkOrder, >> last_digest: Option<[u8; 32]>, >> sync_level: DatastoreFSyncLevel, >> + read_threads: usize, > > if we really add this I'd do so through a struct that impls default and added > as something 'thread_limits' here, having a `verification`, `tape_read`, ... > members. sure > >> } >> >> impl DataStoreImpl { >> @@ -75,6 +77,7 @@ impl DataStoreImpl { >> chunk_order: Default::default(), >> last_digest: None, >> sync_level: Default::default(), >> + read_threads: 0, >> }) >> } >> } >> @@ -313,6 +316,7 @@ impl DataStore { >> chunk_order: tuning.chunk_order.unwrap_or_default(), >> last_digest, >> sync_level: tuning.sync_level.unwrap_or_default(), >> + read_threads: tuning.read_threads.unwrap_or(4), >> }) >> } >> >> @@ -1377,6 +1381,16 @@ impl DataStore { >> Ok(()) >> } >> >> + /// returns the number of read thread that should be used for operations >> + /// limits to the number of available cores (if reading from /proc/stat succeeds) >> + pub fn get_read_threads(&self) -> usize { >> + // if we cannot get the real cpu count, don't limit us >> + let core_count = read_proc_stat() >> + .map(|proc| proc.cpu_count as usize) >> + .unwrap_or(usize::max_value()); >> + self.inner.read_threads.clamp(1, core_count) >> + } >> + >> /// Destroy a datastore. This requires that there are no active operations on the datastore. >> /// >> /// This is a synchronous operation and should be run in a worker-thread. >> diff --git a/src/backup/verify.rs b/src/backup/verify.rs >> index c972e5328..4537dbdc9 100644 >> --- a/src/backup/verify.rs >> +++ b/src/backup/verify.rs >> @@ -125,7 +125,7 @@ fn verify_index_chunks( >> >> let decoder_pool = ParallelHandler::new( >> "verify chunk decoder", >> - 4, >> + datastore2.get_read_threads(), >> move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| { >> let chunk_crypt_mode = match chunk.crypt_mode() { >> Err(err) => { >> diff --git a/www/Utils.js b/www/Utils.js >> index 1d7351a32..8fd102486 100644 >> --- a/www/Utils.js >> +++ b/www/Utils.js >> @@ -790,6 +790,11 @@ Ext.define('PBS.Utils', { >> sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__']; >> options.push(`${gettext('Sync Level')}: ${sync}`); >> >> + let readThreads = tuning['read-threads']; >> + delete tuning['read-threads']; >> + readThreads ??= Proxmox.Utils.defaultText + ` (4)`; >> + options.push(`${gettext('Read Threads')}: ${readThreads}`); >> + >> for (const [k, v] of Object.entries(tuning)) { >> options.push(`${k}: ${v}`); >> } >> diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js >> index e1f38af6f..ee90f1dca 100644 >> --- a/www/datastore/OptionView.js >> +++ b/www/datastore/OptionView.js >> @@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', { >> deleteEmpty: true, >> value: '__default__', >> }, >> + { >> + xtype: 'proxmoxintegerfield', >> + name: 'read-threads', >> + fieldLabel: gettext('Read Threads'), > > without any hint or tooltip this is just a completely confusingly > option.. If it would be specialized for the actual different > operations (now and future one) ok > >> + min: 0, >> + emptyText: '4', >> + deleteEmpty: true, >> + }, >> ], >> }, >> }, > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-05-02 13:14 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-04-30 9:39 [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Dominik Csapak 2024-04-30 9:39 ` [pbs-devel] [PATCH proxmox-backup 2/2] tape: use datastores 'read-thread' for tape backup Dominik Csapak 2024-04-30 11:00 ` Dominik Csapak 2024-05-02 12:22 ` [pbs-devel] [PATCH proxmox-backup 1/2] datastore: add tuning options for the number of reading threads Thomas Lamprecht 2024-05-02 13:14 ` Dominik Csapak
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox