public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: "Proxmox Backup Server development discussion"
	<pbs-devel@lists.proxmox.com>,
	"Fabian Grünbichler" <f.gruenbichler@proxmox.com>
Subject: Re: [pbs-devel] [PATCH v3 proxmox-backup 40/58] client: chunk stream: add dynamic entries injection queues
Date: Mon, 8 Apr 2024 15:54:31 +0200	[thread overview]
Message-ID: <f8d0c57b-27d4-469b-8148-d34863a4152f@proxmox.com> (raw)
In-Reply-To: <1712241225.maig1bup9p.astroid@yuna.none>

On 4/4/24 16:52, Fabian Grünbichler wrote:
> On March 28, 2024 1:36 pm, Christian Ebner wrote:
>> Adds a queue to the chunk stream to request forced boundaries at a
>> given offset within the stream and inject reused dynamic entries
>> after this boundary.
>>
>> The chunks are then passed along to the uploader stream using the
>> injection queue, which inserts them during upload.
>>
>> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
>> ---
>> changes since version 2:
>> - combined queues into new optional struct
>> - refactoring
>>
>>   examples/test_chunk_speed2.rs                 |  2 +-
>>   pbs-client/src/backup_writer.rs               | 89 +++++++++++--------
>>   pbs-client/src/chunk_stream.rs                | 36 +++++++-
>>   pbs-client/src/pxar/create.rs                 |  6 +-
>>   pbs-client/src/pxar_backup_stream.rs          |  7 +-
>>   proxmox-backup-client/src/main.rs             | 31 ++++---
>>   .../src/proxmox_restore_daemon/api.rs         |  1 +
>>   pxar-bin/src/main.rs                          |  1 +
>>   tests/catar.rs                                |  1 +
>>   9 files changed, 121 insertions(+), 53 deletions(-)
>>
>> diff --git a/examples/test_chunk_speed2.rs b/examples/test_chunk_speed2.rs
>> index 3f69b436d..22dd14ce2 100644
>> --- a/examples/test_chunk_speed2.rs
>> +++ b/examples/test_chunk_speed2.rs
>> @@ -26,7 +26,7 @@ async fn run() -> Result<(), Error> {
>>           .map_err(Error::from);
>>   
>>       //let chunk_stream = FixedChunkStream::new(stream, 4*1024*1024);
>> -    let mut chunk_stream = ChunkStream::new(stream, None);
>> +    let mut chunk_stream = ChunkStream::new(stream, None, None);
>>   
>>       let start_time = std::time::Instant::now();
>>   
>> diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs
>> index 8bd0e4f36..032d93da7 100644
>> --- a/pbs-client/src/backup_writer.rs
>> +++ b/pbs-client/src/backup_writer.rs
>> @@ -1,4 +1,4 @@
>> -use std::collections::HashSet;
>> +use std::collections::{HashSet, VecDeque};
>>   use std::future::Future;
>>   use std::os::unix::fs::OpenOptionsExt;
>>   use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
>> @@ -23,6 +23,7 @@ use pbs_tools::crypt_config::CryptConfig;
>>   
>>   use proxmox_human_byte::HumanByte;
>>   
>> +use super::inject_reused_chunks::{InjectChunks, InjectReusedChunks, InjectedChunksInfo};
>>   use super::merge_known_chunks::{MergeKnownChunks, MergedChunkInfo};
>>   
>>   use super::{H2Client, HttpClient};
>> @@ -265,6 +266,7 @@ impl BackupWriter {
>>           archive_name: &str,
>>           stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
>>           options: UploadOptions,
>> +        injection_queue: Option<Arc<Mutex<VecDeque<InjectChunks>>>>,
>>       ) -> Result<BackupStats, Error> {
>>           let known_chunks = Arc::new(Mutex::new(HashSet::new()));
>>   
>> @@ -341,6 +343,7 @@ impl BackupWriter {
>>                   None
>>               },
>>               options.compress,
>> +            injection_queue,
>>           )
>>           .await?;
>>   
>> @@ -637,6 +640,7 @@ impl BackupWriter {
>>           known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
>>           crypt_config: Option<Arc<CryptConfig>>,
>>           compress: bool,
>> +        injection_queue: Option<Arc<Mutex<VecDeque<InjectChunks>>>>,
>>       ) -> impl Future<Output = Result<UploadStats, Error>> {
>>           let total_chunks = Arc::new(AtomicUsize::new(0));
>>           let total_chunks2 = total_chunks.clone();
>> @@ -663,48 +667,63 @@ impl BackupWriter {
>>           let index_csum_2 = index_csum.clone();
>>   
>>           stream
>> -            .and_then(move |data| {
>> -                let chunk_len = data.len();
>> +            .inject_reused_chunks(
>> +                injection_queue.unwrap_or_default(),
>> +                stream_len,
>> +                reused_len.clone(),
>> +                index_csum.clone(),
>> +            )
>> +            .and_then(move |chunk_info| match chunk_info {
> 
> for this part here I am still not sure whether doing all of the
> accounting here wouldn't be nicer..
> 

Moved almost all of the accounting here; only the stream length is still
required for the offset calculation in `inject_reused_chunks`.

> 
>> diff --git a/pbs-client/src/chunk_stream.rs b/pbs-client/src/chunk_stream.rs
>> index a45420ca0..6ac0c638b 100644
>> --- a/pbs-client/src/chunk_stream.rs
>> +++ b/pbs-client/src/chunk_stream.rs
>> @@ -38,15 +38,17 @@ pub struct ChunkStream<S: Unpin> {
>>       chunker: Chunker,
>>       buffer: BytesMut,
>>       scan_pos: usize,
>> +    injection_data: Option<InjectionData>,
>>   }
>>   
>>   impl<S: Unpin> ChunkStream<S> {
>> -    pub fn new(input: S, chunk_size: Option<usize>) -> Self {
>> +    pub fn new(input: S, chunk_size: Option<usize>, injection_data: Option<InjectionData>) -> Self {
>>           Self {
>>               input,
>>               chunker: Chunker::new(chunk_size.unwrap_or(4 * 1024 * 1024)),
>>               buffer: BytesMut::new(),
>>               scan_pos: 0,
>> +            injection_data,
>>           }
>>       }
>>   }
>> @@ -64,6 +66,34 @@ where
>>       fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
>>           let this = self.get_mut();
>>           loop {
>> +            if let Some(InjectionData {
>> +                boundaries,
>> +                injections,
>> +                consumed,
>> +            }) = this.injection_data.as_mut()
>> +            {
>> +                // Make sure to release this lock as soon as possible
>> +                let mut boundaries = boundaries.lock().unwrap();
>> +                if let Some(inject) = boundaries.pop_front() {
> 
> here I am a bit more wary that this popping and re-pushing might hurt
> performance..

> 
>> +                    let max = *consumed + this.buffer.len() as u64;
>> +                    if inject.boundary <= max {
>> +                        let chunk_size = (inject.boundary - *consumed) as usize;
>> +                        let result = this.buffer.split_to(chunk_size);
> 
> a comment or better variable naming would make this easier to follow
> along..
>
> "result" is a forced chunk that is created here because we've reached a
> point where we want to inject something afterwards..
> 

Improved the variable naming and added comments to clarify the 
functionality for the upcoming version of the patches.

> once more I am wondering here whether for the payload stream, a vastly
> simplified chunker that just picks the boundaries based on re-use and
> payload size(s) (to avoid the one file == one chunk pathological case
> for lots of small files) wouldn't improve performance :)

Do you suggest having 2 chunker implementations, where for the payload
stream the chunk boundaries are provided via some interface rather than
being determined by the statistical sliding window approach, as you
mentioned in your response to Dietmar on patch 49 of this version of the
patch series?




  reply	other threads:[~2024-04-08 13:54 UTC|newest]

Thread overview: 122+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-28 12:36 [pbs-devel] [PATCH v3 pxar proxmox-backup 00/58] fix #3174: improve file-level backup Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 01/58] encoder: fix two typos in comments Christian Ebner
2024-04-03  9:12   ` [pbs-devel] applied: " Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 02/58] format/examples: add PXAR_PAYLOAD_REF entry header Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 03/58] decoder: add method to read payload references Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 04/58] decoder: factor out skip part from skip_entry Christian Ebner
2024-04-03  9:18   ` Fabian Grünbichler
2024-04-03 11:02     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 05/58] encoder: add optional output writer for file payloads Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 06/58] encoder: move to stack based state tracking Christian Ebner
2024-04-03  9:54   ` Fabian Grünbichler
2024-04-03 11:01     ` Christian Ebner
2024-04-04  8:48       ` Fabian Grünbichler
2024-04-04  9:04         ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 07/58] decoder/accessor: add optional payload input stream Christian Ebner
2024-04-03 10:38   ` Fabian Grünbichler
2024-04-03 11:47     ` Christian Ebner
2024-04-03 12:18     ` Christian Ebner
2024-04-04  8:46       ` Fabian Grünbichler
2024-04-04  9:49         ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 08/58] encoder: add payload reference capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 09/58] encoder: add payload position capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 10/58] encoder: add payload advance capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 11/58] encoder/format: finish payload stream with marker Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 12/58] format: add payload stream start marker Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 13/58] format: add pxar format version entry Christian Ebner
2024-04-03 11:41   ` Fabian Grünbichler
2024-04-03 13:31     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 14/58] format/encoder/decoder: add entry type cli params Christian Ebner
2024-04-03 12:01   ` Fabian Grünbichler
2024-04-03 14:41     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 15/58] client: pxar: switch to stack based encoder state Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 16/58] client: backup writer: only borrow http client Christian Ebner
2024-04-08  9:04   ` [pbs-devel] applied: " Fabian Grünbichler
2024-04-08  9:17     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 17/58] client: backup: factor out extension from backup target Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 18/58] client: backup: early check for fixed index type Christian Ebner
2024-04-08  9:05   ` [pbs-devel] applied: " Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 19/58] client: pxar: combine writer params into struct Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 20/58] client: backup: split payload to dedicated stream Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 21/58] client: helper: add helpers for creating reader instances Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 22/58] client: helper: add method for split archive name mapping Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 23/58] client: restore: read payload from dedicated index Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 24/58] tools: cover meta extension for pxar archives Christian Ebner
2024-04-04  9:01   ` Fabian Grünbichler
2024-04-04  9:06     ` Christian Ebner
2024-04-04  9:10       ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 25/58] restore: " Christian Ebner
2024-04-04  9:02   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 26/58] client: mount: make split pxar archives mountable Christian Ebner
2024-04-04  9:43   ` Fabian Grünbichler
2024-04-04 13:29     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 27/58] api: datastore: refactor getting local chunk reader Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 28/58] api: datastore: attach optional payload " Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 29/58] catalog: shell: factor out pxar fuse reader instantiation Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 30/58] catalog: shell: redirect payload reader for split streams Christian Ebner
2024-04-04  9:49   ` Fabian Grünbichler
2024-04-04 15:52     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 31/58] www: cover meta extension for pxar archives Christian Ebner
2024-04-04 10:01   ` Fabian Grünbichler
2024-04-04 14:51     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 32/58] pxar: add optional payload input for achive restore Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 33/58] pxar: add more context to extraction error Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 34/58] client: pxar: include payload offset in output Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 35/58] pxar: show padding in debug output on archive list Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 36/58] datastore: dynamic index: add method to get digest Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries Christian Ebner
2024-04-04 12:54   ` Fabian Grünbichler
2024-04-04 17:13     ` Christian Ebner
2024-04-05  7:22       ` Christian Ebner
2024-04-05 11:28   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 38/58] upload stream: impl reused chunk injector Christian Ebner
2024-04-04 14:24   ` Fabian Grünbichler
2024-04-05 10:26     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 39/58] client: chunk stream: add struct to hold injection state Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 40/58] client: chunk stream: add dynamic entries injection queues Christian Ebner
2024-04-04 14:52   ` Fabian Grünbichler
2024-04-08 13:54     ` Christian Ebner [this message]
2024-04-09  7:19       ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 41/58] specs: add backup detection mode specification Christian Ebner
2024-04-04 14:54   ` Fabian Grünbichler
2024-04-08 13:36     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 42/58] client: implement prepare reference method Christian Ebner
2024-04-05  8:01   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 43/58] client: pxar: implement store to insert chunks on caching Christian Ebner
2024-04-05  7:52   ` Fabian Grünbichler
2024-04-09  9:12     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 44/58] client: pxar: add previous reference to archiver Christian Ebner
2024-04-04 15:04   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 45/58] client: pxar: add method for metadata comparison Christian Ebner
2024-04-05  8:08   ` Fabian Grünbichler
2024-04-05  8:14     ` Christian Ebner
2024-04-09 12:52       ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 46/58] pxar: caching: add look-ahead cache types Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 47/58] client: pxar: add look-ahead caching Christian Ebner
2024-04-05  8:33   ` Fabian Grünbichler
2024-04-09 14:53     ` Christian Ebner
     [not found]       ` <<dce38c53-f3e7-47ac-b1fd-a63daaabbcec@proxmox.com>
2024-04-10  7:03         ` Fabian Grünbichler
2024-04-10  7:11           ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 48/58] fix #3174: client: pxar: enable caching and meta comparison Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 49/58] client: backup: increase average chunk size for metadata Christian Ebner
2024-04-05  9:42   ` Fabian Grünbichler
2024-04-05 10:49     ` Dietmar Maurer
2024-04-08  8:28       ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 50/58] client: backup writer: add injected chunk count to stats Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 51/58] pxar: create: show chunk injection stats debug output Christian Ebner
2024-04-05  9:47   ` Fabian Grünbichler
2024-04-10 10:00     ` Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 52/58] client: pxar: add entry kind format version Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 53/58] client: pxar: opt encode cli exclude patterns as CliParams Christian Ebner
2024-04-05  9:49   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 54/58] client: pxar: add flow chart for metadata change detection Christian Ebner
2024-04-05 10:16   ` Fabian Grünbichler
2024-04-10 10:04     ` Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 55/58] docs: describe file format for split payload files Christian Ebner
2024-04-05 10:26   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 56/58] docs: add section describing change detection mode Christian Ebner
2024-04-05 11:22   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 57/58] test-suite: add detection mode change benchmark Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 58/58] test-suite: add bin to deb, add shell completions Christian Ebner
2024-04-05 11:39 ` [pbs-devel] [PATCH v3 pxar proxmox-backup 00/58] fix #3174: improve file-level backup Fabian Grünbichler
2024-04-29 12:13 ` Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f8d0c57b-27d4-469b-8148-d34863a4152f@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=f.gruenbichler@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal