public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: Proxmox Backup Server development discussion
	<pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH v3 proxmox-backup 40/58] client: chunk stream: add dynamic entries injection queues
Date: Thu, 04 Apr 2024 16:52:15 +0200	[thread overview]
Message-ID: <1712241225.maig1bup9p.astroid@yuna.none> (raw)
In-Reply-To: <20240328123707.336951-41-c.ebner@proxmox.com>

On March 28, 2024 1:36 pm, Christian Ebner wrote:
> Adds a queue to the chunk stream to request forced boundaries at a
> given offset within the stream and inject reused dynamic entries
> after this boundary.
> 
> The chunks are then passed along to the uploader stream using the
> injection queue, which inserts them during upload.
> 
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
> changes since version 2:
> - combined queues into new optional struct
> - refactoring
> 
>  examples/test_chunk_speed2.rs                 |  2 +-
>  pbs-client/src/backup_writer.rs               | 89 +++++++++++--------
>  pbs-client/src/chunk_stream.rs                | 36 +++++++-
>  pbs-client/src/pxar/create.rs                 |  6 +-
>  pbs-client/src/pxar_backup_stream.rs          |  7 +-
>  proxmox-backup-client/src/main.rs             | 31 ++++---
>  .../src/proxmox_restore_daemon/api.rs         |  1 +
>  pxar-bin/src/main.rs                          |  1 +
>  tests/catar.rs                                |  1 +
>  9 files changed, 121 insertions(+), 53 deletions(-)
> 
> diff --git a/examples/test_chunk_speed2.rs b/examples/test_chunk_speed2.rs
> index 3f69b436d..22dd14ce2 100644
> --- a/examples/test_chunk_speed2.rs
> +++ b/examples/test_chunk_speed2.rs
> @@ -26,7 +26,7 @@ async fn run() -> Result<(), Error> {
>          .map_err(Error::from);
>  
>      //let chunk_stream = FixedChunkStream::new(stream, 4*1024*1024);
> -    let mut chunk_stream = ChunkStream::new(stream, None);
> +    let mut chunk_stream = ChunkStream::new(stream, None, None);
>  
>      let start_time = std::time::Instant::now();
>  
> diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs
> index 8bd0e4f36..032d93da7 100644
> --- a/pbs-client/src/backup_writer.rs
> +++ b/pbs-client/src/backup_writer.rs
> @@ -1,4 +1,4 @@
> -use std::collections::HashSet;
> +use std::collections::{HashSet, VecDeque};
>  use std::future::Future;
>  use std::os::unix::fs::OpenOptionsExt;
>  use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
> @@ -23,6 +23,7 @@ use pbs_tools::crypt_config::CryptConfig;
>  
>  use proxmox_human_byte::HumanByte;
>  
> +use super::inject_reused_chunks::{InjectChunks, InjectReusedChunks, InjectedChunksInfo};
>  use super::merge_known_chunks::{MergeKnownChunks, MergedChunkInfo};
>  
>  use super::{H2Client, HttpClient};
> @@ -265,6 +266,7 @@ impl BackupWriter {
>          archive_name: &str,
>          stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
>          options: UploadOptions,
> +        injection_queue: Option<Arc<Mutex<VecDeque<InjectChunks>>>>,
>      ) -> Result<BackupStats, Error> {
>          let known_chunks = Arc::new(Mutex::new(HashSet::new()));
>  
> @@ -341,6 +343,7 @@ impl BackupWriter {
>                  None
>              },
>              options.compress,
> +            injection_queue,
>          )
>          .await?;
>  
> @@ -637,6 +640,7 @@ impl BackupWriter {
>          known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
>          crypt_config: Option<Arc<CryptConfig>>,
>          compress: bool,
> +        injection_queue: Option<Arc<Mutex<VecDeque<InjectChunks>>>>,
>      ) -> impl Future<Output = Result<UploadStats, Error>> {
>          let total_chunks = Arc::new(AtomicUsize::new(0));
>          let total_chunks2 = total_chunks.clone();
> @@ -663,48 +667,63 @@ impl BackupWriter {
>          let index_csum_2 = index_csum.clone();
>  
>          stream
> -            .and_then(move |data| {
> -                let chunk_len = data.len();
> +            .inject_reused_chunks(
> +                injection_queue.unwrap_or_default(),
> +                stream_len,
> +                reused_len.clone(),
> +                index_csum.clone(),
> +            )
> +            .and_then(move |chunk_info| match chunk_info {

For this part, I am still not sure whether doing all of the
accounting here wouldn't be nicer.

> [..]

> diff --git a/pbs-client/src/chunk_stream.rs b/pbs-client/src/chunk_stream.rs
> index a45420ca0..6ac0c638b 100644
> --- a/pbs-client/src/chunk_stream.rs
> +++ b/pbs-client/src/chunk_stream.rs
> @@ -38,15 +38,17 @@ pub struct ChunkStream<S: Unpin> {
>      chunker: Chunker,
>      buffer: BytesMut,
>      scan_pos: usize,
> +    injection_data: Option<InjectionData>,
>  }
>  
>  impl<S: Unpin> ChunkStream<S> {
> -    pub fn new(input: S, chunk_size: Option<usize>) -> Self {
> +    pub fn new(input: S, chunk_size: Option<usize>, injection_data: Option<InjectionData>) -> Self {
>          Self {
>              input,
>              chunker: Chunker::new(chunk_size.unwrap_or(4 * 1024 * 1024)),
>              buffer: BytesMut::new(),
>              scan_pos: 0,
> +            injection_data,
>          }
>      }
>  }
> @@ -64,6 +66,34 @@ where
>      fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
>          let this = self.get_mut();
>          loop {
> +            if let Some(InjectionData {
> +                boundaries,
> +                injections,
> +                consumed,
> +            }) = this.injection_data.as_mut()
> +            {
> +                // Make sure to release this lock as soon as possible
> +                let mut boundaries = boundaries.lock().unwrap();
> +                if let Some(inject) = boundaries.pop_front() {

Here I am a bit more wary that this popping and re-pushing (whenever the
boundary has not been reached yet) might hurt performance.

> +                    let max = *consumed + this.buffer.len() as u64;
> +                    if inject.boundary <= max {
> +                        let chunk_size = (inject.boundary - *consumed) as usize;
> +                        let result = this.buffer.split_to(chunk_size);

A comment or better variable naming would make this easier to follow.

"result" is a forced chunk that is created here because we've reached a
point after which we want to inject something.

Once more, I am wondering whether, for the payload stream, a vastly
simplified chunker that just picks the boundaries based on re-use and
payload size(s) (to avoid the pathological "one file == one chunk" case
for lots of small files) wouldn't improve performance :)

> +                        *consumed += chunk_size as u64;
> +                        this.scan_pos = 0;
> +
> +                        // Add the size of the injected chunks to consumed, so chunk stream offsets
> +                        // are in sync with the rest of the archive.
> +                        *consumed += inject.size as u64;
> +
> +                        injections.lock().unwrap().push_back(inject);
> +
> +                        return Poll::Ready(Some(Ok(result)));
> +                    }
> +                    boundaries.push_front(inject);
> +                }
> +            }
> +
>              if this.scan_pos < this.buffer.len() {
>                  let boundary = this.chunker.scan(&this.buffer[this.scan_pos..]);
>  
> @@ -74,7 +104,11 @@ where
>                      // continue poll
>                  } else if chunk_size <= this.buffer.len() {
>                      let result = this.buffer.split_to(chunk_size);
> +                    if let Some(InjectionData { consumed, .. }) = this.injection_data.as_mut() {
> +                        *consumed += chunk_size as u64;
> +                    }
>                      this.scan_pos = 0;
> +
>                      return Poll::Ready(Some(Ok(result)));
>                  } else {
>                      panic!("got unexpected chunk boundary from chunker");

> [..]




  reply	other threads:[~2024-04-04 14:52 UTC|newest]

Thread overview: 122+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-28 12:36 [pbs-devel] [PATCH v3 pxar proxmox-backup 00/58] fix #3174: improve file-level backup Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 01/58] encoder: fix two typos in comments Christian Ebner
2024-04-03  9:12   ` [pbs-devel] applied: " Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 02/58] format/examples: add PXAR_PAYLOAD_REF entry header Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 03/58] decoder: add method to read payload references Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 04/58] decoder: factor out skip part from skip_entry Christian Ebner
2024-04-03  9:18   ` Fabian Grünbichler
2024-04-03 11:02     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 05/58] encoder: add optional output writer for file payloads Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 06/58] encoder: move to stack based state tracking Christian Ebner
2024-04-03  9:54   ` Fabian Grünbichler
2024-04-03 11:01     ` Christian Ebner
2024-04-04  8:48       ` Fabian Grünbichler
2024-04-04  9:04         ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 07/58] decoder/accessor: add optional payload input stream Christian Ebner
2024-04-03 10:38   ` Fabian Grünbichler
2024-04-03 11:47     ` Christian Ebner
2024-04-03 12:18     ` Christian Ebner
2024-04-04  8:46       ` Fabian Grünbichler
2024-04-04  9:49         ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 08/58] encoder: add payload reference capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 09/58] encoder: add payload position capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 10/58] encoder: add payload advance capability Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 11/58] encoder/format: finish payload stream with marker Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 12/58] format: add payload stream start marker Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 13/58] format: add pxar format version entry Christian Ebner
2024-04-03 11:41   ` Fabian Grünbichler
2024-04-03 13:31     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 pxar 14/58] format/encoder/decoder: add entry type cli params Christian Ebner
2024-04-03 12:01   ` Fabian Grünbichler
2024-04-03 14:41     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 15/58] client: pxar: switch to stack based encoder state Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 16/58] client: backup writer: only borrow http client Christian Ebner
2024-04-08  9:04   ` [pbs-devel] applied: " Fabian Grünbichler
2024-04-08  9:17     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 17/58] client: backup: factor out extension from backup target Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 18/58] client: backup: early check for fixed index type Christian Ebner
2024-04-08  9:05   ` [pbs-devel] applied: " Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 19/58] client: pxar: combine writer params into struct Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 20/58] client: backup: split payload to dedicated stream Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 21/58] client: helper: add helpers for creating reader instances Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 22/58] client: helper: add method for split archive name mapping Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 23/58] client: restore: read payload from dedicated index Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 24/58] tools: cover meta extension for pxar archives Christian Ebner
2024-04-04  9:01   ` Fabian Grünbichler
2024-04-04  9:06     ` Christian Ebner
2024-04-04  9:10       ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 25/58] restore: " Christian Ebner
2024-04-04  9:02   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 26/58] client: mount: make split pxar archives mountable Christian Ebner
2024-04-04  9:43   ` Fabian Grünbichler
2024-04-04 13:29     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 27/58] api: datastore: refactor getting local chunk reader Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 28/58] api: datastore: attach optional payload " Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 29/58] catalog: shell: factor out pxar fuse reader instantiation Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 30/58] catalog: shell: redirect payload reader for split streams Christian Ebner
2024-04-04  9:49   ` Fabian Grünbichler
2024-04-04 15:52     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 31/58] www: cover meta extension for pxar archives Christian Ebner
2024-04-04 10:01   ` Fabian Grünbichler
2024-04-04 14:51     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 32/58] pxar: add optional payload input for achive restore Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 33/58] pxar: add more context to extraction error Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 34/58] client: pxar: include payload offset in output Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 35/58] pxar: show padding in debug output on archive list Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 36/58] datastore: dynamic index: add method to get digest Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries Christian Ebner
2024-04-04 12:54   ` Fabian Grünbichler
2024-04-04 17:13     ` Christian Ebner
2024-04-05  7:22       ` Christian Ebner
2024-04-05 11:28   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 38/58] upload stream: impl reused chunk injector Christian Ebner
2024-04-04 14:24   ` Fabian Grünbichler
2024-04-05 10:26     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 39/58] client: chunk stream: add struct to hold injection state Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 40/58] client: chunk stream: add dynamic entries injection queues Christian Ebner
2024-04-04 14:52   ` Fabian Grünbichler [this message]
2024-04-08 13:54     ` Christian Ebner
2024-04-09  7:19       ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 41/58] specs: add backup detection mode specification Christian Ebner
2024-04-04 14:54   ` Fabian Grünbichler
2024-04-08 13:36     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 42/58] client: implement prepare reference method Christian Ebner
2024-04-05  8:01   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 43/58] client: pxar: implement store to insert chunks on caching Christian Ebner
2024-04-05  7:52   ` Fabian Grünbichler
2024-04-09  9:12     ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 44/58] client: pxar: add previous reference to archiver Christian Ebner
2024-04-04 15:04   ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 45/58] client: pxar: add method for metadata comparison Christian Ebner
2024-04-05  8:08   ` Fabian Grünbichler
2024-04-05  8:14     ` Christian Ebner
2024-04-09 12:52       ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 46/58] pxar: caching: add look-ahead cache types Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 47/58] client: pxar: add look-ahead caching Christian Ebner
2024-04-05  8:33   ` Fabian Grünbichler
2024-04-09 14:53     ` Christian Ebner
     [not found]       ` <<dce38c53-f3e7-47ac-b1fd-a63daaabbcec@proxmox.com>
2024-04-10  7:03         ` Fabian Grünbichler
2024-04-10  7:11           ` Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 48/58] fix #3174: client: pxar: enable caching and meta comparison Christian Ebner
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 49/58] client: backup: increase average chunk size for metadata Christian Ebner
2024-04-05  9:42   ` Fabian Grünbichler
2024-04-05 10:49     ` Dietmar Maurer
2024-04-08  8:28       ` Fabian Grünbichler
2024-03-28 12:36 ` [pbs-devel] [PATCH v3 proxmox-backup 50/58] client: backup writer: add injected chunk count to stats Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 51/58] pxar: create: show chunk injection stats debug output Christian Ebner
2024-04-05  9:47   ` Fabian Grünbichler
2024-04-10 10:00     ` Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 52/58] client: pxar: add entry kind format version Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 53/58] client: pxar: opt encode cli exclude patterns as CliParams Christian Ebner
2024-04-05  9:49   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 54/58] client: pxar: add flow chart for metadata change detection Christian Ebner
2024-04-05 10:16   ` Fabian Grünbichler
2024-04-10 10:04     ` Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 55/58] docs: describe file format for split payload files Christian Ebner
2024-04-05 10:26   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 56/58] docs: add section describing change detection mode Christian Ebner
2024-04-05 11:22   ` Fabian Grünbichler
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 57/58] test-suite: add detection mode change benchmark Christian Ebner
2024-03-28 12:37 ` [pbs-devel] [PATCH v3 proxmox-backup 58/58] test-suite: add bin to deb, add shell completions Christian Ebner
2024-04-05 11:39 ` [pbs-devel] [PATCH v3 pxar proxmox-backup 00/58] fix #3174: improve file-level backup Fabian Grünbichler
2024-04-29 12:13 ` Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1712241225.maig1bup9p.astroid@yuna.none \
    --to=f.gruenbichler@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal