public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC proxmox-backup 16/20] fix #3174: upload stream: impl reused chunk injector
Date: Fri, 22 Sep 2023 09:16:17 +0200	[thread overview]
Message-ID: <20230922071621.12670-17-c.ebner@proxmox.com> (raw)
In-Reply-To: <20230922071621.12670-1-c.ebner@proxmox.com>

In order to be included in the backup's index file, the reused chunks,
which store the payload of files skipped during pxar encoding, have to be
inserted after the encoder has written the pxar appendix entry type.

The chunker forces a chunk boundary after this marker and queues the
list of chunks to be uploaded thereafter.
This patch implements the logic to inject the chunks into the chunk upload
stream after such a boundary is requested, by looping over the queued
chunks and inserting them into the stream.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-client/src/inject_reused_chunks.rs | 123 +++++++++++++++++++++++++
 pbs-client/src/lib.rs                  |   1 +
 2 files changed, 124 insertions(+)
 create mode 100644 pbs-client/src/inject_reused_chunks.rs

diff --git a/pbs-client/src/inject_reused_chunks.rs b/pbs-client/src/inject_reused_chunks.rs
new file mode 100644
index 00000000..01cb1350
--- /dev/null
+++ b/pbs-client/src/inject_reused_chunks.rs
@@ -0,0 +1,123 @@
+use std::collections::VecDeque;
+use std::pin::Pin;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+use std::task::{Context, Poll};
+
+use anyhow::Error;
+use futures::{ready, Stream};
+use pin_project_lite::pin_project;
+
+use pbs_datastore::dynamic_index::DynamicEntry;
+
+pin_project! {
+    pub struct InjectReusedChunksQueue<S> {
+        #[pin]
+        input: S, // wrapped stream, polled in `poll_next`
+        current: Option<InjectChunks>, // batch that `poll_next` emits before polling `input` again
+        injection_queue: Arc<Mutex<VecDeque<InjectChunks>>>, // shared queue of pending batches
+        stream_len: Arc<AtomicUsize>, // shared running length, advanced per raw and injected chunk
+        index_csum: Arc<Mutex<Option<openssl::sha::Sha256>>>, // hasher updated per injected chunk
+    }
+}
+
+#[derive(Debug)]
+pub struct InjectChunks {
+    pub boundary: u64, // stream offset compared against raw chunk offsets in `poll_next`
+    pub chunks: Vec<DynamicEntry>, // entries emitted as `InjectedChunksInfo::Known`
+    pub size: usize, // not read in this file; presumably the batch payload size, TODO confirm
+}
+
+pub enum InjectedChunksInfo {
+    Known(Vec<(u64, [u8; 32])>), // injected chunks as (start offset, digest) pairs
+    Raw((u64, bytes::BytesMut)), // chunk read from the input stream as (start offset, data)
+}
+
+pub trait InjectReusedChunks: Sized { // adapter constructor for `InjectReusedChunksQueue`
+    fn inject_reused_chunks(
+        self,
+        injection_queue: Arc<Mutex<VecDeque<InjectChunks>>>,
+        stream_len: Arc<AtomicUsize>,
+        index_csum: Arc<Mutex<Option<openssl::sha::Sha256>>>,
+    ) -> InjectReusedChunksQueue<Self>;
+}
+
+impl<S> InjectReusedChunks for S
+where
+    S: Stream<Item = Result<bytes::BytesMut, Error>>, // implemented for every such stream
+{
+    fn inject_reused_chunks(
+        self,
+        injection_queue: Arc<Mutex<VecDeque<InjectChunks>>>,
+        stream_len: Arc<AtomicUsize>,
+        index_csum: Arc<Mutex<Option<openssl::sha::Sha256>>>,
+    ) -> InjectReusedChunksQueue<Self> {
+        let current = injection_queue.lock().unwrap().pop_front(); // panics on a poisoned mutex
+
+        InjectReusedChunksQueue {
+            input: self,
+            current, // a batch already queued at construction is emitted on the first poll
+            injection_queue,
+            stream_len,
+            index_csum,
+        }
+    }
+}
+
+impl<S> Stream for InjectReusedChunksQueue<S>
+where
+    S: Stream<Item = Result<bytes::BytesMut, Error>>,
+{
+    type Item = Result<InjectedChunksInfo, Error>;
+
+    fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        let mut this = self.project();
+        loop {
+            let current = this.current.take(); // a pending batch is emitted before polling `input`
+            if let Some(current) = current {
+                let mut chunks = Vec::new();
+                let mut guard = this.index_csum.lock().unwrap();
+                let csum = guard.as_mut().unwrap(); // panics if the shared checksum is `None`
+
+                for chunk in current.chunks {
+                    let offset = this
+                        .stream_len
+                        .fetch_add(chunk.end() as usize, Ordering::SeqCst)
+                        as u64; // `fetch_add` returns the previous value: this chunk's start offset
+                    let digest = chunk.digest();
+                    chunks.push((offset, digest));
+                    // Chunk end is assumed to be normalized to chunk size here
+                    let end_offset = offset + chunk.end();
+                    csum.update(&end_offset.to_le_bytes());
+                    csum.update(&digest);
+                }
+                let chunk_info = InjectedChunksInfo::Known(chunks);
+                return Poll::Ready(Some(Ok(chunk_info)));
+            }
+
+            match ready!(this.input.as_mut().poll_next(cx)) {
+                None => return Poll::Ready(None), // input exhausted; queued batches are not drained
+                Some(Err(err)) => return Poll::Ready(Some(Err(err))),
+                Some(Ok(raw)) => {
+                    let chunk_size = raw.len();
+                    let offset = this.stream_len.fetch_add(chunk_size, Ordering::SeqCst) as u64;
+                    let mut injections = this.injection_queue.lock().unwrap();
+                    if let Some(inject) = injections.pop_front() {
+                        if inject.boundary == offset {
+                            let _ = this.current.insert(inject);
+                            // Boundary at this chunk's start: emit the batch on the next loop pass. NOTE(review): `raw` is never returned on this path although `stream_len` already counts it; confirm intended
+                            continue;
+                        } else if inject.boundary <= offset + chunk_size as u64 {
+                            let _ = this.current.insert(inject); // return `raw` now, batch on next poll
+                        } else {
+                            injections.push_front(inject); // boundary not reached yet: requeue at front
+                        }
+                    }
+                    let data = InjectedChunksInfo::Raw((offset, raw));
+
+                    return Poll::Ready(Some(Ok(data)));
+                }
+            }
+        }
+    }
+}
diff --git a/pbs-client/src/lib.rs b/pbs-client/src/lib.rs
index 21cf8556..8bf26381 100644
--- a/pbs-client/src/lib.rs
+++ b/pbs-client/src/lib.rs
@@ -8,6 +8,7 @@ pub mod pxar;
 pub mod tools;
 
 mod merge_known_chunks;
+mod inject_reused_chunks;
 pub mod pipe_to_stream;
 
 mod http_client;
-- 
2.39.2





  parent reply	other threads:[~2023-09-22  7:25 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-22  7:16 [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 1/20] fix #3174: encoder: impl fn new for LinkOffset Christian Ebner
2023-09-27 12:08   ` Wolfgang Bumiller
2023-09-27 12:26     ` Christian Ebner
2023-09-28  6:49       ` Wolfgang Bumiller
2023-09-28  7:52         ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 2/20] fix #3174: decoder: factor out skip_bytes from skip_entry Christian Ebner
2023-09-27 11:32   ` Wolfgang Bumiller
2023-09-27 11:53     ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 3/20] fix #3174: decoder: impl skip_bytes for sync dec Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 4/20] fix #3174: metadata: impl fn to calc byte size Christian Ebner
2023-09-27 11:38   ` Wolfgang Bumiller
2023-09-27 11:55     ` Christian Ebner
2023-09-28  8:07       ` Christian Ebner
2023-09-28  9:00         ` Wolfgang Bumiller
2023-09-28  9:27           ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 5/20] fix #3174: enc/dec: impl PXAR_APPENDIX_REF entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 6/20] fix #3174: enc/dec: impl PXAR_APPENDIX entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 7/20] fix #3174: encoder: add helper to incr encoder pos Christian Ebner
2023-09-27 12:07   ` Wolfgang Bumiller
2023-09-27 12:20     ` Christian Ebner
2023-09-28  7:04       ` Wolfgang Bumiller
2023-09-28  7:50         ` Christian Ebner
2023-09-28  8:32           ` Wolfgang Bumiller
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 8/20] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 09/20] fix #3174: index: add fn index list from start/end-offsets Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 10/20] fix #3174: index: add fn digest for DynamicEntry Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 11/20] fix #3174: api: double catalog upload size Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 12/20] fix #3174: catalog: incl pxar archives file offset Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 13/20] fix #3174: archiver/extractor: impl appendix ref Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 14/20] fix #3174: extractor: impl seq restore from appendix Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 15/20] fix #3174: archiver: store ref to previous backup Christian Ebner
2023-09-22  7:16 ` Christian Ebner [this message]
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 17/20] fix #3174: chunker: add forced boundaries Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 18/20] fix #3174: backup writer: inject queued chunk in upload stream Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 19/20] fix #3174: archiver: reuse files with unchanged metadata Christian Ebner
2023-09-26  7:01   ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 20/20] fix #3174: client: Add incremental flag to backup creation Christian Ebner
2023-09-26  7:11   ` Christian Ebner
2023-09-26  7:15 ` [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230922071621.12670-17-c.ebner@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal