From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.ebner@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id C08B7988DE
 for <pbs-devel@lists.proxmox.com>; Mon,  9 Oct 2023 13:52:39 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id A041716BBE
 for <pbs-devel@lists.proxmox.com>; Mon,  9 Oct 2023 13:52:09 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pbs-devel@lists.proxmox.com>; Mon,  9 Oct 2023 13:52:08 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 61D494493E
 for <pbs-devel@lists.proxmox.com>; Mon,  9 Oct 2023 13:52:08 +0200 (CEST)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Mon,  9 Oct 2023 13:51:19 +0200
Message-Id: <20231009115139.1417886-4-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20231009115139.1417886-1-c.ebner@proxmox.com>
References: <20231009115139.1417886-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.091 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [mod.rs, sync.rs]
Subject: [pbs-devel] [RFC v2 pxar 3/23] fix #3174: encoder: calc filename +
 metadata byte size
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Mon, 09 Oct 2023 11:52:39 -0000

Introduce SeqSink and implement SeqWrite for it in order to create an
encoder implementation which, instead of writing data to a stream,
consumes the encoded stream and returns the number of bytes consumed.

Based on this, implement a helper function `byte_len` which returns the
encoded byte size of the filename entry and metadata entry as they would
be written to the archive.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
Changes since v1:
- Instead of calculating the metadata size based on the known encoding
  sizes, implement an Encoder instance which counts the encoded bytes.

 src/encoder/mod.rs  | 37 +++++++++++++++++++++++++++++++++++++
 src/encoder/sync.rs |  9 ++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index 0d342ec..a209ee7 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -85,6 +85,24 @@ where
     }
 }
 
+#[derive(Default)]
+/// Sink to consume sequential byte stream
+pub struct SeqSink;
+
+impl SeqWrite for SeqSink {
+    fn poll_seq_write(
+        self: Pin<&mut Self>,
+        _cx: &mut Context,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        Poll::Ready(Ok(buf.len()))
+    }
+
+    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<io::Result<()>> {
+        Poll::Ready(Ok(()))
+    }
+}
+
 /// awaitable verison of `poll_seq_write`.
 async fn seq_write<T: SeqWrite + ?Sized>(
     output: &mut T,
@@ -833,6 +851,25 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
     }
 }
 
+impl EncoderImpl<'_, SeqSink> {
+    /// Calculate the encoded byte len of filename and metadata struct
+    async fn byte_len(filename: &std::ffi::CStr, metadata: &Metadata) -> io::Result<u64> {
+        let mut this = Self {
+            output: EncoderOutput::Owned(SeqSink::default()),
+            state: EncoderState::default(),
+            parent: None,
+            finished: false,
+            file_copy_buffer: Arc::new(Mutex::new(unsafe {
+                crate::util::vec_new_uninitialized(1024 * 1024)
+            })),
+        };
+
+        this.start_file_do(Some(metadata), filename.to_bytes())
+            .await?;
+        Ok(this.position())
+    }
+}
+
 /// Writer for a file object in a directory.
 pub(crate) struct FileImpl<'a, S: SeqWrite> {
     output: &'a mut S,
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index 1ec91b8..ac0025c 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -6,7 +6,7 @@ use std::pin::Pin;
 use std::task::{Context, Poll};
 
 use crate::decoder::sync::StandardReader;
-use crate::encoder::{self, LinkOffset, SeqWrite};
+use crate::encoder::{self, LinkOffset, SeqSink, SeqWrite};
 use crate::format;
 use crate::util::poll_result_once;
 use crate::Metadata;
@@ -165,6 +165,13 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     }
 }
 
+impl<'a> Encoder<'a, SeqSink> {
+    /// Calculate the encoded byte len of filename and metadata struct
+    pub fn byte_len(filename: &std::ffi::CStr, metadata: &Metadata) -> io::Result<u64> {
+        poll_result_once(encoder::EncoderImpl::byte_len(filename, metadata))
+    }
+}
+
 /// This is a "file" inside a pxar archive, to which the initially declared amount of data should
 /// be written.
 ///
-- 
2.39.2