From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id A8B7AB8C11 for ; Mon, 11 Mar 2024 14:22:14 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 80A1D9559 for ; Mon, 11 Mar 2024 14:21:44 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 11 Mar 2024 14:21:41 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DB43D488EF for ; Mon, 11 Mar 2024 14:21:40 +0100 (CET) Date: Mon, 11 Mar 2024 14:21:34 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240305092703.126906-1-c.ebner@proxmox.com> <20240305092703.126906-5-c.ebner@proxmox.com> In-Reply-To: <20240305092703.126906-5-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1710153663.gtljd8y4z5.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.066 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: Re: [pbs-devel] [RFC v2 pxar 04/36] decoder: add optional payload input stream X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 11 Mar 2024 13:22:14 -0000 On March 5, 2024 10:26 am, Christian Ebner wrote: > Implement an optional redirection to read the payload for regular files > from a different input stream. >=20 > This allows to decode split stream archives. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 1: > - refactor to use new PayloadRef type and decoder method >=20 > src/accessor/mod.rs | 2 ++ > src/decoder/mod.rs | 78 +++++++++++++++++++++++++++++++++++++++++---- > src/decoder/sync.rs | 7 ++++ > src/lib.rs | 3 ++ > 4 files changed, 83 insertions(+), 7 deletions(-) >=20 > diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs > index 6a2de73..ed99c85 100644 > --- a/src/accessor/mod.rs > +++ b/src/accessor/mod.rs > @@ -342,6 +342,7 @@ impl AccessorImpl { > EntryKind::File { > offset: Some(offset), > size, > + .. > } =3D> { > let meta_size =3D offset - link_offset; > let entry_end =3D link_offset + meta_size + size; > @@ -711,6 +712,7 @@ impl FileEntryImpl { > EntryKind::File { > size, > offset: Some(offset), > + .. > } =3D> Ok(Some(offset..(offset + size))), > _ =3D> Ok(None), > } > diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs > index 70c44ce..7b8254d 100644 > --- a/src/decoder/mod.rs > +++ b/src/decoder/mod.rs > @@ -157,6 +157,10 @@ pub(crate) struct DecoderImpl { > state: State, > with_goodbye_tables: bool, > =20 > + // Payload of regular files might be provided by a different reader > + payload_input: Option, > + payload_consumed: u64, > + > /// The random access code uses decoders for sub-ranges which may no= t end in a `PAYLOAD` for > /// entries like FIFOs or sockets, so there we explicitly allow an i= tem to terminate with EOF. > eof_after_entry: bool, > @@ -167,6 +171,7 @@ enum State { > Default, > InPayload { > offset: u64, > + size: u64, > }, > =20 > /// file entries with no data (fifo, socket) > @@ -199,6 +204,11 @@ impl DecoderImpl { > Self::new_full(input, "/".into(), false).await > } > =20 > + pub fn redirect_payload_input(mut self, payload_input: I) -> Self { > + self.payload_input =3D Some(payload_input); same question as for the encoder - do we want to prevent misuse here and check/ensure that no payload_input has already been set before? > + self > + } > + > pub(crate) fn input(&self) -> &I { > &self.input > } > @@ -219,6 +229,8 @@ impl DecoderImpl { > path_lengths: Vec::new(), > state: State::Begin, > with_goodbye_tables: false, > + payload_input: None, > + payload_consumed: 0, > eof_after_entry, > }; > =20 > @@ -242,9 +254,14 @@ impl DecoderImpl { > // hierarchy and parse the next PXAR_FILENAME or the= PXAR_GOODBYE: > self.read_next_item().await?; > } > - State::InPayload { offset } =3D> { > - // We need to skip the current payload first. > - self.skip_entry(offset).await?; > + State::InPayload { offset, .. } =3D> { > + if self.payload_input.is_none() { > + // Skip remaining payload of current entry in re= gular stream > + self.skip_entry(offset).await?; > + } else { > + // Update consumed payload as given by the offse= t referenced by the content reader > + self.payload_consumed +=3D offset; > + } > self.read_next_item().await?; > } > State::InGoodbyeTable =3D> { > @@ -308,11 +325,11 @@ impl DecoderImpl { > } > =20 > pub fn content_reader(&mut self) -> Option> { > - if let State::InPayload { offset } =3D &mut self.state { > + if let State::InPayload { offset, size } =3D &mut self.state { > Some(Contents::new( > - &mut self.input, > + self.payload_input.as_mut().unwrap_or(&mut self.input), > offset, > - self.current_header.content_size(), > + *size, > )) > } else { > None > @@ -531,8 +548,40 @@ impl DecoderImpl { > self.entry.kind =3D EntryKind::File { > size: self.current_header.content_size(), > offset, > + payload_offset: None, > + }; > + self.state =3D State::InPayload { > + offset: 0, > + size: self.current_header.content_size(), > + }; > + return Ok(ItemResult::Entry); > + } > + format::PXAR_PAYLOAD_REF =3D> { > + let offset =3D seq_read_position(&mut self.input).await.= transpose()?; > + let payload_ref =3D self.read_payload_ref().await?; > + > + let payload_input_offset =3D if let Some(payload_input) = =3D self.payload_input.as_mut() > + { > + seq_read_position(payload_input).await.transpose()? > + } else { > + None > + }; > + > + // Skip payload padding for injected chunks in sync deco= der > + if self.payload_input.is_some() && payload_input_offset.= is_none() { > + let to_skip =3D payload_ref.offset - self.payload_co= nsumed; > + self.skip_payload(to_skip).await?; > + } style: these two could be combined into if let Some(payload_input) =3D self.payload_input.as_mut() { if seq_read_position(payload_input).await.transpose()?.is_none() { // Skip payload padding for injected chunks in sync decoder let to_skip =3D payload_ref.offset - self.payload_consumed; self.skip_payload(to_skip).await?; } } > + self.entry.kind =3D EntryKind::File { > + size: payload_ref.size, > + offset, > + payload_offset: Some(payload_ref.offset), > + }; > + self.state =3D State::InPayload { > + offset: 0, > + size: payload_ref.size, > }; > - self.state =3D State::InPayload { offset: 0 }; > return Ok(ItemResult::Entry); > } > format::PXAR_FILENAME | format::PXAR_GOODBYE =3D> { > @@ -576,6 +625,21 @@ impl DecoderImpl { > Ok(()) > } > =20 > + async fn skip_payload(&mut self, length: u64) -> io::Result<()> { > + let mut len =3D length; > + let scratch =3D scratch_buffer(); > + while len >=3D (scratch.len() as u64) { > + seq_read_exact(self.payload_input.as_mut().unwrap(), scratch= ).await?; > + len -=3D scratch.len() as u64; > + } > + let len =3D len as usize; > + if len > 0 { > + seq_read_exact(self.payload_input.as_mut().unwrap(), &mut sc= ratch[..len]).await?; > + } > + self.payload_consumed +=3D length; > + Ok(()) > + } nit: this could also share the "skip" part with `skip_entry`, and take the input and length as parameter? nit: casting to usize at the start would make the code easier to parse IMHO > async fn read_entry_as_bytes(&mut self) -> io::Result> { > let size =3D usize::try_from(self.current_header.content_size())= .map_err(io_err_other)?; > let data =3D seq_read_exact_data(&mut self.input, size).await?; > diff --git a/src/decoder/sync.rs b/src/decoder/sync.rs > index 5597a03..b22b341 100644 > --- a/src/decoder/sync.rs > +++ b/src/decoder/sync.rs > @@ -53,6 +53,13 @@ impl Decoder { > }) > } > =20 > + /// Take the file payloads from the provided input stream rather tha= n the regular pxar stream > + pub fn redirect_payload_input(self, payload_input: T) -> Self { > + Self { > + inner: self.inner.redirect_payload_input(payload_input), same question here about being called twice > + } > + } > + > /// Internal helper for `Accessor`. In this case we have the low-lev= el state machine, and the > /// layer "above" the `Accessor` propagates the actual type (sync vs= async). > pub(crate) fn from_impl(inner: decoder::DecoderImpl) -> Self { > diff --git a/src/lib.rs b/src/lib.rs > index 210c4b1..ef81a85 100644 > --- a/src/lib.rs > +++ b/src/lib.rs > @@ -364,6 +364,9 @@ pub enum EntryKind { > =20 > /// The file's byte offset inside the archive, if available. > offset: Option, > + > + /// The file's byte offset inside the payload stream, if availab= le. > + payload_offset: Option, > }, > =20 > /// Directory entry. When iterating through an archive, the contents= follow next. > --=20 > 2.39.2 >=20 >=20 >=20 > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >=20 >=20 >=20