From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id C058191240 for ; Wed, 3 Apr 2024 13:42:20 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 93A6016DB2 for ; Wed, 3 Apr 2024 13:41:50 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 3 Apr 2024 13:41:49 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 21CEA44D58 for ; Wed, 3 Apr 2024 13:41:49 +0200 (CEST) Date: Wed, 03 Apr 2024 13:41:41 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240328123707.336951-1-c.ebner@proxmox.com> <20240328123707.336951-14-c.ebner@proxmox.com> In-Reply-To: <20240328123707.336951-14-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1712143636.hs6xlwzexi.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.059 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF 
record Subject: Re: [pbs-devel] [PATCH v3 pxar 13/58] format: add pxar format version entry X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Apr 2024 11:42:20 -0000 On March 28, 2024 1:36 pm, Christian Ebner wrote: > Adds an additional entry type at the start of each pxar archive > signaling the encoding format version. If not present, the default > version 1 is assumed. >=20 > This allows to early on detect the pxar encoding version, allowing tools > to switch mode or bail on non compatible encoder/decoder functionality. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 2: > - not present in previous version >=20 > examples/mk-format-hashes.rs | 5 +++++ > src/decoder/mod.rs | 29 ++++++++++++++++++++++++++-- > src/encoder/mod.rs | 37 +++++++++++++++++++++++++++++++++--- > src/format/mod.rs | 11 +++++++++++ > src/lib.rs | 3 +++ > 5 files changed, 80 insertions(+), 5 deletions(-) >=20 > diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs > index 35cff99..e5d69b1 100644 > --- a/examples/mk-format-hashes.rs > +++ b/examples/mk-format-hashes.rs > @@ -1,6 +1,11 @@ > use pxar::format::hash_filename; > =20 > const CONSTANTS: &[(&str, &str, &str)] =3D &[ > + ( > + "Pxar format version entry, fallback to version 1 if not present= ", > + "PXAR_FORMAT_VERSION", > + "__PROXMOX_FORMAT_VERSION__", > + ), > ( > "Beginning of an entry (current version).", > "PXAR_ENTRY", > diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs > index 00d9abf..5b2fafb 100644 > --- a/src/decoder/mod.rs > +++ b/src/decoder/mod.rs > @@ -17,7 +17,7 @@ use std::task::{Context, Poll}; > =20 > use endian_trait::Endian; > =20 > -use crate::format::{self, Header}; > +use crate::format::{self, FormatVersion, Header}; > use crate::util::{self, io_err_other}; > use crate::{Entry, 
EntryKind, Metadata}; > =20 > @@ -164,6 +164,8 @@ pub(crate) struct DecoderImpl { > /// The random access code uses decoders for sub-ranges which may no= t end in a `PAYLOAD` for > /// entries like FIFOs or sockets, so there we explicitly allow an i= tem to terminate with EOF. > eof_after_entry: bool, > + /// The format version as determined by the format version header > + version: format::FormatVersion, > } > =20 > enum State { > @@ -242,6 +244,7 @@ impl DecoderImpl { > payload_input, > payload_consumed, > eof_after_entry, > + version: FormatVersion::default(), > }; > =20 > // this.read_next_entry().await?; > @@ -258,7 +261,16 @@ impl DecoderImpl { > loop { > match self.state { > State::Eof =3D> return Ok(None), > - State::Begin =3D> return self.read_next_entry().await.ma= p(Some), > + State::Begin =3D> { > + let entry =3D self.read_next_entry().await.map(Some)= ; > + if let Ok(Some(ref entry)) =3D entry { > + if let EntryKind::Version(version) =3D entry.kin= d() { > + self.version =3D version.clone(); > + return self.read_next_entry().await.map(Some= ); > + } > + } > + return entry; a bit unsure here, if we want to enforce the order, wouldn't it be more clean to transition to a new state here rather than adding more nested ifs over time? 
;) > + } > State::Default =3D> { > // we completely finished an entry, so now we're goi= ng "up" in the directory > // hierarchy and parse the next PXAR_FILENAME or the= PXAR_GOODBYE: > @@ -412,6 +424,11 @@ impl DecoderImpl { > self.entry.metadata =3D Metadata::default(); > self.entry.kind =3D EntryKind::Hardlink(self.read_hardlink()= .await?); > =20 > + Ok(Some(self.entry.take())) > + } else if header.htype =3D=3D format::PXAR_FORMAT_VERSION { > + self.current_header =3D header; > + self.entry.kind =3D EntryKind::Version(self.read_format_vers= ion().await?); > + > Ok(Some(self.entry.take())) > } else if header.htype =3D=3D format::PXAR_ENTRY || header.htype= =3D=3D format::PXAR_ENTRY_V1 { > if header.htype =3D=3D format::PXAR_ENTRY { > @@ -777,6 +794,14 @@ impl DecoderImpl { > =20 > seq_read_entry(&mut self.input).await > } > + > + async fn read_format_version(&mut self) -> io::Result { > + match seq_read_entry(&mut self.input).await? { > + 1u64 =3D> Ok(format::FormatVersion::Version1), this should never happen though, right? > + 2u64 =3D> Ok(format::FormatVersion::Version2), also this (continued below) > + _ =3D> io_bail!("unexpected pxar format version"), this should maybe include the value? ;) > + } > + } > } > =20 > /// Reader for file contents inside a pxar archive. > diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs > index 88c0ed5..9270153 100644 > --- a/src/encoder/mod.rs > +++ b/src/encoder/mod.rs > @@ -17,7 +17,7 @@ use endian_trait::Endian; > =20 > use crate::binary_tree_array; > use crate::decoder::{self, SeqRead}; > -use crate::format::{self, GoodbyeItem, PayloadRef}; > +use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef}; > use crate::Metadata; > =20 > pub mod aio; > @@ -307,6 +307,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> { > /// Since only the "current" entry can be actively writing files, we= share the file copy > /// buffer. 
> file_copy_buffer: Arc>>, > + /// Pxar format version to encode > + version: format::FormatVersion, > } > =20 > impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > @@ -320,11 +322,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > } > =20 > let mut state =3D EncoderState::default(); > - if let Some(payload_output) =3D payload_output.as_mut() { > + let version =3D if let Some(payload_output) =3D payload_output.a= s_mut() { > let header =3D format::Header::with_content_size(format::PXA= R_PAYLOAD_START_MARKER, 0); > header.check_header_size()?; > seq_write_struct(payload_output, header, &mut state.payload_= write_position).await?; > - } > + format::FormatVersion::Version2 > + } else { > + format::FormatVersion::default() shouldn't this be Version1 instead of default()? they are the same *now*, but that might not be the case forever? > + }; > =20 > let mut this =3D Self { > output, > @@ -334,8 +339,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > file_copy_buffer: Arc::new(Mutex::new(unsafe { > crate::util::vec_new_uninitialized(1024 * 1024) > })), > + version, > }; > =20 > + this.encode_format_version().await?; > this.encode_metadata(metadata).await?; > let state =3D this.state_mut()?; > state.files_offset =3D state.position(); > @@ -522,6 +529,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > file_size: u64, > payload_offset: PayloadOffset, > ) -> io::Result<()> { > + if self.version =3D=3D FormatVersion::Version1 { > + io_bail!("payload references not supported pxar format versi= on 1"); > + } > + > if self.payload_output.as_mut().is_none() { > io_bail!("unable to add payload reference"); > } > @@ -729,6 +740,26 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > Ok(()) > } > =20 > + async fn encode_format_version(&mut self) -> io::Result<()> { > + let version_bytes =3D match self.version { > + format::FormatVersion::Version1 =3D> return Ok(()), > + format::FormatVersion::Version2 =3D> 2u64.to_le_bytes(), (continued from above) and this here should 
maybe go together? > + }; > + > + let (output, state) =3D self.output_state()?; > + if state.write_position !=3D 0 { > + io_bail!("pxar format version must be encoded at the beginning of an = archive"); should this also be enforced while decoding? should we also encode a/the version of the payload archive? > + } > + > + seq_write_pxar_entry( > + output, > + format::PXAR_FORMAT_VERSION, > + &version_bytes, > + &mut state.write_position, > + ) > + .await > + } > + > async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Resu= lt<()> { > let (output, state) =3D self.output_state()?; > seq_write_pxar_struct_entry( > diff --git a/src/format/mod.rs b/src/format/mod.rs > index a672d19..2bf33c9 100644 > --- a/src/format/mod.rs > +++ b/src/format/mod.rs > @@ -6,6 +6,7 @@ > //! item data. > //! > //! An archive contains items in the following order: > +//! * `FORMAT_VERSION` -- (optional for v1), version of encoding fo= rmat > //! * `ENTRY` -- containing general stat() data and relate= d bits > //! * `XATTR` -- one extended attribute > //! * ... -- more of these when there are multiple def= ined > @@ -80,6 +81,8 @@ pub mod mode { > } > =20 > // Generated by `cargo run --example mk-format-hashes` > +/// Pxar format version entry, fallback to version 1 if not present > +pub const PXAR_FORMAT_VERSION: u64 =3D 0x730f6c75df16a40d; > /// Beginning of an entry (current version). 
> pub const PXAR_ENTRY: u64 =3D 0xd5956474e588acef; > /// Previous version of the entry struct > @@ -186,6 +189,7 @@ impl Header { > impl Display for Header { > fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { > let readable =3D match self.htype { > + PXAR_FORMAT_VERSION =3D> "FORMAT_VERSION", > PXAR_FILENAME =3D> "FILENAME", > PXAR_SYMLINK =3D> "SYMLINK", > PXAR_HARDLINK =3D> "HARDLINK", > @@ -551,6 +555,13 @@ impl From<&std::fs::Metadata> for Stat { > } > } > =20 > +#[derive(Clone, Debug, Default, PartialEq)] > +pub enum FormatVersion { > + #[default] > + Version1, > + Version2, > +} > + > #[derive(Clone, Debug)] > pub struct Filename { > pub name: Vec, > diff --git a/src/lib.rs b/src/lib.rs > index ef81a85..a87b5ac 100644 > --- a/src/lib.rs > +++ b/src/lib.rs > @@ -342,6 +342,9 @@ impl Acl { > /// Identifies whether the entry is a file, symlink, directory, etc. > #[derive(Clone, Debug)] > pub enum EntryKind { > + /// Pxar file format version > + Version(format::FormatVersion), > + > /// Symbolic links. > Symlink(format::Symlink), > =20 > --=20 > 2.39.2 >=20 >=20 >=20 > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >=20 >=20 >=20