From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 75EEAB8B60 for ; Mon, 11 Mar 2024 14:21:59 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 5F50E94C1 for ; Mon, 11 Mar 2024 14:21:59 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 11 Mar 2024 14:21:58 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 22E7E44B35 for ; Mon, 11 Mar 2024 14:21:58 +0100 (CET) Date: Mon, 11 Mar 2024 14:21:50 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240305092703.126906-1-c.ebner@proxmox.com> <20240305092703.126906-7-c.ebner@proxmox.com> In-Reply-To: <20240305092703.126906-7-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1710159295.f7lna23xv8.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.016 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment PROLO_LEO1 0.1 Meta Catches all Leo drug variations so far SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: Re: [pbs-devel] [RFC v2 pxar 06/36] encoder: move to stack based state tracking X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 11 Mar 2024 13:21:59 -0000 On March 5, 2024 10:26 am, Christian Ebner wrote: > In preparation for the proxmox-backup-client look-ahead caching, > where a passing around of different encoder instances with internal > references is not feasible. >=20 > Instead of creating a new encoder instance for each directory level > and keeping references to the parent state, use an internal stack. >=20 > This is a breaking change in the pxar library API. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 1: > - fix incorrect docu comment >=20 > examples/pxarcmd.rs | 6 +- > src/encoder/aio.rs | 20 ++-- > src/encoder/mod.rs | 246 +++++++++++++++++++++++++------------------- > src/encoder/sync.rs | 10 +- > 4 files changed, 158 insertions(+), 124 deletions(-) >=20 > diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs > index e0c779d..dcf3c44 100644 > --- a/examples/pxarcmd.rs > +++ b/examples/pxarcmd.rs > @@ -138,14 +138,14 @@ fn add_directory<'a, T: SeqWrite + 'a>( > =20 > let meta =3D Metadata::from(&file_meta); > if file_type.is_dir() { > - let mut dir =3D encoder.create_directory(file_name, &meta)?; > + encoder.create_directory(file_name, &meta)?; > add_directory( > - &mut dir, > + encoder, > std::fs::read_dir(file_path)?, > root_path, > &mut *hardlinks, > )?; > - dir.finish()?; > + encoder.finish()?; > } else if file_type.is_symlink() { > todo!("symlink handling"); > } else if file_type.is_file() { > diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs > index 82b9ab2..60b11cd 100644 > --- a/src/encoder/aio.rs > +++ b/src/encoder/aio.rs > @@ -105,17 +105,14 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { > &mut self, > file_name: P, > metadata: &Metadata, > - ) -> io::Result> { > - Ok(Encoder { > - inner: self > - .inner > - .create_directory(file_name.as_ref(), metadata) > - .await?, > - }) > + ) -> io::Result<()> { > + self.inner > + .create_directory(file_name.as_ref(), metadata) > + .await > } > =20 > - /// Finish this directory. This is mandatory, otherwise the `Drop` h= andler will `panic!`. > - pub async fn finish(self) -> io::Result<()> { > + /// Finish this directory. This is mandatory, encodes the end for th= e current directory. > + pub async fn finish(&mut self) -> io::Result<()> { > self.inner.finish().await > } > =20 > @@ -302,11 +299,12 @@ mod test { > .await > .unwrap(); > { > - let mut dir =3D encoder > + encoder > .create_directory("baba", &Metadata::dir_builder(0o7= 00).build()) > .await > .unwrap(); > - dir.create_file(&Metadata::file_builder(0o755).build(), = "abab", 1024) > + encoder > + .create_file(&Metadata::file_builder(0o755).build(),= "abab", 1024) > .await > .unwrap(); > } > diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs > index e4ea69b..962087a 100644 > --- a/src/encoder/mod.rs > +++ b/src/encoder/mod.rs > @@ -227,6 +227,16 @@ struct EncoderState { > } > =20 > impl EncoderState { > + #[inline] > + fn position(&self) -> u64 { > + self.write_position > + } > + > + #[inline] > + fn payload_position(&self) -> u64 { > + self.payload_write_position > + } > + > fn merge_error(&mut self, error: Option) { > // one error is enough: > if self.encode_error.is_none() { > @@ -244,16 +254,6 @@ pub(crate) enum EncoderOutput<'a, T> { > Borrowed(&'a mut T), > } > =20 > -impl<'a, T> EncoderOutput<'a, T> { > - #[inline] > - fn to_borrowed_mut<'s>(&'s mut self) -> EncoderOutput<'s, T> > - where > - 'a: 's, > - { > - EncoderOutput::Borrowed(self.as_mut()) > - } > -} > - > impl<'a, T> std::convert::AsMut for EncoderOutput<'a, T> { > fn as_mut(&mut self) -> &mut T { > match self { > @@ -282,8 +282,8 @@ impl<'a, T> std::convert::From<&'a mut T> for Encoder= Output<'a, T> { > pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> { > output: EncoderOutput<'a, T>, > payload_output: EncoderOutput<'a, Option>, > - state: EncoderState, > - parent: Option<&'a mut EncoderState>, > + /// EncoderState stack storing the state for each directory level > + state: Vec, > finished: bool, > =20 > /// Since only the "current" entry can be actively writing files, we= share the file copy > @@ -291,21 +291,6 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> = { > file_copy_buffer: Arc>>, > } > =20 > -impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> { > - fn drop(&mut self) { > - if let Some(ref mut parent) =3D self.parent { > - // propagate errors: > - parent.merge_error(self.state.encode_error); > - if !self.finished { > - parent.add_error(EncodeError::IncompleteDirectory); > - } > - } else if !self.finished { > - // FIXME: how do we deal with this? > - // eprintln!("Encoder dropped without finishing!"); > - } > - } > -} > - > impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > pub async fn new( > output: EncoderOutput<'a, T>, > @@ -317,8 +302,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > let mut this =3D Self { > output, > payload_output: EncoderOutput::Owned(None), > - state: EncoderState::default(), > - parent: None, > + state: vec![EncoderState::default()], > finished: false, > file_copy_buffer: Arc::new(Mutex::new(unsafe { > crate::util::vec_new_uninitialized(1024 * 1024) > @@ -326,7 +310,11 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > }; > =20 > this.encode_metadata(metadata).await?; > - this.state.files_offset =3D this.position(); > + let state =3D this > + .state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= "))?; nit: this could just use this.state_mut() > + state.files_offset =3D state.position(); > =20 > Ok(this) > } > @@ -337,13 +325,32 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > } > =20 > fn check(&self) -> io::Result<()> { > - match self.state.encode_error { > + if self.finished { > + io_bail!("unexpected encoder finished state"); > + } > + let state =3D self > + .state > + .last() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= "))?; > + match state.encode_error { nit: this could just use self.state() > Some(EncodeError::IncompleteFile) =3D> io_bail!("incomplete = file"), > Some(EncodeError::IncompleteDirectory) =3D> io_bail!("direct= ory not finalized"), > None =3D> Ok(()), > } > } > =20 > + fn state(&self) -> io::Result<&EncoderState> { > + self.state > + .last() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= ")) > + } > + > + fn state_mut(&mut self) -> io::Result<&mut EncoderState> { > + self.state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= ")) > + } > + > pub async fn create_file<'b>( > &'b mut self, > metadata: &Metadata, > @@ -368,26 +375,38 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > { > self.check()?; > =20 > - let file_offset =3D self.position(); > + let file_offset =3D self.state()?.position(); > self.start_file_do(Some(metadata), file_name).await?; > =20 > if self.payload_output.as_mut().is_some() { > - let mut data =3D self.payload_position().to_le_bytes().to_ve= c(); > + let state =3D self > + .state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack under= flow"))?; this one here > + let mut data =3D state.payload_position().to_le_bytes().to_v= ec(); > data.append(&mut file_size.to_le_bytes().to_vec()); > seq_write_pxar_entry( > self.output.as_mut(), > format::PXAR_PAYLOAD_REF, > &data, > - &mut self.state.write_position, > + &mut state.write_position, > ) > .await?; > } else { > let header =3D format::Header::with_content_size(format::PXA= R_PAYLOAD, file_size); > header.check_header_size()?; > - seq_write_struct(self.output.as_mut(), header, &mut self.sta= te.write_position).await?; > + let state =3D self > + .state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack under= flow"))?; > + seq_write_struct(self.output.as_mut(), header, &mut state.wr= ite_position).await?; and this one here can't use the state/state_mut helpers atm (because those borrow self).. > }; > =20 > - let payload_data_offset =3D self.position(); > + let state =3D self > + .state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= "))?; > + let payload_data_offset =3D state.position(); same here (and repeated quite a few times in the remainder of this patch). it might be worth it to have another helper that gives you (&mut output, &mut state) for payload or regular output? e.g., something like this (or as two helpers dropping the bool parameter): + fn output_state(&mut self, payload: bool) -> io::Result<(&mut T, &mut = EncoderState)> { + Ok(( + if payload { + self.output.as_mut() + } else { + self.payload_output.as_mut().as_mut().unwrap() + }, + self.state + .last_mut() + .ok_or_else(|| io_format_err!("encoder state stack underfl= ow"))?, + )) + } > =20 > let meta_size =3D payload_data_offset - file_offset; > =20 > @@ -400,7 +419,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > size: file_size + meta_size, > }, > remaining_size: file_size, > - parent: &mut self.state, > + parent: state, > }) > } > =20 [.. the rest just contains repeats of the pattern mentioned above]