From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id CE24C90FD8 for ; Wed, 3 Apr 2024 11:55:24 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 70BBF1550A for ; Wed, 3 Apr 2024 11:54:54 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 3 Apr 2024 11:54:53 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 25BE14247E for ; Wed, 3 Apr 2024 11:54:53 +0200 (CEST) Date: Wed, 03 Apr 2024 11:54:45 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240328123707.336951-1-c.ebner@proxmox.com> <20240328123707.336951-7-c.ebner@proxmox.com> In-Reply-To: <20240328123707.336951-7-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1712136715.t3s03j6zyy.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.009 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment PROLO_LEO1 0.1 Meta Catches all Leo drug variations so far SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [sync.rs, mod.rs, aio.rs, pxarcmd.rs, main.rs] Subject: Re: [pbs-devel] [PATCH v3 pxar 06/58] encoder: move to stack based state tracking X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Apr 2024 09:55:24 -0000 On March 28, 2024 1:36 pm, Christian Ebner wrote: > In preparation for the proxmox-backup-client look-ahead caching, > where a passing around of different encoder instances with internal > references is not feasible. >=20 > Instead of creating a new encoder instance for each directory level > and keeping references to the parent state, use an internal stack. >=20 > This is a breaking change in the pxar library API. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 2: > - consume encoder with new `close` method to finalize > - new output_state helper usable when for both a mut borrow is required > - double checked use of state/state_mut usage > - major refactoring >=20 > examples/pxarcmd.rs | 7 +- > src/encoder/aio.rs | 26 ++-- > src/encoder/mod.rs | 285 +++++++++++++++++++++++-------------------- > src/encoder/sync.rs | 16 ++- > tests/simple/main.rs | 3 + > 5 files changed, 187 insertions(+), 150 deletions(-) >=20 > diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs > index e0c779d..0294eba 100644 > --- a/examples/pxarcmd.rs > +++ b/examples/pxarcmd.rs > @@ -106,6 +106,7 @@ fn cmd_create(mut args: std::env::ArgsOs) -> Result<(= ), Error> { > let mut encoder =3D Encoder::create(file, &meta)?; > add_directory(&mut encoder, dir, &dir_path, &mut HashMap::new())?; > encoder.finish()?; > + encoder.close()?; > =20 > Ok(()) > } > @@ -138,14 +139,14 @@ fn add_directory<'a, T: SeqWrite + 'a>( > =20 > let meta =3D Metadata::from(&file_meta); > if file_type.is_dir() { > - let mut dir =3D encoder.create_directory(file_name, &meta)?; > + encoder.create_directory(file_name, &meta)?; > add_directory( > - &mut dir, > + encoder, > std::fs::read_dir(file_path)?, > root_path, > &mut *hardlinks, > )?; > - dir.finish()?; > + encoder.finish()?; > } else if file_type.is_symlink() { > todo!("symlink handling"); > } else if file_type.is_file() { > diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs > index 31a1a2f..635e550 100644 > --- a/src/encoder/aio.rs > +++ b/src/encoder/aio.rs > @@ -109,20 +109,23 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { > &mut self, > file_name: P, > metadata: &Metadata, > - ) -> io::Result> { > - Ok(Encoder { > - inner: self > - .inner > - .create_directory(file_name.as_ref(), metadata) > - .await?, > - }) > + ) -> io::Result<()> { > + self.inner > + .create_directory(file_name.as_ref(), metadata) > + .await > } > =20 > - /// Finish this directory. This is mandatory, otherwise the `Drop` h= andler will `panic!`. > - pub async fn finish(self) -> io::Result<()> { > + /// Finish this directory. This is mandatory, encodes the end for th= e current directory. > + pub async fn finish(&mut self) -> io::Result<()> { > self.inner.finish().await > } > =20 > + /// Close the encoder instance. This is mandatory, encodes the end f= or the optional payload > + /// output stream, if some is given > + pub async fn close(self) -> io::Result<()> { > + self.inner.close().await > + } > + > /// Add a symbolic link to the archive. > pub async fn add_symlink, PT: AsRef>( > &mut self, > @@ -307,11 +310,12 @@ mod test { > .await > .unwrap(); > { > - let mut dir =3D encoder > + encoder > .create_directory("baba", &Metadata::dir_builder(0o7= 00).build()) > .await > .unwrap(); > - dir.create_file(&Metadata::file_builder(0o755).build(), = "abab", 1024) > + encoder > + .create_file(&Metadata::file_builder(0o755).build(),= "abab", 1024) > .await > .unwrap(); > } > diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs > index bff6acf..31bb0fa 100644 > --- a/src/encoder/mod.rs > +++ b/src/encoder/mod.rs > @@ -227,6 +227,16 @@ struct EncoderState { > } > =20 > impl EncoderState { > + #[inline] > + fn position(&self) -> u64 { > + self.write_position > + } > + > + #[inline] > + fn payload_position(&self) -> u64 { > + self.payload_write_position > + } > + > fn merge_error(&mut self, error: Option) { > // one error is enough: > if self.encode_error.is_none() { > @@ -244,16 +254,6 @@ pub(crate) enum EncoderOutput<'a, T> { > Borrowed(&'a mut T), > } > =20 > -impl<'a, T> EncoderOutput<'a, T> { > - #[inline] > - fn to_borrowed_mut<'s>(&'s mut self) -> EncoderOutput<'s, T> > - where > - 'a: 's, > - { > - EncoderOutput::Borrowed(self.as_mut()) > - } > -} > - > impl<'a, T> std::convert::AsMut for EncoderOutput<'a, T> { > fn as_mut(&mut self) -> &mut T { > match self { > @@ -282,8 +282,8 @@ impl<'a, T> std::convert::From<&'a mut T> for Encoder= Output<'a, T> { > pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> { > output: EncoderOutput<'a, T>, > payload_output: EncoderOutput<'a, Option>, > - state: EncoderState, > - parent: Option<&'a mut EncoderState>, > + /// EncoderState stack storing the state for each directory level > + state: Vec, > finished: bool, > =20 > /// Since only the "current" entry can be actively writing files, we= share the file copy > @@ -291,21 +291,6 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> = { > file_copy_buffer: Arc>>, > } > =20 > -impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> { > - fn drop(&mut self) { > - if let Some(ref mut parent) =3D self.parent { > - // propagate errors: > - parent.merge_error(self.state.encode_error); > - if !self.finished { > - parent.add_error(EncodeError::IncompleteDirectory); > - } > - } else if !self.finished { > - // FIXME: how do we deal with this? > - // eprintln!("Encoder dropped without finishing!"); > - } > - } > -} should we still have some sort of checks here? e.g., when dropping an encoder, how should self.finished and self.state look like? IIUC, then a dropped encoder should have an empty state and be finished (i.e., `close()` has been called on it). or is this simply not relevant anymore because we only create one and then drop it at the end (but should we then have a similar mechanism for EncoderState?) > - > impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > pub async fn new( > output: EncoderOutput<'a, T>, > @@ -318,8 +303,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > let mut this =3D Self { > output, > payload_output: EncoderOutput::Owned(None), > - state: EncoderState::default(), > - parent: None, > + state: vec![EncoderState::default()], > finished: false, > file_copy_buffer: Arc::new(Mutex::new(unsafe { > crate::util::vec_new_uninitialized(1024 * 1024) > @@ -327,7 +311,8 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > }; > =20 > this.encode_metadata(metadata).await?; > - this.state.files_offset =3D this.position(); > + let state =3D this.state_mut()?; > + state.files_offset =3D state.position(); > =20 > if let Some(payload_output) =3D payload_output { > this.payload_output =3D EncoderOutput::Owned(Some(payload_ou= tput)); > @@ -337,13 +322,38 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { > } > =20 > fn check(&self) -> io::Result<()> { > - match self.state.encode_error { > + if self.finished { > + io_bail!("unexpected encoder finished state"); > + } > + let state =3D self.state()?; > + match state.encode_error { > Some(EncodeError::IncompleteFile) =3D> io_bail!("incomplete = file"), > Some(EncodeError::IncompleteDirectory) =3D> io_bail!("direct= ory not finalized"), > None =3D> Ok(()), > } > } > =20 > + fn state(&self) -> io::Result<&EncoderState> { > + self.state > + .last() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= ")) > + } > + > + fn state_mut(&mut self) -> io::Result<&mut EncoderState> { > + self.state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack underflow= ")) > + } > + > + fn output_state(&mut self) -> io::Result<(&mut T, &mut EncoderState)= > { > + Ok(( > + self.output.as_mut(), > + self.state > + .last_mut() > + .ok_or_else(|| io_format_err!("encoder state stack under= flow"))?, > + )) > + } > + we could have another helper here that also returns the Option<&mut T> for payload_output (while not used as often, it might still be a good idea for readability): diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index b0ec877..e8c5faa 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -387,6 +387,16 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { )) } =20 + fn payload_output_state(&mut self) -> io::Result<(&mut T, Option<&mut = T>, &mut EncoderState)> { + Ok(( + self.output.as_mut(), + self.payload_output.as_mut().as_mut(), + self.state + .last_mut() + .ok_or_else(|| io_format_err!("encoder state stack underfl= ow"))?, + )) + } + pub async fn create_file<'b>( &'b mut self, metadata: &Metadata, @@ -414,12 +424,9 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { let file_offset =3D self.state()?.position(); self.start_file_do(Some(metadata), file_name).await?; =20 - if let Some(payload_output) =3D self.payload_output.as_mut() { - let state =3D self - .state - .last_mut() - .ok_or_else(|| io_format_err!("encoder state stack underfl= ow"))?; + let (output, payload_output, state) =3D self.payload_output_state(= )?; =20 + if let Some(payload_output) =3D payload_output { // Position prior to the payload header let payload_position =3D state.payload_position(); =20 @@ -435,7 +442,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { =20 // Write ref to metadata archive seq_write_pxar_entry( - self.output.as_mut(), + output, format::PXAR_PAYLOAD_REF, &payload_ref.data(), &mut state.write_position, @@ -444,21 +451,18 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { } else { let header =3D format::Header::with_content_size(format::PXAR_= PAYLOAD, file_size); header.check_header_size()?; - let (output, state) =3D self.output_state()?; seq_write_struct(output, header, &mut state.write_position).aw= ait?; } =20 - let state =3D self - .state - .last_mut() - .ok_or_else(|| io_format_err!("encoder state stack underflow")= )?; + let (output, payload_output, state) =3D self.payload_output_state(= )?; + let payload_data_offset =3D state.position(); =20 let meta_size =3D payload_data_offset - file_offset; =20 Ok(FileImpl { - output: self.output.as_mut(), - payload_output: self.payload_output.as_mut().as_mut(), + output, + payload_output, goodbye_item: GoodbyeItem { hash: format::hash_filename(file_name), offset: file_offset, > [..]