From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: Proxmox Backup Server development discussion
<pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [RFC v2 pxar 06/36] encoder: move to stack based state tracking
Date: Mon, 11 Mar 2024 14:21:50 +0100 [thread overview]
Message-ID: <1710159295.f7lna23xv8.astroid@yuna.none> (raw)
In-Reply-To: <20240305092703.126906-7-c.ebner@proxmox.com>
On March 5, 2024 10:26 am, Christian Ebner wrote:
> In preparation for the proxmox-backup-client look-ahead caching,
> where a passing around of different encoder instances with internal
> references is not feasible.
>
> Instead of creating a new encoder instance for each directory level
> and keeping references to the parent state, use an internal stack.
>
> This is a breaking change in the pxar library API.
>
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
> changes since version 1:
> - fix incorrect docu comment
>
> examples/pxarcmd.rs | 6 +-
> src/encoder/aio.rs | 20 ++--
> src/encoder/mod.rs | 246 +++++++++++++++++++++++++-------------------
> src/encoder/sync.rs | 10 +-
> 4 files changed, 158 insertions(+), 124 deletions(-)
>
> diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs
> index e0c779d..dcf3c44 100644
> --- a/examples/pxarcmd.rs
> +++ b/examples/pxarcmd.rs
> @@ -138,14 +138,14 @@ fn add_directory<'a, T: SeqWrite + 'a>(
>
> let meta = Metadata::from(&file_meta);
> if file_type.is_dir() {
> - let mut dir = encoder.create_directory(file_name, &meta)?;
> + encoder.create_directory(file_name, &meta)?;
> add_directory(
> - &mut dir,
> + encoder,
> std::fs::read_dir(file_path)?,
> root_path,
> &mut *hardlinks,
> )?;
> - dir.finish()?;
> + encoder.finish()?;
> } else if file_type.is_symlink() {
> todo!("symlink handling");
> } else if file_type.is_file() {
> diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
> index 82b9ab2..60b11cd 100644
> --- a/src/encoder/aio.rs
> +++ b/src/encoder/aio.rs
> @@ -105,17 +105,14 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
> &mut self,
> file_name: P,
> metadata: &Metadata,
> - ) -> io::Result<Encoder<'_, T>> {
> - Ok(Encoder {
> - inner: self
> - .inner
> - .create_directory(file_name.as_ref(), metadata)
> - .await?,
> - })
> + ) -> io::Result<()> {
> + self.inner
> + .create_directory(file_name.as_ref(), metadata)
> + .await
> }
>
> - /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`.
> - pub async fn finish(self) -> io::Result<()> {
> + /// Finish this directory. This is mandatory, encodes the end for the current directory.
> + pub async fn finish(&mut self) -> io::Result<()> {
> self.inner.finish().await
> }
>
> @@ -302,11 +299,12 @@ mod test {
> .await
> .unwrap();
> {
> - let mut dir = encoder
> + encoder
> .create_directory("baba", &Metadata::dir_builder(0o700).build())
> .await
> .unwrap();
> - dir.create_file(&Metadata::file_builder(0o755).build(), "abab", 1024)
> + encoder
> + .create_file(&Metadata::file_builder(0o755).build(), "abab", 1024)
> .await
> .unwrap();
> }
> diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
> index e4ea69b..962087a 100644
> --- a/src/encoder/mod.rs
> +++ b/src/encoder/mod.rs
> @@ -227,6 +227,16 @@ struct EncoderState {
> }
>
> impl EncoderState {
> + #[inline]
> + fn position(&self) -> u64 {
> + self.write_position
> + }
> +
> + #[inline]
> + fn payload_position(&self) -> u64 {
> + self.payload_write_position
> + }
> +
> fn merge_error(&mut self, error: Option<EncodeError>) {
> // one error is enough:
> if self.encode_error.is_none() {
> @@ -244,16 +254,6 @@ pub(crate) enum EncoderOutput<'a, T> {
> Borrowed(&'a mut T),
> }
>
> -impl<'a, T> EncoderOutput<'a, T> {
> - #[inline]
> - fn to_borrowed_mut<'s>(&'s mut self) -> EncoderOutput<'s, T>
> - where
> - 'a: 's,
> - {
> - EncoderOutput::Borrowed(self.as_mut())
> - }
> -}
> -
> impl<'a, T> std::convert::AsMut<T> for EncoderOutput<'a, T> {
> fn as_mut(&mut self) -> &mut T {
> match self {
> @@ -282,8 +282,8 @@ impl<'a, T> std::convert::From<&'a mut T> for EncoderOutput<'a, T> {
> pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
> output: EncoderOutput<'a, T>,
> payload_output: EncoderOutput<'a, Option<T>>,
> - state: EncoderState,
> - parent: Option<&'a mut EncoderState>,
> + /// EncoderState stack storing the state for each directory level
> + state: Vec<EncoderState>,
> finished: bool,
>
> /// Since only the "current" entry can be actively writing files, we share the file copy
> @@ -291,21 +291,6 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
> file_copy_buffer: Arc<Mutex<Vec<u8>>>,
> }
>
> -impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
> - fn drop(&mut self) {
> - if let Some(ref mut parent) = self.parent {
> - // propagate errors:
> - parent.merge_error(self.state.encode_error);
> - if !self.finished {
> - parent.add_error(EncodeError::IncompleteDirectory);
> - }
> - } else if !self.finished {
> - // FIXME: how do we deal with this?
> - // eprintln!("Encoder dropped without finishing!");
> - }
> - }
> -}
> -
> impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> pub async fn new(
> output: EncoderOutput<'a, T>,
> @@ -317,8 +302,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> let mut this = Self {
> output,
> payload_output: EncoderOutput::Owned(None),
> - state: EncoderState::default(),
> - parent: None,
> + state: vec![EncoderState::default()],
> finished: false,
> file_copy_buffer: Arc::new(Mutex::new(unsafe {
> crate::util::vec_new_uninitialized(1024 * 1024)
> @@ -326,7 +310,11 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> };
>
> this.encode_metadata(metadata).await?;
> - this.state.files_offset = this.position();
> + let state = this
> + .state
> + .last_mut()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))?;
nit: this could just use this.state_mut()
> + state.files_offset = state.position();
>
> Ok(this)
> }
> @@ -337,13 +325,32 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> }
>
> fn check(&self) -> io::Result<()> {
> - match self.state.encode_error {
> + if self.finished {
> + io_bail!("unexpected encoder finished state");
> + }
> + let state = self
> + .state
> + .last()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))?;
> + match state.encode_error {
nit: this could just use self.state()
> Some(EncodeError::IncompleteFile) => io_bail!("incomplete file"),
> Some(EncodeError::IncompleteDirectory) => io_bail!("directory not finalized"),
> None => Ok(()),
> }
> }
>
> + fn state(&self) -> io::Result<&EncoderState> {
> + self.state
> + .last()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))
> + }
> +
> + fn state_mut(&mut self) -> io::Result<&mut EncoderState> {
> + self.state
> + .last_mut()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))
> + }
> +
> pub async fn create_file<'b>(
> &'b mut self,
> metadata: &Metadata,
> @@ -368,26 +375,38 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> {
> self.check()?;
>
> - let file_offset = self.position();
> + let file_offset = self.state()?.position();
> self.start_file_do(Some(metadata), file_name).await?;
>
> if self.payload_output.as_mut().is_some() {
> - let mut data = self.payload_position().to_le_bytes().to_vec();
> + let state = self
> + .state
> + .last_mut()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))?;
this one here
> + let mut data = state.payload_position().to_le_bytes().to_vec();
> data.append(&mut file_size.to_le_bytes().to_vec());
> seq_write_pxar_entry(
> self.output.as_mut(),
> format::PXAR_PAYLOAD_REF,
> &data,
> - &mut self.state.write_position,
> + &mut state.write_position,
> )
> .await?;
> } else {
> let header = format::Header::with_content_size(format::PXAR_PAYLOAD, file_size);
> header.check_header_size()?;
> - seq_write_struct(self.output.as_mut(), header, &mut self.state.write_position).await?;
> + let state = self
> + .state
> + .last_mut()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))?;
> + seq_write_struct(self.output.as_mut(), header, &mut state.write_position).await?;
and this one here can't use the state/state_mut helpers atm (because
those borrow self)..
> };
>
> - let payload_data_offset = self.position();
> + let state = self
> + .state
> + .last_mut()
> + .ok_or_else(|| io_format_err!("encoder state stack underflow"))?;
> + let payload_data_offset = state.position();
same here (and repeated quite a few times in the remainder of this
patch). it might be worth it to have another helper that gives you
(&mut output, &mut state) for payload or regular output?
e.g., something like this (or as two helpers dropping the bool
parameter):
+ fn output_state(&mut self, payload: bool) -> io::Result<(&mut T, &mut EncoderState)> {
+ Ok((
+ if payload {
+ self.output.as_mut()
+ } else {
+ self.payload_output.as_mut().as_mut().unwrap()
+ },
+ self.state
+ .last_mut()
+ .ok_or_else(|| io_format_err!("encoder state stack underflow"))?,
+ ))
+ }
>
> let meta_size = payload_data_offset - file_offset;
>
> @@ -400,7 +419,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> size: file_size + meta_size,
> },
> remaining_size: file_size,
> - parent: &mut self.state,
> + parent: state,
> })
> }
>
[.. the rest just contains repeats of the pattern mentioned above]
next prev parent reply other threads:[~2024-03-11 13:21 UTC|newest]
Thread overview: 94+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-05 9:26 [pbs-devel] [RFC pxar proxmox-backup 00/36] fix #3174: improve file-level backup Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 01/36] format/examples: add PXAR_PAYLOAD_REF entry header Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 02/36] encoder: add optional output writer for file payloads Christian Ebner
2024-03-11 13:21 ` Fabian Grünbichler
2024-03-11 13:50 ` Christian Ebner
2024-03-11 15:41 ` Fabian Grünbichler
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 03/36] format/decoder: add method to read payload references Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 04/36] decoder: add optional payload input stream Christian Ebner
2024-03-11 13:21 ` Fabian Grünbichler
2024-03-11 14:05 ` Christian Ebner
2024-03-11 15:27 ` Fabian Grünbichler
2024-03-11 15:51 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 05/36] accessor: " Christian Ebner
2024-03-11 13:21 ` Fabian Grünbichler
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 06/36] encoder: move to stack based state tracking Christian Ebner
2024-03-11 13:21 ` Fabian Grünbichler [this message]
2024-03-11 14:12 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 07/36] encoder: add payload reference capability Christian Ebner
2024-03-11 13:21 ` Fabian Grünbichler
2024-03-11 14:15 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 08/36] encoder: add payload position capability Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 09/36] encoder: add payload advance capability Christian Ebner
2024-03-11 13:22 ` Fabian Grünbichler
2024-03-11 14:22 ` Christian Ebner
2024-03-11 15:27 ` Fabian Grünbichler
2024-03-11 15:41 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 pxar 10/36] encoder/format: finish payload stream with marker Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 11/36] client: pxar: switch to stack based encoder state Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 12/36] client: backup: factor out extension from backup target Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 13/36] client: backup: early check for fixed index type Christian Ebner
2024-03-11 14:57 ` Fabian Grünbichler
2024-03-11 15:12 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 14/36] client: backup: split payload to dedicated stream Christian Ebner
2024-03-11 14:57 ` Fabian Grünbichler
2024-03-11 15:22 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 15/36] client: restore: read payload from dedicated index Christian Ebner
2024-03-11 14:58 ` Fabian Grünbichler
2024-03-11 15:26 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 16/36] tools: cover meta extension for pxar archives Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 17/36] restore: " Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 18/36] client: mount: make split pxar archives mountable Christian Ebner
2024-03-11 14:58 ` Fabian Grünbichler
2024-03-11 15:29 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 19/36] api: datastore: refactor getting local chunk reader Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 20/36] api: datastore: attach optional payload " Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 21/36] catalog: shell: factor out pxar fuse reader instantiation Christian Ebner
2024-03-11 14:58 ` Fabian Grünbichler
2024-03-11 15:31 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 22/36] catalog: shell: redirect payload reader for split streams Christian Ebner
2024-03-11 14:58 ` Fabian Grünbichler
2024-03-11 15:24 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 23/36] www: cover meta extension for pxar archives Christian Ebner
2024-03-11 14:58 ` Fabian Grünbichler
2024-03-11 15:31 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 24/36] index: fetch chunk form index by start/end-offset Christian Ebner
2024-03-12 8:50 ` Fabian Grünbichler
2024-03-14 8:23 ` Christian Ebner
2024-03-12 12:47 ` Dietmar Maurer
2024-03-12 12:51 ` Christian Ebner
2024-03-12 13:03 ` Dietmar Maurer
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 25/36] upload stream: impl reused chunk injector Christian Ebner
2024-03-13 9:43 ` Dietmar Maurer
2024-03-14 14:03 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 26/36] client: chunk stream: add chunk injection queues Christian Ebner
2024-03-12 9:46 ` Fabian Grünbichler
2024-03-19 10:52 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 27/36] client: implement prepare reference method Christian Ebner
2024-03-12 10:07 ` Fabian Grünbichler
2024-03-19 11:51 ` Christian Ebner
2024-03-19 12:49 ` Fabian Grünbichler
2024-03-20 8:37 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 28/36] client: pxar: implement store to insert chunks on caching Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 29/36] client: pxar: add previous reference to archiver Christian Ebner
2024-03-12 12:12 ` Fabian Grünbichler
2024-03-12 12:25 ` Christian Ebner
2024-03-19 12:59 ` Christian Ebner
2024-03-19 13:04 ` Fabian Grünbichler
2024-03-20 8:52 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 30/36] client: pxar: add method for metadata comparison Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 31/36] specs: add backup detection mode specification Christian Ebner
2024-03-12 12:17 ` Fabian Grünbichler
2024-03-12 12:31 ` Christian Ebner
2024-03-20 9:28 ` Christian Ebner
2024-03-05 9:26 ` [pbs-devel] [RFC v2 proxmox-backup 32/36] pxar: caching: add look-ahead cache types Christian Ebner
2024-03-05 9:27 ` [pbs-devel] [RFC v2 proxmox-backup 33/36] client: pxar: add look-ahead caching Christian Ebner
2024-03-12 14:08 ` Fabian Grünbichler
2024-03-20 10:28 ` Christian Ebner
2024-03-05 9:27 ` [pbs-devel] [RFC v2 proxmox-backup 34/36] fix #3174: client: pxar: enable caching and meta comparison Christian Ebner
2024-03-13 11:12 ` Fabian Grünbichler
2024-03-05 9:27 ` [pbs-devel] [RFC v2 proxmox-backup 35/36] test-suite: add detection mode change benchmark Christian Ebner
2024-03-13 11:48 ` Fabian Grünbichler
2024-03-05 9:27 ` [pbs-devel] [RFC v2 proxmox-backup 36/36] test-suite: Add bin to deb, add shell completions Christian Ebner
2024-03-13 11:18 ` Fabian Grünbichler
2024-03-13 11:44 ` [pbs-devel] [RFC pxar proxmox-backup 00/36] fix #3174: improve file-level backup Fabian Grünbichler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1710159295.f7lna23xv8.astroid@yuna.none \
--to=f.gruenbichler@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.