From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id CF4501FF38E for ; Tue, 11 Jun 2024 10:29:04 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8B16132C3B; Tue, 11 Jun 2024 10:29:38 +0200 (CEST) Date: Tue, 11 Jun 2024 10:29:01 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240610151157.529760-1-c.ebner@proxmox.com> <20240610151157.529760-2-c.ebner@proxmox.com> In-Reply-To: <20240610151157.529760-2-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1718094453.u0pexqcqf8.astroid@yuna.none> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.057 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[aio.rs, sync.rs, proxmox.com, mod.rs] Subject: Re: [pbs-devel] [PATCH pxar 1/2] decoder: move payload header check for split input X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On June 10, 2024 5:11 pm, Christian Ebner wrote: > The payload entries in the payload output for split pxar archives are > separated by payload headers, which allow to perform consistency > checks for the payload references encoded in the metadata archive. > > Currently, this consistency check is performed right after reading the > entry in the metadata archive, which however has the downside that the > payload has to be fetched and decoded just for this consistency check. > This greatly impacts performance when accessing a metadata archive > with attached payload input reader, e.g. in the fuse implementation to > mount pxar archives, being especially severe when accessed over the > network in combination with a remote chunk reader as the Proxmox > Backup Server does. > > Therefore, move this check to the contents reader instantiation > instead and add an additional flag to the decoder's `InPayload` state. > > Getting the decoder now needs to be async and the method must return > an error when the check fails. As discussed off-list: the accessor used by FUSE uses the content-range-based (unsafe) way to access file contents, and that path now lacks the payload marker checks. The checks can probably be added to the entry's content_range method, which returns the ranges to be passed to the unsafe interface (and if any other caller uses that unsafe interface with other ranges, any brokenness is on them anyway ;)).
> > Signed-off-by: Christian Ebner > --- > src/decoder/aio.rs | 4 +-- > src/decoder/mod.rs | 60 ++++++++++++++++++++++++++------------------- > src/decoder/sync.rs | 5 ++-- > 3 files changed, 40 insertions(+), 29 deletions(-) > > diff --git a/src/decoder/aio.rs b/src/decoder/aio.rs > index 3f9881d..19e7023 100644 > --- a/src/decoder/aio.rs > +++ b/src/decoder/aio.rs > @@ -60,8 +60,8 @@ impl Decoder { > } > > /// Get a reader for the contents of the current entry, if the entry has contents. > - pub fn contents(&mut self) -> Option> { > - self.inner.content_reader() > + pub async fn contents(&mut self) -> io::Result>> { > + self.inner.content_reader().await > } > > /// Get the size of the current contents, if the entry has contents. > diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs > index 46a21b8..8f92964 100644 > --- a/src/decoder/mod.rs > +++ b/src/decoder/mod.rs > @@ -182,6 +182,7 @@ enum State { > InPayload { > offset: u64, > size: u64, > + header_checked: bool, > }, > > /// file entries with no data (fifo, socket) > @@ -296,8 +297,12 @@ impl DecoderImpl { > // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE: > self.read_next_item().await?; > } > - State::InPayload { offset, .. } => { > + State::InPayload { offset, header_checked, .. } => { > if self.input.payload().is_some() { > + if !header_checked { > + // header is only checked if payload has been accessed > + self.payload_consumed += size_of::<Header>() as u64; > + } > // Update consumed payload as given by the offset referenced by the content reader > self.payload_consumed += offset; > } else { > @@ -370,19 +375,39 @@ impl DecoderImpl { > } > } > > - pub fn content_reader(&mut self) -> Option> { > - if let State::InPayload { offset, size } = &mut self.state { > - if self.input.payload().is_some() { > - Some(Contents::new( > + pub async fn content_reader(&mut self) -> Result>, io::Error> { > + if let State::InPayload { offset, size, header_checked } = &mut self.state { > + if let Some(payload_input) = self.input.payload_mut() { > + if !*header_checked { > + let header: Header = seq_read_entry(payload_input).await?; > + if header.htype != format::PXAR_PAYLOAD { > + io_bail!( > + "unexpected header in payload input: expected {} , got {header}", > + format::PXAR_PAYLOAD, > + ); > + } > + self.payload_consumed += size_of::<Header>() as u64; > + > + if header.content_size() != *size { > + io_bail!( > + "encountered payload size mismatch: got {size}, expected {}", > + header.content_size(), > + ); > + } > + > + *header_checked = true; > + } > + > + Ok(Some(Contents::new( > self.input.payload_mut().unwrap(), > offset, > *size, > - )) > + ))) > } else { > - Some(Contents::new(self.input.archive_mut(), offset, *size)) > + Ok(Some(Contents::new(self.input.archive_mut(), offset, *size))) > } > } else { > - None > + Ok(None) > } > } > > @@ -621,6 +646,7 @@ impl DecoderImpl { > }; > self.state = State::InPayload { > offset: 0, > + header_checked: false, > size: self.current_header.content_size(), > }; > return Ok(ItemResult::Entry); > @@ -652,23 +678,6 @@ impl DecoderImpl { > let end = start + payload_ref.size + size_of::<Header>() as u64; > payload_input.update_range(start..end); > } > - > - let header: Header = seq_read_entry(payload_input).await?; > - if header.htype != format::PXAR_PAYLOAD { > - io_bail!( > - "unexpected header in payload input: expected {} , got {header}", > - format::PXAR_PAYLOAD, > - ); > - } > - self.payload_consumed += size_of::<Header>() as u64; > - > - if header.content_size() != payload_ref.size { > - io_bail!( > - "encountered payload size mismatch: got {}, expected {}", > - payload_ref.size, > - header.content_size(), > - ); > - } > } > > self.entry.kind = EntryKind::File { > @@ -678,6 +687,7 @@ impl DecoderImpl { > }; > self.state = State::InPayload { > offset: 0, > + header_checked: false, > size: payload_ref.size, > }; > return Ok(ItemResult::Entry); > diff --git a/src/decoder/sync.rs b/src/decoder/sync.rs > index 8779f87..1116fe8 100644 > --- a/src/decoder/sync.rs > +++ b/src/decoder/sync.rs > @@ -77,8 +77,9 @@ impl Decoder { > } > > /// Get a reader for the contents of the current entry, if the entry has contents. > - pub fn contents(&mut self) -> Option> { > - self.inner.content_reader().map(|inner| Contents { inner }) > + pub fn contents(&mut self) -> io::Result>> { > + let content_reader = poll_result_once(self.inner.content_reader())?; > + Ok(content_reader.map(|inner| Contents { inner })) > } > > /// Get the size of the current contents, if the entry has contents. > -- > 2.39.2 > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > > > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel