From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id D3059987BC for ; Mon, 9 Oct 2023 13:52:12 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id A1E8616B4D for ; Mon, 9 Oct 2023 13:52:12 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 9 Oct 2023 13:52:10 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DA8BA4493E for ; Mon, 9 Oct 2023 13:52:09 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Mon, 9 Oct 2023 13:51:23 +0200 Message-Id: <20231009115139.1417886-8-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20231009115139.1417886-1-c.ebner@proxmox.com> References: <20231009115139.1417886-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.086 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [RFC v2 pxar 7/23] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup 
Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Oct 2023 11:52:12 -0000 The PXAR_APPENDIX_TAIL entry marks pxar archives containing an appendix section. It has the same size as a goodbye tail marker item in order to be able to easily read and distinguish archives with and without such a section. This also implements the accessor used by e.g. the fuse implementation to perform random I/O on the archive. The accessor reads the last entry and stores the appendix offset if needed, in order to recalculate the actual file payload offset within the archive when encountering an appendix reference entry in the archive. Signed-off-by: Christian Ebner --- Changes since v1: - adapt to custom type for appendix start offset examples/mk-format-hashes.rs | 5 ++++ examples/pxarcmd.rs | 4 ++-- src/accessor/mod.rs | 46 ++++++++++++++++++++++++++++++++++++ src/encoder/aio.rs | 6 ++--- src/encoder/mod.rs | 19 ++++++++++++++- src/encoder/sync.rs | 4 ++-- src/format/mod.rs | 4 ++++ 7 files changed, 80 insertions(+), 8 deletions(-) diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs index f068edd..7fb938d 100644 --- a/examples/mk-format-hashes.rs +++ b/examples/mk-format-hashes.rs @@ -57,6 +57,11 @@ const CONSTANTS: &[(&str, &str, &str)] = &[ "PXAR_GOODBYE_TAIL_MARKER", "__PROXMOX_FORMAT_PXAR_GOODBYE_TAIL_MARKER__", ), + ( + "Marks the end of an archive containing an appendix section", + "PXAR_APPENDIX_TAIL", + "__PROXMOX_FORMAT_APPENDIX_TAIL__", + ), ]; fn main() { diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs index e0c779d..c7848cc 100644 --- a/examples/pxarcmd.rs +++ b/examples/pxarcmd.rs @@ -105,7 +105,7 @@ fn cmd_create(mut args: std::env::ArgsOs) -> Result<(), Error> { let mut encoder = Encoder::create(file, &meta)?; add_directory(&mut encoder, dir, &dir_path, &mut HashMap::new())?; - encoder.finish()?; + encoder.finish(None)?; Ok(()) } @@ -145,7 +145,7 @@ fn 
add_directory<'a, T: SeqWrite + 'a>( root_path, &mut *hardlinks, )?; - dir.finish()?; + dir.finish(None)?; } else if file_type.is_symlink() { todo!("symlink handling"); } else if file_type.is_file() { diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs index 6a2de73..1c19d7b 100644 --- a/src/accessor/mod.rs +++ b/src/accessor/mod.rs @@ -182,6 +182,7 @@ pub(crate) struct AccessorImpl { input: T, size: u64, caches: Arc, + appendix_offset: Option, } impl AccessorImpl { @@ -190,10 +191,22 @@ impl AccessorImpl { io_bail!("too small to contain a pxar archive"); } + let tail_offset = size - (size_of::() as u64); + let tail: GoodbyeItem = read_entry_at(&input, tail_offset).await?; + + let (appendix_offset, size) = if tail.hash == format::PXAR_APPENDIX_TAIL { + (Some(tail.offset), size - 40) + } else if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER { + io_bail!("no goodbye tail marker found"); + } else { + (None, size) + }; + Ok(Self { input, size, caches: Arc::new(Caches::default()), + appendix_offset, }) } @@ -207,6 +220,7 @@ impl AccessorImpl { self.size, "/".into(), Arc::clone(&self.caches), + self.appendix_offset, ) .await } @@ -263,6 +277,7 @@ impl AccessorImpl { self.size, "/".into(), Arc::clone(&self.caches), + self.appendix_offset, ) .await } @@ -274,6 +289,7 @@ impl AccessorImpl { offset, "/".into(), Arc::clone(&self.caches), + self.appendix_offset, ) .await } @@ -369,6 +385,7 @@ pub(crate) struct DirectoryImpl { table: Arc<[GoodbyeItem]>, path: PathBuf, caches: Arc, + appendix_offset: Option, } impl DirectoryImpl { @@ -378,6 +395,7 @@ impl DirectoryImpl { end_offset: u64, path: PathBuf, caches: Arc, + appendix_offset: Option, ) -> io::Result> { let tail = Self::read_tail_entry(&input, end_offset).await?; @@ -407,6 +425,7 @@ impl DirectoryImpl { table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone), path, caches, + appendix_offset, }; // sanity check: @@ -516,6 +535,32 @@ impl DirectoryImpl { .next() .await .ok_or_else(|| io_format_err!("unexpected EOF 
while decoding directory entry"))??; + + if let EntryKind::AppendixRef { + appendix_offset, + file_size, + } = entry.kind() + { + let appendix_start = match self.appendix_offset { + Some(appendix_start) => appendix_start, + None => io_bail!("missing required appendix start offset information"), + }; + + let name = file_name.ok_or_else(|| io_format_err!("missing required filename"))?; + let c_string = std::ffi::CString::new(name.as_os_str().as_bytes())?; + let start = + appendix_start + appendix_offset + 16 + c_string.as_bytes_with_nul().len() as u64; + let end = start + file_size; + decoder = self.get_decoder(start..end, file_name).await?; + + let entry = decoder + .next() + .await + .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??; + + return Ok((entry, decoder)); + } + Ok((entry, decoder)) } @@ -698,6 +743,7 @@ impl FileEntryImpl { self.entry_range_info.entry_range.end, self.entry.path.clone(), Arc::clone(&self.caches), + None, ) .await } diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs index 8ff1364..48ba857 100644 --- a/src/encoder/aio.rs +++ b/src/encoder/aio.rs @@ -108,8 +108,8 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { } /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`. - pub async fn finish(self) -> io::Result<()> { - self.inner.finish().await + pub async fn finish(self, appendix_tail: Option<(AppendixStartOffset, u64)>) -> io::Result<()> { + self.inner.finish(appendix_tail).await } /// Add size to encoders position and return new position. 
@@ -327,7 +327,7 @@ mod test { .await .unwrap(); } - encoder.finish().await.unwrap(); + encoder.finish(None).await.unwrap(); }; fn test_send(_: T) {} diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index de23728..532b906 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -869,7 +869,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { .await } - pub async fn finish(mut self) -> io::Result<()> { + pub async fn finish( + mut self, + appendix_tail: Option<(AppendixStartOffset, u64)>, + ) -> io::Result<()> { let tail_bytes = self.finish_goodbye_table().await?; seq_write_pxar_entry( self.output.as_mut(), @@ -879,6 +882,20 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { ) .await?; + if let Some((appendix_start_offset, size)) = appendix_tail { + let mut appendix_tail = Vec::new(); + appendix_tail.append(&mut format::PXAR_APPENDIX_TAIL.to_le_bytes().to_vec()); + appendix_tail.append(&mut appendix_start_offset.raw().to_le_bytes().to_vec()); + appendix_tail.append(&mut size.to_le_bytes().to_vec()); + seq_write_pxar_entry( + self.output.as_mut(), + format::PXAR_GOODBYE, + &appendix_tail, + &mut self.state.write_position, + ) + .await?; + } + if let EncoderOutput::Owned(output) = &mut self.output { flush(output).await?; } diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs index a7f6b20..97b4e7c 100644 --- a/src/encoder/sync.rs +++ b/src/encoder/sync.rs @@ -106,8 +106,8 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { } /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`. - pub fn finish(self) -> io::Result<()> { - poll_result_once(self.inner.finish()) + pub fn finish(self, appendix_tail: Option<(AppendixStartOffset, u64)>) -> io::Result<()> { + poll_result_once(self.inner.finish(appendix_tail)) } /// Add size to encoders position and return new position. diff --git a/src/format/mod.rs b/src/format/mod.rs index 8254df9..8016ab1 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -41,6 +41,8 @@ //! are appended. 
They are NOT guaranteed to follow the full pxar structure and should only be //! used to extract the file payloads by given offset. //! * `APPENDIX` -- pxar archive fragments containing file payloads +//! * final goodbye table +//! * `APPENDIX_TAIL` -- marks the end of an archive containing an APPENDIX section use std::cmp::Ordering; use std::ffi::{CStr, OsStr}; @@ -113,6 +115,8 @@ pub const PXAR_APPENDIX_REF: u64 = 0x849b4a17e0234f8e; pub const PXAR_GOODBYE: u64 = 0x2fec4fa642d5731d; /// The end marker used in the GOODBYE object pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0xef5eed5b753e1555; +/// Marks the end of an archive containing an appendix section +pub const PXAR_APPENDIX_TAIL: u64 = 0x5b1b9abb7ae454f1; #[derive(Debug, Endian)] #[repr(C)] -- 2.39.2