From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 4563CA0BFD for ; Thu, 9 Nov 2023 19:47:06 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4183C18640 for ; Thu, 9 Nov 2023 19:46:35 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 9 Nov 2023 19:46:32 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 99CD4478C0 for ; Thu, 9 Nov 2023 19:46:32 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Thu, 9 Nov 2023 19:45:53 +0100 Message-Id: <20231109184614.1611127-6-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20231109184614.1611127-1-c.ebner@proxmox.com> References: <20231109184614.1611127-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.067 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH v4 pxar 5/26] fix #3174: enc/dec: impl PXAR_APPENDIX entrytype X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list 
List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 09 Nov 2023 18:47:06 -0000 Add an additional entry type for marking the start of a pxar archive appendix section. The appendix is a concatenation of possibly uncorrelated chunks, therefore not following the pxar archive format anymore. The appendix is only used to access the file metadata and payloads when a PXAR_APPENDIX_REF entry is encountered in the archive before this point. Signed-off-by: Christian Ebner --- Changes since v3: - specify u64 as AppendixRefOffset for full_size in add_appendix Changes since v2: - no changes Changes since v1: - Use custom type for appendix start offset instead of raw `u64` examples/mk-format-hashes.rs | 1 + src/decoder/mod.rs | 9 +++++++++ src/encoder/aio.rs | 12 +++++++++++- src/encoder/mod.rs | 33 +++++++++++++++++++++++++++++++++ src/encoder/sync.rs | 12 +++++++++++- src/format/mod.rs | 7 +++++++ src/lib.rs | 4 ++++ 7 files changed, 76 insertions(+), 2 deletions(-) diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs index 8b4f5de..f068edd 100644 --- a/examples/mk-format-hashes.rs +++ b/examples/mk-format-hashes.rs @@ -12,6 +12,7 @@ const CONSTANTS: &[(&str, &str, &str)] = &[ "__PROXMOX_FORMAT_ENTRY__", ), ("", "PXAR_FILENAME", "__PROXMOX_FORMAT_FILENAME__"), + ("", "PXAR_APPENDIX", "__PROXMOX_FORMAT_APPENDIX__"), ("", "PXAR_SYMLINK", "__PROXMOX_FORMAT_SYMLINK__"), ("", "PXAR_DEVICE", "__PROXMOX_FORMAT_DEVICE__"), ("", "PXAR_XATTR", "__PROXMOX_FORMAT_XATTR__"), diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs index 70a8697..43a1122 100644 --- a/src/decoder/mod.rs +++ b/src/decoder/mod.rs @@ -295,6 +295,7 @@ impl DecoderImpl { continue; } } + format::PXAR_APPENDIX => return Ok(Some(self.entry.take())), _ => io_bail!( "expected filename or directory-goodbye pxar entry, got: {}", self.current_header, @@ -546,6 +547,14 @@ impl DecoderImpl { self.entry.kind 
= EntryKind::Device(self.read_device().await?); return Ok(ItemResult::Entry); } + format::PXAR_APPENDIX => { + let bytes = self.read_entry_as_bytes().await?; + let total = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + self.entry.kind = EntryKind::Appendix { + total, + }; + return Ok(ItemResult::Entry); + } format::PXAR_PAYLOAD => { let offset = seq_read_position(&mut self.input).await.transpose()?; self.entry.kind = EntryKind::File { diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs index 66ea535..9cc26e0 100644 --- a/src/encoder/aio.rs +++ b/src/encoder/aio.rs @@ -5,7 +5,7 @@ use std::path::Path; use std::pin::Pin; use std::task::{Context, Poll}; -use crate::encoder::{self, AppendixRefOffset, LinkOffset, SeqWrite}; +use crate::encoder::{self, AppendixRefOffset, AppendixStartOffset, LinkOffset, SeqWrite}; use crate::format; use crate::Metadata; @@ -124,6 +124,16 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { .await } + /// Add the appendix start entry marker + /// + /// Returns the AppendixStartOffset pointing after the entry, the start of the appendix section + pub async fn add_appendix( + &mut self, + full_size: AppendixRefOffset, + ) -> io::Result<AppendixStartOffset> { + self.inner.add_appendix(full_size).await + } + /// Add a symbolic link to the archive. pub async fn add_symlink, PT: AsRef>( &mut self, diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index 78612a6..1d5fe82 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -65,6 +65,15 @@ impl AppendixRefOffset { /// Offset pointing to the start of the appendix section of the archive. #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] pub struct AppendixStartOffset(u64); + +impl AppendixStartOffset { + /// Get the raw byte start offset for this appendix section. + #[inline] + pub fn raw(self) -> u64 { + self.0 + } +} + /// Sequential write interface used by the encoder's state machine. 
/// /// This is our internal writer trait which is available for `std::io::Write` types in the @@ -510,6 +519,30 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> { Ok(()) } + /// Add the appendix start entry marker + /// + /// Returns the AppendixStartOffset pointing after the entry, the start of the appendix + /// section of the archive. + pub async fn add_appendix( + &mut self, + full_size: AppendixRefOffset, + ) -> io::Result<AppendixStartOffset> { + self.check()?; + + let data = &full_size.raw().to_le_bytes().to_vec(); + seq_write_pxar_entry( + self.output.as_mut(), + format::PXAR_APPENDIX, + &data, + &mut self.state.write_position, + ) + .await?; + + let offset = self.position(); + + Ok(AppendixStartOffset(offset)) + } + /// Return a file offset usable with `add_hardlink`. pub async fn add_symlink( &mut self, diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs index 2c9ea2b..c7a7acb 100644 --- a/src/encoder/sync.rs +++ b/src/encoder/sync.rs @@ -6,7 +6,7 @@ use std::pin::Pin; use std::task::{Context, Poll}; use crate::decoder::sync::StandardReader; -use crate::encoder::{self, AppendixRefOffset, LinkOffset, SeqSink, SeqWrite}; +use crate::encoder::{self, AppendixRefOffset, AppendixStartOffset, LinkOffset, SeqSink, SeqWrite}; use crate::format; use crate::util::poll_result_once; use crate::Metadata; @@ -124,6 +124,16 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> { )) } + /// Add the appendix start entry marker + /// + /// Returns the AppendixStartOffset pointing after the entry, the start of the appendix section + pub async fn add_appendix( + &mut self, + full_size: AppendixRefOffset, + ) -> io::Result<AppendixStartOffset> { + poll_result_once(self.inner.add_appendix(full_size)) + } + /// Add a symbolic link to the archive. pub fn add_symlink, PT: AsRef>( &mut self, diff --git a/src/format/mod.rs b/src/format/mod.rs index 5eb7562..8254df9 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -35,6 +35,12 @@ //! * `` -- serialization of the second directory entry //! * ... //! 
* `GOODBYE` -- lookup table at the end of a list of directory entries +//! +//! For backups referencing previous backups to skip file payloads, the archive is followed by an +//! appendix marker after which the concatenated pxar archive fragments containing the file payloads +//! are appended. They are NOT guaranteed to follow the full pxar structure and should only be +//! used to extract the file payloads by given offset. +//! * `APPENDIX` -- pxar archive fragments containing file payloads use std::cmp::Ordering; use std::ffi::{CStr, OsStr}; @@ -85,6 +91,7 @@ pub const PXAR_ENTRY: u64 = 0xd5956474e588acef; /// Previous version of the entry struct pub const PXAR_ENTRY_V1: u64 = 0x11da850a1c1cceff; pub const PXAR_FILENAME: u64 = 0x16701121063917b3; +pub const PXAR_APPENDIX: u64 = 0x9ff6c9507864b38d; pub const PXAR_SYMLINK: u64 = 0x27f971e7dbf5dc5f; pub const PXAR_DEVICE: u64 = 0x9fc9e906586d5ce9; pub const PXAR_XATTR: u64 = 0x0dab0229b57dcd03; diff --git a/src/lib.rs b/src/lib.rs index fa84e7a..035f995 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -372,6 +372,10 @@ pub enum EntryKind { file_size: u64, }, + Appendix { + total: u64, + }, + /// Directory entry. When iterating through an archive, the contents follow next. Directory, -- 2.39.2