From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id EA26098B81 for ; Wed, 15 Nov 2023 16:49:28 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 12033969E for ; Wed, 15 Nov 2023 16:48:35 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 15 Nov 2023 16:48:33 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id C684D432D5 for ; Wed, 15 Nov 2023 16:48:32 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Wed, 15 Nov 2023 16:47:59 +0100 Message-Id: <20231115154813.281564-15-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20231115154813.281564-1-c.ebner@proxmox.com> References: <20231115154813.281564-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.057 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH v5 proxmox-backup 14/28] fix #3174: catalog: add specialized Archive entry X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 
Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Nov 2023 15:49:29 -0000 Introduces a specialized pxar directory entry type Archive, which extends the regular directory entry by storing an additional optional appendix start offset. The archive entry type is only used for the topmost entries in the catalog, replacing the currently used directory entry. If this entry was created by reusing pxar file entries in an appendix section, the appendix start offset is present and can be used to easily locate and calculate the referenced file entries within the appendix section to access them from the catalog shell. Since the catalog might contain multiple archives, each archive entry stores its individual appendix start offset. Signed-off-by: Christian Ebner --- Changes since version 4: - no changes Changes since version 3: - no changes Changes since version 2: - Make sure DirEntryAttribute::Archive is not flagged as leaf node Changes since version 1: - This reworks the Appendix Offset impl of version 1 completely pbs-client/src/catalog_shell.rs | 1 + pbs-datastore/src/catalog.rs | 152 +++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 3 deletions(-) diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs index 99416d2f..7deb9d9a 100644 --- a/pbs-client/src/catalog_shell.rs +++ b/pbs-client/src/catalog_shell.rs @@ -1144,6 +1144,7 @@ impl<'a> ExtractorState<'a> { }; match (did_match, &entry.attr) { + (_, DirEntryAttribute::Archive { .. }) | (_, DirEntryAttribute::Directory { .. 
}) => { self.handle_new_directory(entry, match_result?).await?; } diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs index 8ae7c661..220313c6 100644 --- a/pbs-datastore/src/catalog.rs +++ b/pbs-datastore/src/catalog.rs @@ -18,6 +18,11 @@ use crate::file_formats::{PROXMOX_CATALOG_FILE_MAGIC_1_0, PROXMOX_CATALOG_FILE_M /// A file list catalog simply stores a directory tree. Such catalogs may be used as index to do a /// fast search for files. pub trait BackupCatalogWriter { + fn start_archive(&mut self, name: &CStr) -> Result<(), Error>; + fn end_archive( + &mut self, + appendix: Option, + ) -> Result<(), Error>; fn start_directory(&mut self, name: &CStr) -> Result<(), Error>; fn end_directory(&mut self) -> Result<(), Error>; fn add_file( @@ -50,6 +55,7 @@ pub enum CatalogEntryType { Directory = b'd', File = b'f', AppendixRef = b'r', + Archive = b'a', Symlink = b'l', Hardlink = b'h', BlockDevice = b'b', @@ -66,6 +72,7 @@ impl TryFrom for CatalogEntryType { b'd' => CatalogEntryType::Directory, b'f' => CatalogEntryType::File, b'r' => CatalogEntryType::AppendixRef, + b'a' => CatalogEntryType::Archive, b'l' => CatalogEntryType::Symlink, b'h' => CatalogEntryType::Hardlink, b'b' => CatalogEntryType::BlockDevice, @@ -83,6 +90,7 @@ impl From<&DirEntryAttribute> for CatalogEntryType { DirEntryAttribute::Directory { .. } => CatalogEntryType::Directory, DirEntryAttribute::File { .. } => CatalogEntryType::File, DirEntryAttribute::AppendixRef { .. } => CatalogEntryType::AppendixRef, + DirEntryAttribute::Archive { .. 
} => CatalogEntryType::Archive, DirEntryAttribute::Symlink => CatalogEntryType::Symlink, DirEntryAttribute::Hardlink => CatalogEntryType::Hardlink, DirEntryAttribute::BlockDevice => CatalogEntryType::BlockDevice, @@ -121,10 +129,22 @@ impl AppendixRefOffset { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct AppendixStartOffset { + offset: u64, +} + +impl AppendixStartOffset { + pub fn raw(&self) -> u64 { + self.offset + } +} + #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] pub enum Offset { FileOffset { offset: u64 }, AppendixRefOffset { offset: u64 }, + AppendixStartOffset { offset: u64 }, } /// Represents a named directory entry @@ -160,6 +180,10 @@ pub enum DirEntryAttribute { ctime: i64, appendix_ref_offset: AppendixRefOffset, }, + Archive { + start: u64, + appendix_offset: AppendixStartOffset, + }, Symlink, Hardlink, BlockDevice, @@ -236,6 +260,13 @@ impl DirEntry { }, } } + (CatalogEntryType::Archive, Some(Offset::AppendixStartOffset { offset })) => DirEntry { + name, + attr: DirEntryAttribute::Archive { + start, + appendix_offset: AppendixStartOffset { offset }, + }, + }, _ => panic!("unexpected parameters '{etype}' and '{offset:?}'"), } } @@ -246,6 +277,7 @@ impl DirEntry { DirEntryAttribute::Directory { .. } => pxar::mode::IFDIR, DirEntryAttribute::File { .. } => pxar::mode::IFREG, DirEntryAttribute::AppendixRef { .. } => pxar::mode::IFREG, + DirEntryAttribute::Archive { .. } => pxar::mode::IFDIR, DirEntryAttribute::Symlink => pxar::mode::IFLNK, DirEntryAttribute::Hardlink => return None, DirEntryAttribute::BlockDevice => pxar::mode::IFBLK, @@ -258,6 +290,12 @@ impl DirEntry { /// Check if DirEntry is a directory pub fn is_directory(&self) -> bool { matches!(self.attr, DirEntryAttribute::Directory { .. }) + || matches!(self.attr, DirEntryAttribute::Archive { .. }) + } + + /// Check if DirEntry is an archive + pub fn is_archive(&self) -> bool { + matches!(self.attr, DirEntryAttribute::Archive { .. 
}) } /// Check if DirEntry is a symlink @@ -285,6 +323,20 @@ impl DirInfo { fn encode_entry(writer: &mut W, entry: &DirEntry, pos: u64) -> Result<(), Error> { match entry { + DirEntry { + name, + attr: + DirEntryAttribute::Archive { + start, + appendix_offset, + }, + } => { + writer.write_all(&[CatalogEntryType::Archive as u8])?; + catalog_encode_u64(writer, name.len() as u64)?; + writer.write_all(name)?; + catalog_encode_u64(writer, appendix_offset.raw())?; + catalog_encode_u64(writer, pos - start)?; + } DirEntry { name, attr: DirEntryAttribute::Directory { start }, @@ -427,6 +479,19 @@ impl DirInfo { cursor.read_exact(name)?; let cont = match etype { + CatalogEntryType::Archive => { + let offset = catalog_decode_u64(&mut cursor)?; + let start = catalog_decode_u64(&mut cursor)?; + callback( + etype, + name, + start, + 0, + 0, + 0, + Some(Offset::AppendixStartOffset { offset }), + )? + } CatalogEntryType::Directory => { let offset = catalog_decode_u64(&mut cursor)?; callback(etype, name, offset, 0, 0, 0, None)? 
@@ -533,6 +598,51 @@ impl CatalogWriter { } impl BackupCatalogWriter for CatalogWriter { + fn start_archive(&mut self, name: &CStr) -> Result<(), Error> { + let new = DirInfo::new(name.to_owned()); + self.dirstack.push(new); + Ok(()) + } + + fn end_archive( + &mut self, + appendix: Option, + ) -> Result<(), Error> { + let (start, name) = match self.dirstack.pop() { + Some(dir) => { + let start = self.pos; + let (name, data) = dir.encode(start)?; + self.write_all(&data)?; + (start, name) + } + None => { + bail!("got unexpected end_directory level 0"); + } + }; + + let current = self + .dirstack + .last_mut() + .ok_or_else(|| format_err!("outside root"))?; + let name = name.to_bytes().to_vec(); + let appendix_offset = if let Some(appendix) = appendix { + AppendixStartOffset { + offset: appendix.raw(), + } + } else { + AppendixStartOffset { offset: 0 } + }; + current.entries.push(DirEntry { + name, + attr: DirEntryAttribute::Archive { + start, + appendix_offset, + }, + }); + + Ok(()) + } + fn start_directory(&mut self, name: &CStr) -> Result<(), Error> { let new = DirInfo::new(name.to_owned()); self.dirstack.push(new); @@ -746,10 +856,33 @@ impl CatalogReader { }) } + pub fn appendix_offset( + &mut self, + archive_name: &[u8], + ) -> Result, Error> { + let root = self.root()?; + let dir_entry = self.lookup(&root, archive_name)?.unwrap(); + if let DirEntry { + attr: DirEntryAttribute::Archive { + appendix_offset, .. + }, + .. + } = dir_entry + { + if appendix_offset.raw() != 0 { + return Ok(Some(appendix_offset)); + } else { + return Ok(None); + } + } + Ok(None) + } + /// Read all directory entries pub fn read_dir(&mut self, parent: &DirEntry) -> Result, Error> { let start = match parent.attr { DirEntryAttribute::Directory { start } => start, + DirEntryAttribute::Archive { start, .. 
} => start, _ => bail!("parent is not a directory - internal error"), }; @@ -813,6 +946,7 @@ impl CatalogReader { ) -> Result, Error> { let start = match parent.attr { DirEntryAttribute::Directory { start } => start, + DirEntryAttribute::Archive { start, .. } => start, _ => bail!("parent is not a directory - internal error"), }; @@ -822,7 +956,7 @@ impl CatalogReader { DirInfo::parse( &data, self.magic, - |etype, name, offset, size, mtime, ctime, link_offset| { + |etype, name, offset, size, mtime, ctime, archive_offset| { if name != filename { return Ok(true); } @@ -834,7 +968,7 @@ impl CatalogReader { size, mtime, ctime, - link_offset, + archive_offset, ); item = Some(entry); Ok(false) // stop parsing @@ -868,6 +1002,14 @@ impl CatalogReader { path.push(name); match etype { + CatalogEntryType::Archive => { + log::info!("{} {:?}", etype, path); + if offset > start { + bail!("got wrong archive offset ({} > {})", offset, start); + } + let pos = start - offset; + self.dump_dir(&path, pos)?; + } CatalogEntryType::Directory => { log::info!("{} {:?}", etype, path); if offset > start { @@ -1208,7 +1350,11 @@ impl ArchiveEntry { Some(entry_type) => CatalogEntryType::from(entry_type).to_string(), None => "v".to_owned(), }, - leaf: !matches!(entry_type, None | Some(DirEntryAttribute::Directory { .. })), + leaf: !matches!( + entry_type, + None | Some(DirEntryAttribute::Directory { .. }) + | Some(DirEntryAttribute::Archive { .. }) + ), size, mtime: match entry_type { Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime), -- 2.39.2