From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.ebner@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 7F8D090C74
 for <pbs-devel@lists.proxmox.com>; Thu, 25 Jan 2024 14:27:06 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 14F3219C97
 for <pbs-devel@lists.proxmox.com>; Thu, 25 Jan 2024 14:26:36 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pbs-devel@lists.proxmox.com>; Thu, 25 Jan 2024 14:26:31 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id EB492492CF
 for <pbs-devel@lists.proxmox.com>; Thu, 25 Jan 2024 14:26:29 +0100 (CET)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 25 Jan 2024 14:25:53 +0100
Message-Id: <20240125132608.1172472-15-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20240125132608.1172472-1-c.ebner@proxmox.com>
References: <20240125132608.1172472-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.050 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pbs-devel] [PATCH v6 proxmox-backup 14/29] fix #3174: catalog: add
 specialized Archive entry
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Thu, 25 Jan 2024 13:27:06 -0000

Introduces a specialized pxar directory entry type Archive,
which extends the regular directory entry by storing an additional
optional appendix start offset.

The archive entry type is only used for the topmost entries in the
catalog, replacing the currently used directory entry. If such an entry
was created by reusing pxar file entries in an appendix section, the
appendix start offset is present. It can then be used to calculate the
position of the referenced file entries within the appendix section, so
that they can be accessed from the catalog shell.

Since the catalog might contain multiple archives, each archive entry
stores its individual appendix start offset.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
Changes since v5:
- no changes

 pbs-client/src/catalog_shell.rs |   2 +-
 pbs-datastore/src/catalog.rs    | 152 +++++++++++++++++++++++++++++++-
 2 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs
index 8629a3a5..c9b49a9a 100644
--- a/pbs-client/src/catalog_shell.rs
+++ b/pbs-client/src/catalog_shell.rs
@@ -1082,7 +1082,7 @@ impl<'a> ExtractorState<'a> {
         };
 
         match (did_match, &entry.attr) {
-            (_, DirEntryAttribute::Directory { .. }) => {
+            (_, DirEntryAttribute::Archive { .. }) | (_, DirEntryAttribute::Directory { .. }) => {
                 self.handle_new_directory(entry, match_result?).await?;
             }
             (true, DirEntryAttribute::File { .. } | DirEntryAttribute::AppendixRef { .. }) => {
diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
index 8ae7c661..220313c6 100644
--- a/pbs-datastore/src/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -18,6 +18,11 @@ use crate::file_formats::{PROXMOX_CATALOG_FILE_MAGIC_1_0, PROXMOX_CATALOG_FILE_M
 /// A file list catalog simply stores a directory tree. Such catalogs may be used as index to do a
 /// fast search for files.
 pub trait BackupCatalogWriter {
+    fn start_archive(&mut self, name: &CStr) -> Result<(), Error>;
+    fn end_archive(
+        &mut self,
+        appendix: Option<pxar::encoder::AppendixStartOffset>,
+    ) -> Result<(), Error>;
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
     fn end_directory(&mut self) -> Result<(), Error>;
     fn add_file(
@@ -50,6 +55,7 @@ pub enum CatalogEntryType {
     Directory = b'd',
     File = b'f',
     AppendixRef = b'r',
+    Archive = b'a',
     Symlink = b'l',
     Hardlink = b'h',
     BlockDevice = b'b',
@@ -66,6 +72,7 @@ impl TryFrom<u8> for CatalogEntryType {
             b'd' => CatalogEntryType::Directory,
             b'f' => CatalogEntryType::File,
             b'r' => CatalogEntryType::AppendixRef,
+            b'a' => CatalogEntryType::Archive,
             b'l' => CatalogEntryType::Symlink,
             b'h' => CatalogEntryType::Hardlink,
             b'b' => CatalogEntryType::BlockDevice,
@@ -83,6 +90,7 @@ impl From<&DirEntryAttribute> for CatalogEntryType {
             DirEntryAttribute::Directory { .. } => CatalogEntryType::Directory,
             DirEntryAttribute::File { .. } => CatalogEntryType::File,
             DirEntryAttribute::AppendixRef { .. } => CatalogEntryType::AppendixRef,
+            DirEntryAttribute::Archive { .. } => CatalogEntryType::Archive,
             DirEntryAttribute::Symlink => CatalogEntryType::Symlink,
             DirEntryAttribute::Hardlink => CatalogEntryType::Hardlink,
             DirEntryAttribute::BlockDevice => CatalogEntryType::BlockDevice,
@@ -121,10 +129,22 @@ impl AppendixRefOffset {
     }
 }
 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct AppendixStartOffset {
+    offset: u64,
+}
+
+impl AppendixStartOffset {
+    pub fn raw(&self) -> u64 {
+        self.offset
+    }
+}
+
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum Offset {
     FileOffset { offset: u64 },
     AppendixRefOffset { offset: u64 },
+    AppendixStartOffset { offset: u64 },
 }
 
 /// Represents a named directory entry
@@ -160,6 +180,10 @@ pub enum DirEntryAttribute {
         ctime: i64,
         appendix_ref_offset: AppendixRefOffset,
     },
+    Archive {
+        start: u64,
+        appendix_offset: AppendixStartOffset,
+    },
     Symlink,
     Hardlink,
     BlockDevice,
@@ -236,6 +260,13 @@ impl DirEntry {
                     },
                 }
             }
+            (CatalogEntryType::Archive, Some(Offset::AppendixStartOffset { offset })) => DirEntry {
+                name,
+                attr: DirEntryAttribute::Archive {
+                    start,
+                    appendix_offset: AppendixStartOffset { offset },
+                },
+            },
             _ => panic!("unexpected parameters '{etype}' and '{offset:?}'"),
         }
     }
@@ -246,6 +277,7 @@ impl DirEntry {
             DirEntryAttribute::Directory { .. } => pxar::mode::IFDIR,
             DirEntryAttribute::File { .. } => pxar::mode::IFREG,
             DirEntryAttribute::AppendixRef { .. } => pxar::mode::IFREG,
+            DirEntryAttribute::Archive { .. } => pxar::mode::IFDIR,
             DirEntryAttribute::Symlink => pxar::mode::IFLNK,
             DirEntryAttribute::Hardlink => return None,
             DirEntryAttribute::BlockDevice => pxar::mode::IFBLK,
@@ -258,6 +290,12 @@ impl DirEntry {
     /// Check if DirEntry is a directory
     pub fn is_directory(&self) -> bool {
         matches!(self.attr, DirEntryAttribute::Directory { .. })
+            || matches!(self.attr, DirEntryAttribute::Archive { .. })
+    }
+
+    /// Check if DirEntry is an archive
+    pub fn is_archive(&self) -> bool {
+        matches!(self.attr, DirEntryAttribute::Archive { .. })
     }
 
     /// Check if DirEntry is a symlink
@@ -285,6 +323,20 @@ impl DirInfo {
 
     fn encode_entry<W: Write>(writer: &mut W, entry: &DirEntry, pos: u64) -> Result<(), Error> {
         match entry {
+            DirEntry {
+                name,
+                attr:
+                    DirEntryAttribute::Archive {
+                        start,
+                        appendix_offset,
+                    },
+            } => {
+                writer.write_all(&[CatalogEntryType::Archive as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
+                catalog_encode_u64(writer, appendix_offset.raw())?;
+                catalog_encode_u64(writer, pos - start)?;
+            }
             DirEntry {
                 name,
                 attr: DirEntryAttribute::Directory { start },
@@ -427,6 +479,19 @@ impl DirInfo {
             cursor.read_exact(name)?;
 
             let cont = match etype {
+                CatalogEntryType::Archive => {
+                    let offset = catalog_decode_u64(&mut cursor)?;
+                    let start = catalog_decode_u64(&mut cursor)?;
+                    callback(
+                        etype,
+                        name,
+                        start,
+                        0,
+                        0,
+                        0,
+                        Some(Offset::AppendixStartOffset { offset }),
+                    )?
+                }
                 CatalogEntryType::Directory => {
                     let offset = catalog_decode_u64(&mut cursor)?;
                     callback(etype, name, offset, 0, 0, 0, None)?
@@ -533,6 +598,51 @@ impl<W: Write> CatalogWriter<W> {
 }
 
 impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
+    /// Open a new archive level by pushing a fresh `DirInfo` for `name` onto the stack.
+    fn start_archive(&mut self, name: &CStr) -> Result<(), Error> {
+        self.dirstack.push(DirInfo::new(name.to_owned()));
+        Ok(())
+    }
+
+    /// Close the archive opened by the matching `start_archive` call.
+    ///
+    /// Pops its `DirInfo`, writes the encoded data and adds an `Archive` entry to the parent.
+    fn end_archive(
+        &mut self,
+        appendix: Option<pxar::encoder::AppendixStartOffset>,
+    ) -> Result<(), Error> {
+        let (start, name) = match self.dirstack.pop() {
+            Some(dir) => {
+                let start = self.pos;
+                let (name, data) = dir.encode(start)?;
+                self.write_all(&data)?;
+                (start, name)
+            }
+            None => {
+                bail!("got unexpected end_archive level 0");
+            }
+        };
+
+        let current = self
+            .dirstack
+            .last_mut()
+            .ok_or_else(|| format_err!("outside root"))?;
+        let name = name.to_bytes().to_vec();
+        // Without an appendix, offset 0 is stored; readers treat 0 as "no appendix".
+        let appendix_offset = AppendixStartOffset {
+            offset: appendix.map_or(0, |appendix| appendix.raw()),
+        };
+        current.entries.push(DirEntry {
+            name,
+            attr: DirEntryAttribute::Archive {
+                start,
+                appendix_offset,
+            },
+        });
+
+        Ok(())
+    }
+
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
         let new = DirInfo::new(name.to_owned());
         self.dirstack.push(new);
@@ -746,10 +856,33 @@ impl<R: Read + Seek> CatalogReader<R> {
         })
     }
 
+    /// Get the appendix start offset of the archive entry `archive_name`.
+    ///
+    /// Returns `Ok(None)` if the entry is not an archive entry or if its stored
+    /// offset is 0. Fails if the catalog root has no entry with that name.
+    pub fn appendix_offset(
+        &mut self,
+        archive_name: &[u8],
+    ) -> Result<Option<AppendixStartOffset>, Error> {
+        let root = self.root()?;
+        // A missing entry is reported as an error instead of panicking.
+        let dir_entry = self
+            .lookup(&root, archive_name)?
+            .ok_or_else(|| format_err!("archive entry not found in catalog root"))?;
+        // An offset of 0 is the marker written for archives without an appendix.
+        Ok(match dir_entry.attr {
+            DirEntryAttribute::Archive { appendix_offset, .. } => {
+                Some(appendix_offset).filter(|offset| offset.raw() != 0)
+            }
+            _ => None,
+        })
+    }
+
     /// Read all directory entries
     pub fn read_dir(&mut self, parent: &DirEntry) -> Result<Vec<DirEntry>, Error> {
         let start = match parent.attr {
             DirEntryAttribute::Directory { start } => start,
+            DirEntryAttribute::Archive { start, .. } => start,
             _ => bail!("parent is not a directory - internal error"),
         };
 
@@ -813,6 +946,7 @@ impl<R: Read + Seek> CatalogReader<R> {
     ) -> Result<Option<DirEntry>, Error> {
         let start = match parent.attr {
             DirEntryAttribute::Directory { start } => start,
+            DirEntryAttribute::Archive { start, .. } => start,
             _ => bail!("parent is not a directory - internal error"),
         };
 
@@ -822,7 +956,7 @@ impl<R: Read + Seek> CatalogReader<R> {
         DirInfo::parse(
             &data,
             self.magic,
-            |etype, name, offset, size, mtime, ctime, link_offset| {
+            |etype, name, offset, size, mtime, ctime, archive_offset| {
                 if name != filename {
                     return Ok(true);
                 }
@@ -834,7 +968,7 @@ impl<R: Read + Seek> CatalogReader<R> {
                     size,
                     mtime,
                     ctime,
-                    link_offset,
+                    archive_offset,
                 );
                 item = Some(entry);
                 Ok(false) // stop parsing
@@ -868,6 +1002,14 @@ impl<R: Read + Seek> CatalogReader<R> {
                 path.push(name);
 
                 match etype {
+                    CatalogEntryType::Archive => {
+                        log::info!("{} {:?}", etype, path);
+                        if offset > start {
+                            bail!("got wrong archive offset ({} > {})", offset, start);
+                        }
+                        let pos = start - offset;
+                        self.dump_dir(&path, pos)?;
+                    }
                     CatalogEntryType::Directory => {
                         log::info!("{} {:?}", etype, path);
                         if offset > start {
@@ -1208,7 +1350,11 @@ impl ArchiveEntry {
                 Some(entry_type) => CatalogEntryType::from(entry_type).to_string(),
                 None => "v".to_owned(),
             },
-            leaf: !matches!(entry_type, None | Some(DirEntryAttribute::Directory { .. })),
+            leaf: !matches!(
+                entry_type,
+                None | Some(DirEntryAttribute::Directory { .. })
+                    | Some(DirEntryAttribute::Archive { .. })
+            ),
             size,
             mtime: match entry_type {
                 Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime),
-- 
2.39.2