From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.ebner@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 8DB61DBCA
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:17:28 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 665426CBF
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:56 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:52 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 3B07C4878B
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:52 +0200 (CEST)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Fri, 22 Sep 2023 09:16:13 +0200
Message-Id: <20230922071621.12670-13-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20230922071621.12670-1-c.ebner@proxmox.com>
References: <20230922071621.12670-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.105 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pbs-devel] [RFC proxmox-backup 12/20] fix #3174: catalog: incl
 pxar archives file offset
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 22 Sep 2023 07:17:28 -0000

Include the stream offset for regular files in the backup catalog.
This allows calculating a file's payload offset relative to the
appendix start offset in the pxar archive, so that future backup runs
can use the catalog as a reference to skip over unchanged file
payloads and reference the existing chunks instead.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-client/src/pxar/create.rs                 |  30 +++--
 pbs-datastore/src/catalog.rs                  | 122 +++++++++++++++---
 .../src/proxmox_restore_daemon/api.rs         |   1 +
 3 files changed, 121 insertions(+), 32 deletions(-)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index e7053d9e..0f23ed2f 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -390,12 +390,6 @@ impl Archiver {
         patterns_count: usize,
     ) -> Result<(), Error> {
         let content = generate_pxar_excludes_cli(&self.patterns[..patterns_count]);
-        if let Some(ref catalog) = self.catalog {
-            catalog
-                .lock()
-                .unwrap()
-                .add_file(file_name, content.len() as u64, 0)?;
-        }
 
         let mut metadata = Metadata::default();
         metadata.stat.mode = pxar::format::mode::IFREG | 0o600;
@@ -405,6 +399,14 @@ impl Archiver {
             .await?;
         file.write_all(&content).await?;
 
+        if let Some(ref catalog) = self.catalog {
+            let link_offset = file.file_offset();
+            catalog
+                .lock()
+                .unwrap()
+                .add_file(file_name, content.len() as u64, 0, link_offset)?;
+        }
+
         Ok(())
     }
 
@@ -572,17 +574,19 @@ impl Archiver {
                 }
 
                 let file_size = stat.st_size as u64;
-                if let Some(ref catalog) = self.catalog {
-                    catalog
-                        .lock()
-                        .unwrap()
-                        .add_file(c_file_name, file_size, stat.st_mtime)?;
-                }
-
                 let offset: LinkOffset = self
                     .add_regular_file(encoder, fd, file_name, &metadata, file_size)
                     .await?;
 
+                if let Some(ref catalog) = self.catalog {
+                    catalog.lock().unwrap().add_file(
+                        c_file_name,
+                        file_size,
+                        stat.st_mtime,
+                        offset,
+                    )?;
+                }
+
                 if stat.st_nlink > 1 {
                     self.hardlinks
                         .insert(link_info, (self.path.clone(), offset));
diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
index 86e20c92..1cc5421d 100644
--- a/pbs-datastore/src/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -7,6 +7,7 @@ use anyhow::{bail, format_err, Error};
 use serde::{Deserialize, Serialize};
 
 use pathpatterns::{MatchList, MatchType};
+use pxar::encoder::LinkOffset;
 
 use proxmox_io::ReadExt;
 use proxmox_schema::api;
@@ -20,7 +21,13 @@ use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
 pub trait BackupCatalogWriter {
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
     fn end_directory(&mut self) -> Result<(), Error>;
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    ) -> Result<(), Error>;
     fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
@@ -94,8 +101,14 @@ pub struct DirEntry {
 /// Used to specific additional attributes inside DirEntry
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum DirEntryAttribute {
-    Directory { start: u64 },
-    File { size: u64, mtime: i64 },
+    Directory {
+        start: u64,
+    },
+    File {
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    },
     Symlink,
     Hardlink,
     BlockDevice,
@@ -105,7 +118,14 @@ pub enum DirEntryAttribute {
 }
 
 impl DirEntry {
-    fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime: i64) -> Self {
+    fn new(
+        etype: CatalogEntryType,
+        name: Vec<u8>,
+        start: u64,
+        size: u64,
+        mtime: i64,
+        link_offset: Option<LinkOffset>,
+    ) -> Self {
         match etype {
             CatalogEntryType::Directory => DirEntry {
                 name,
@@ -113,7 +133,11 @@ impl DirEntry {
             },
             CatalogEntryType::File => DirEntry {
                 name,
-                attr: DirEntryAttribute::File { size, mtime },
+                attr: DirEntryAttribute::File {
+                    size,
+                    mtime,
+                    link_offset: link_offset.unwrap(),
+                },
             },
             CatalogEntryType::Symlink => DirEntry {
                 name,
@@ -197,13 +221,19 @@ impl DirInfo {
             }
             DirEntry {
                 name,
-                attr: DirEntryAttribute::File { size, mtime },
+                attr:
+                    DirEntryAttribute::File {
+                        size,
+                        mtime,
+                        link_offset,
+                    },
             } => {
                 writer.write_all(&[CatalogEntryType::File as u8])?;
                 catalog_encode_u64(writer, name.len() as u64)?;
                 writer.write_all(name)?;
                 catalog_encode_u64(writer, *size)?;
                 catalog_encode_i64(writer, *mtime)?;
+                catalog_encode_u64(writer, link_offset.raw())?;
             }
             DirEntry {
                 name,
@@ -271,7 +301,9 @@ impl DirInfo {
         Ok((self.name, data))
     }
 
-    fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, i64) -> Result<bool, Error>>(
+    fn parse<
+        C: FnMut(CatalogEntryType, &[u8], u64, u64, i64, Option<LinkOffset>) -> Result<bool, Error>,
+    >(
         data: &[u8],
         mut callback: C,
     ) -> Result<(), Error> {
@@ -300,14 +332,22 @@ impl DirInfo {
             let cont = match etype {
                 CatalogEntryType::Directory => {
                     let offset = catalog_decode_u64(&mut cursor)?;
-                    callback(etype, name, offset, 0, 0)?
+                    callback(etype, name, offset, 0, 0, None)?
                 }
                 CatalogEntryType::File => {
                     let size = catalog_decode_u64(&mut cursor)?;
                     let mtime = catalog_decode_i64(&mut cursor)?;
-                    callback(etype, name, 0, size, mtime)?
+                    let link_offset = catalog_decode_u64(&mut cursor)?;
+                    callback(
+                        etype,
+                        name,
+                        0,
+                        size,
+                        mtime,
+                        Some(LinkOffset::new(link_offset)),
+                    )?
                 }
-                _ => callback(etype, name, 0, 0, 0)?,
+                _ => callback(etype, name, 0, 0, 0, None)?,
             };
             if !cont {
                 return Ok(());
@@ -407,7 +447,13 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
         Ok(())
     }
 
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error> {
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    ) -> Result<(), Error> {
         let dir = self
             .dirstack
             .last_mut()
@@ -415,7 +461,11 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
         let name = name.to_bytes().to_vec();
         dir.entries.push(DirEntry {
             name,
-            attr: DirEntryAttribute::File { size, mtime },
+            attr: DirEntryAttribute::File {
+                size,
+                mtime,
+                link_offset,
+            },
         });
         Ok(())
     }
@@ -550,8 +600,15 @@ impl<R: Read + Seek> CatalogReader<R> {
 
         let mut entry_list = Vec::new();
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
+            let entry = DirEntry::new(
+                etype,
+                name.to_vec(),
+                start - offset,
+                size,
+                mtime,
+                link_offset,
+            );
             entry_list.push(entry);
             Ok(true)
         })?;
@@ -600,12 +657,19 @@ impl<R: Read + Seek> CatalogReader<R> {
         let data = self.read_raw_dirinfo_block(start)?;
 
         let mut item = None;
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
             if name != filename {
                 return Ok(true);
             }
 
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
+            let entry = DirEntry::new(
+                etype,
+                name.to_vec(),
+                start - offset,
+                size,
+                mtime,
+                link_offset,
+            );
             item = Some(entry);
             Ok(false) // stop parsing
         })?;
@@ -628,7 +692,7 @@ impl<R: Read + Seek> CatalogReader<R> {
     pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
         let data = self.read_raw_dirinfo_block(start)?;
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
             let mut path = std::path::PathBuf::from(prefix);
             let name: &OsStr = OsStrExt::from_bytes(name);
             path.push(name);
@@ -648,7 +712,14 @@ impl<R: Read + Seek> CatalogReader<R> {
                         mtime_string = s;
                     }
 
-                    log::info!("{} {:?} {} {}", etype, path, size, mtime_string,);
+                    log::info!(
+                        "{} {:?} {} {} {:?}",
+                        etype,
+                        path,
+                        size,
+                        mtime_string,
+                        link_offset
+                    );
                 }
                 _ => {
                     log::info!("{} {:?}", etype, path);
@@ -705,9 +776,15 @@ impl<R: Read + Seek> CatalogReader<R> {
             components.push(b'/');
             components.extend(&direntry.name);
             let mut entry = ArchiveEntry::new(&components, Some(&direntry.attr));
-            if let DirEntryAttribute::File { size, mtime } = direntry.attr {
+            if let DirEntryAttribute::File {
+                size,
+                mtime,
+                link_offset,
+            } = direntry.attr
+            {
                 entry.size = size.into();
                 entry.mtime = mtime.into();
+                entry.link_offset = Some(link_offset.raw());
             }
             res.push(entry);
         }
@@ -916,6 +993,9 @@ pub struct ArchiveEntry {
     /// The file "last modified" time stamp, if entry_type is 'f' (file)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub mtime: Option<i64>,
+    /// The file link offset in the pxar archive, if entry_type is 'f' (file)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub link_offset: Option<u64>,
 }
 
 impl ArchiveEntry {
@@ -946,6 +1026,10 @@ impl ArchiveEntry {
                 Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime),
                 _ => None,
             },
+            link_offset: match entry_type {
+                Some(DirEntryAttribute::File { link_offset, .. }) => Some(link_offset.raw()),
+                _ => None,
+            },
         }
     }
 }
diff --git a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
index c4e97d33..95e3593b 100644
--- a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
+++ b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
@@ -109,6 +109,7 @@ fn get_dir_entry(path: &Path) -> Result<DirEntryAttribute, Error> {
         libc::S_IFREG => DirEntryAttribute::File {
             size: stat.st_size as u64,
             mtime: stat.st_mtime,
+            link_offset: pxar::encoder::LinkOffset::new(0),
         },
         libc::S_IFDIR => DirEntryAttribute::Directory { start: 0 },
         _ => bail!("unsupported file type: {}", stat.st_mode),
-- 
2.39.2