public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC proxmox-backup 12/20] fix #3174: catalog: incl pxar archives file offset
Date: Fri, 22 Sep 2023 09:16:13 +0200	[thread overview]
Message-ID: <20230922071621.12670-13-c.ebner@proxmox.com> (raw)
In-Reply-To: <20230922071621.12670-1-c.ebner@proxmox.com>

Include the stream offset for regular files in the backup catalog.
This allows calculating a file's payload offset relative to the
appendix start offset in the pxar archive, so that future backup runs
can use the catalog as a reference to skip over unchanged file payloads
and reference the existing chunks instead.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-client/src/pxar/create.rs                 |  30 +++--
 pbs-datastore/src/catalog.rs                  | 122 +++++++++++++++---
 .../src/proxmox_restore_daemon/api.rs         |   1 +
 3 files changed, 121 insertions(+), 32 deletions(-)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index e7053d9e..0f23ed2f 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -390,12 +390,6 @@ impl Archiver {
         patterns_count: usize,
     ) -> Result<(), Error> {
         let content = generate_pxar_excludes_cli(&self.patterns[..patterns_count]);
-        if let Some(ref catalog) = self.catalog {
-            catalog
-                .lock()
-                .unwrap()
-                .add_file(file_name, content.len() as u64, 0)?;
-        }
 
         let mut metadata = Metadata::default();
         metadata.stat.mode = pxar::format::mode::IFREG | 0o600;
@@ -405,6 +399,14 @@ impl Archiver {
             .await?;
         file.write_all(&content).await?;
 
+        if let Some(ref catalog) = self.catalog {
+            let link_offset = file.file_offset();
+            catalog
+                .lock()
+                .unwrap()
+                .add_file(file_name, content.len() as u64, 0, link_offset)?;
+        }
+
         Ok(())
     }
 
@@ -572,17 +574,19 @@ impl Archiver {
                 }
 
                 let file_size = stat.st_size as u64;
-                if let Some(ref catalog) = self.catalog {
-                    catalog
-                        .lock()
-                        .unwrap()
-                        .add_file(c_file_name, file_size, stat.st_mtime)?;
-                }
-
                 let offset: LinkOffset = self
                     .add_regular_file(encoder, fd, file_name, &metadata, file_size)
                     .await?;
 
+                if let Some(ref catalog) = self.catalog {
+                    catalog.lock().unwrap().add_file(
+                        c_file_name,
+                        file_size,
+                        stat.st_mtime,
+                        offset,
+                    )?;
+                }
+
                 if stat.st_nlink > 1 {
                     self.hardlinks
                         .insert(link_info, (self.path.clone(), offset));
diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
index 86e20c92..1cc5421d 100644
--- a/pbs-datastore/src/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -7,6 +7,7 @@ use anyhow::{bail, format_err, Error};
 use serde::{Deserialize, Serialize};
 
 use pathpatterns::{MatchList, MatchType};
+use pxar::encoder::LinkOffset;
 
 use proxmox_io::ReadExt;
 use proxmox_schema::api;
@@ -20,7 +21,13 @@ use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
 pub trait BackupCatalogWriter {
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
     fn end_directory(&mut self) -> Result<(), Error>;
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    ) -> Result<(), Error>;
     fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
@@ -94,8 +101,14 @@ pub struct DirEntry {
 /// Used to specific additional attributes inside DirEntry
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum DirEntryAttribute {
-    Directory { start: u64 },
-    File { size: u64, mtime: i64 },
+    Directory {
+        start: u64,
+    },
+    File {
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    },
     Symlink,
     Hardlink,
     BlockDevice,
@@ -105,7 +118,14 @@ pub enum DirEntryAttribute {
 }
 
 impl DirEntry {
-    fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime: i64) -> Self {
+    fn new(
+        etype: CatalogEntryType,
+        name: Vec<u8>,
+        start: u64,
+        size: u64,
+        mtime: i64,
+        link_offset: Option<LinkOffset>,
+    ) -> Self {
         match etype {
             CatalogEntryType::Directory => DirEntry {
                 name,
@@ -113,7 +133,11 @@ impl DirEntry {
             },
             CatalogEntryType::File => DirEntry {
                 name,
-                attr: DirEntryAttribute::File { size, mtime },
+                attr: DirEntryAttribute::File {
+                    size,
+                    mtime,
+                    link_offset: link_offset.unwrap(),
+                },
             },
             CatalogEntryType::Symlink => DirEntry {
                 name,
@@ -197,13 +221,19 @@ impl DirInfo {
             }
             DirEntry {
                 name,
-                attr: DirEntryAttribute::File { size, mtime },
+                attr:
+                    DirEntryAttribute::File {
+                        size,
+                        mtime,
+                        link_offset,
+                    },
             } => {
                 writer.write_all(&[CatalogEntryType::File as u8])?;
                 catalog_encode_u64(writer, name.len() as u64)?;
                 writer.write_all(name)?;
                 catalog_encode_u64(writer, *size)?;
                 catalog_encode_i64(writer, *mtime)?;
+                catalog_encode_u64(writer, link_offset.raw())?;
             }
             DirEntry {
                 name,
@@ -271,7 +301,9 @@ impl DirInfo {
         Ok((self.name, data))
     }
 
-    fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, i64) -> Result<bool, Error>>(
+    fn parse<
+        C: FnMut(CatalogEntryType, &[u8], u64, u64, i64, Option<LinkOffset>) -> Result<bool, Error>,
+    >(
         data: &[u8],
         mut callback: C,
     ) -> Result<(), Error> {
@@ -300,14 +332,22 @@ impl DirInfo {
             let cont = match etype {
                 CatalogEntryType::Directory => {
                     let offset = catalog_decode_u64(&mut cursor)?;
-                    callback(etype, name, offset, 0, 0)?
+                    callback(etype, name, offset, 0, 0, None)?
                 }
                 CatalogEntryType::File => {
                     let size = catalog_decode_u64(&mut cursor)?;
                     let mtime = catalog_decode_i64(&mut cursor)?;
-                    callback(etype, name, 0, size, mtime)?
+                    let link_offset = catalog_decode_u64(&mut cursor)?;
+                    callback(
+                        etype,
+                        name,
+                        0,
+                        size,
+                        mtime,
+                        Some(LinkOffset::new(link_offset)),
+                    )?
                 }
-                _ => callback(etype, name, 0, 0, 0)?,
+                _ => callback(etype, name, 0, 0, 0, None)?,
             };
             if !cont {
                 return Ok(());
@@ -407,7 +447,13 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
         Ok(())
     }
 
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error> {
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        link_offset: LinkOffset,
+    ) -> Result<(), Error> {
         let dir = self
             .dirstack
             .last_mut()
@@ -415,7 +461,11 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
         let name = name.to_bytes().to_vec();
         dir.entries.push(DirEntry {
             name,
-            attr: DirEntryAttribute::File { size, mtime },
+            attr: DirEntryAttribute::File {
+                size,
+                mtime,
+                link_offset,
+            },
         });
         Ok(())
     }
@@ -550,8 +600,15 @@ impl<R: Read + Seek> CatalogReader<R> {
 
         let mut entry_list = Vec::new();
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
+            let entry = DirEntry::new(
+                etype,
+                name.to_vec(),
+                start - offset,
+                size,
+                mtime,
+                link_offset,
+            );
             entry_list.push(entry);
             Ok(true)
         })?;
@@ -600,12 +657,19 @@ impl<R: Read + Seek> CatalogReader<R> {
         let data = self.read_raw_dirinfo_block(start)?;
 
         let mut item = None;
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
             if name != filename {
                 return Ok(true);
             }
 
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
+            let entry = DirEntry::new(
+                etype,
+                name.to_vec(),
+                start - offset,
+                size,
+                mtime,
+                link_offset,
+            );
             item = Some(entry);
             Ok(false) // stop parsing
         })?;
@@ -628,7 +692,7 @@ impl<R: Read + Seek> CatalogReader<R> {
     pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
         let data = self.read_raw_dirinfo_block(start)?;
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+        DirInfo::parse(&data, |etype, name, offset, size, mtime, link_offset| {
             let mut path = std::path::PathBuf::from(prefix);
             let name: &OsStr = OsStrExt::from_bytes(name);
             path.push(name);
@@ -648,7 +712,14 @@ impl<R: Read + Seek> CatalogReader<R> {
                         mtime_string = s;
                     }
 
-                    log::info!("{} {:?} {} {}", etype, path, size, mtime_string,);
+                    log::info!(
+                        "{} {:?} {} {} {:?}",
+                        etype,
+                        path,
+                        size,
+                        mtime_string,
+                        link_offset
+                    );
                 }
                 _ => {
                     log::info!("{} {:?}", etype, path);
@@ -705,9 +776,15 @@ impl<R: Read + Seek> CatalogReader<R> {
             components.push(b'/');
             components.extend(&direntry.name);
             let mut entry = ArchiveEntry::new(&components, Some(&direntry.attr));
-            if let DirEntryAttribute::File { size, mtime } = direntry.attr {
+            if let DirEntryAttribute::File {
+                size,
+                mtime,
+                link_offset,
+            } = direntry.attr
+            {
                 entry.size = size.into();
                 entry.mtime = mtime.into();
+                entry.link_offset = Some(link_offset.raw());
             }
             res.push(entry);
         }
@@ -916,6 +993,9 @@ pub struct ArchiveEntry {
     /// The file "last modified" time stamp, if entry_type is 'f' (file)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub mtime: Option<i64>,
+    /// The file link offset in the pxar archive, if entry_type is 'f' (file)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub link_offset: Option<u64>,
 }
 
 impl ArchiveEntry {
@@ -946,6 +1026,10 @@ impl ArchiveEntry {
                 Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime),
                 _ => None,
             },
+            link_offset: match entry_type {
+                Some(DirEntryAttribute::File { link_offset, .. }) => Some(link_offset.raw()),
+                _ => None,
+            },
         }
     }
 }
diff --git a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
index c4e97d33..95e3593b 100644
--- a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
+++ b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
@@ -109,6 +109,7 @@ fn get_dir_entry(path: &Path) -> Result<DirEntryAttribute, Error> {
         libc::S_IFREG => DirEntryAttribute::File {
             size: stat.st_size as u64,
             mtime: stat.st_mtime,
+            link_offset: pxar::encoder::LinkOffset::new(0),
         },
         libc::S_IFDIR => DirEntryAttribute::Directory { start: 0 },
         _ => bail!("unsupported file type: {}", stat.st_mode),
-- 
2.39.2





  parent reply	other threads:[~2023-09-22  7:17 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-22  7:16 [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 1/20] fix #3174: encoder: impl fn new for LinkOffset Christian Ebner
2023-09-27 12:08   ` Wolfgang Bumiller
2023-09-27 12:26     ` Christian Ebner
2023-09-28  6:49       ` Wolfgang Bumiller
2023-09-28  7:52         ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 2/20] fix #3174: decoder: factor out skip_bytes from skip_entry Christian Ebner
2023-09-27 11:32   ` Wolfgang Bumiller
2023-09-27 11:53     ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 3/20] fix #3174: decoder: impl skip_bytes for sync dec Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 4/20] fix #3174: metadata: impl fn to calc byte size Christian Ebner
2023-09-27 11:38   ` Wolfgang Bumiller
2023-09-27 11:55     ` Christian Ebner
2023-09-28  8:07       ` Christian Ebner
2023-09-28  9:00         ` Wolfgang Bumiller
2023-09-28  9:27           ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 5/20] fix #3174: enc/dec: impl PXAR_APPENDIX_REF entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 6/20] fix #3174: enc/dec: impl PXAR_APPENDIX entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 7/20] fix #3174: encoder: add helper to incr encoder pos Christian Ebner
2023-09-27 12:07   ` Wolfgang Bumiller
2023-09-27 12:20     ` Christian Ebner
2023-09-28  7:04       ` Wolfgang Bumiller
2023-09-28  7:50         ` Christian Ebner
2023-09-28  8:32           ` Wolfgang Bumiller
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 8/20] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 09/20] fix #3174: index: add fn index list from start/end-offsets Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 10/20] fix #3174: index: add fn digest for DynamicEntry Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 11/20] fix #3174: api: double catalog upload size Christian Ebner
2023-09-22  7:16 ` Christian Ebner [this message]
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 13/20] fix #3174: archiver/extractor: impl appendix ref Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 14/20] fix #3174: extractor: impl seq restore from appendix Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 15/20] fix #3174: archiver: store ref to previous backup Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 16/20] fix #3174: upload stream: impl reused chunk injector Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 17/20] fix #3174: chunker: add forced boundaries Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 18/20] fix #3174: backup writer: inject queued chunk in upload steam Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 19/20] fix #3174: archiver: reuse files with unchanged metadata Christian Ebner
2023-09-26  7:01   ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 20/20] fix #3174: client: Add incremental flag to backup creation Christian Ebner
2023-09-26  7:11   ` Christian Ebner
2023-09-26  7:15 ` [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230922071621.12670-13-c.ebner@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal