public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC pxar 8/20] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype
Date: Fri, 22 Sep 2023 09:16:09 +0200	[thread overview]
Message-ID: <20230922071621.12670-9-c.ebner@proxmox.com> (raw)
In-Reply-To: <20230922071621.12670-1-c.ebner@proxmox.com>

The PXAR_APPENDIX_TAIL entry marks pxar archives containing an appendix
section. It has the same size as a goodbye tail marker item, so that
archives with and without such a section can easily be read and
distinguished.

This also implements the accessor used by e.g. the fuse implementation
to perform random I/O on the archive. The accessor reads the last entry
and stores the appendix offset if needed, in order to recalculate the
actual file payload offset within the archive when encountering an
appendix reference entry in the archive.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 examples/mk-format-hashes.rs |  5 ++++
 examples/pxarcmd.rs          |  4 ++--
 src/accessor/mod.rs          | 46 ++++++++++++++++++++++++++++++++++++
 src/encoder/aio.rs           |  6 ++---
 src/encoder/mod.rs           | 16 ++++++++++++-
 src/encoder/sync.rs          |  4 ++--
 src/format/mod.rs            |  4 ++++
 7 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index f068edd..7fb938d 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -57,6 +57,11 @@ const CONSTANTS: &[(&str, &str, &str)] = &[
         "PXAR_GOODBYE_TAIL_MARKER",
         "__PROXMOX_FORMAT_PXAR_GOODBYE_TAIL_MARKER__",
     ),
+    (
+        "Marks the end of an archive containing an appendix section",
+        "PXAR_APPENDIX_TAIL",
+        "__PROXMOX_FORMAT_APPENDIX_TAIL__",
+    ),
 ];
 
 fn main() {
diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs
index e0c779d..c7848cc 100644
--- a/examples/pxarcmd.rs
+++ b/examples/pxarcmd.rs
@@ -105,7 +105,7 @@ fn cmd_create(mut args: std::env::ArgsOs) -> Result<(), Error> {
 
     let mut encoder = Encoder::create(file, &meta)?;
     add_directory(&mut encoder, dir, &dir_path, &mut HashMap::new())?;
-    encoder.finish()?;
+    encoder.finish(None)?;
 
     Ok(())
 }
@@ -145,7 +145,7 @@ fn add_directory<'a, T: SeqWrite + 'a>(
                 root_path,
                 &mut *hardlinks,
             )?;
-            dir.finish()?;
+            dir.finish(None)?;
         } else if file_type.is_symlink() {
             todo!("symlink handling");
         } else if file_type.is_file() {
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index 6a2de73..1c19d7b 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -182,6 +182,7 @@ pub(crate) struct AccessorImpl<T> {
     input: T,
     size: u64,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<T: ReadAt> AccessorImpl<T> {
@@ -190,10 +191,22 @@ impl<T: ReadAt> AccessorImpl<T> {
             io_bail!("too small to contain a pxar archive");
         }
 
+        let tail_offset = size - (size_of::<GoodbyeItem>() as u64);
+        let tail: GoodbyeItem = read_entry_at(&input, tail_offset).await?;
+
+        let (appendix_offset, size) = if tail.hash == format::PXAR_APPENDIX_TAIL {
+            (Some(tail.offset), size - 40)
+        } else if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
+            io_bail!("no goodbye tail marker found");
+        } else {
+            (None, size)
+        };
+
         Ok(Self {
             input,
             size,
             caches: Arc::new(Caches::default()),
+            appendix_offset,
         })
     }
 
@@ -207,6 +220,7 @@ impl<T: ReadAt> AccessorImpl<T> {
             self.size,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -263,6 +277,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
             self.size,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -274,6 +289,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
             offset,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -369,6 +385,7 @@ pub(crate) struct DirectoryImpl<T> {
     table: Arc<[GoodbyeItem]>,
     path: PathBuf,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<T: Clone + ReadAt> DirectoryImpl<T> {
@@ -378,6 +395,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
         end_offset: u64,
         path: PathBuf,
         caches: Arc<Caches>,
+        appendix_offset: Option<u64>,
     ) -> io::Result<DirectoryImpl<T>> {
         let tail = Self::read_tail_entry(&input, end_offset).await?;
 
@@ -407,6 +425,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
             table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
             path,
             caches,
+            appendix_offset,
         };
 
         // sanity check:
@@ -516,6 +535,32 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
             .next()
             .await
             .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+        if let EntryKind::AppendixRef {
+            appendix_offset,
+            file_size,
+        } = entry.kind()
+        {
+            let appendix_start = match self.appendix_offset {
+                Some(appendix_start) => appendix_start,
+                None => io_bail!("missing required appendix start offset information"),
+            };
+
+            let name = file_name.ok_or_else(|| io_format_err!("missing required filename"))?;
+            let c_string = std::ffi::CString::new(name.as_os_str().as_bytes())?;
+            let start =
+                appendix_start + appendix_offset + 16 + c_string.as_bytes_with_nul().len() as u64;
+            let end = start + file_size;
+            decoder = self.get_decoder(start..end, file_name).await?;
+
+            let entry = decoder
+                .next()
+                .await
+                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            return Ok((entry, decoder));
+        }
+
         Ok((entry, decoder))
     }
 
@@ -698,6 +743,7 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
             self.entry_range_info.entry_range.end,
             self.entry.path.clone(),
             Arc::clone(&self.caches),
+            None,
         )
         .await
     }
diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
index 3587f65..2a877b1 100644
--- a/src/encoder/aio.rs
+++ b/src/encoder/aio.rs
@@ -108,8 +108,8 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     }
 
     /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`.
-    pub async fn finish(self) -> io::Result<()> {
-        self.inner.finish().await
+    pub async fn finish(self, appendix_tail: Option<(LinkOffset, u64)>) -> io::Result<()> {
+        self.inner.finish(appendix_tail).await
     }
 
     /// Add size to encoders position and return new position.
@@ -333,7 +333,7 @@ mod test {
                     .await
                     .unwrap();
             }
-            encoder.finish().await.unwrap();
+            encoder.finish(None).await.unwrap();
         };
 
         fn test_send<T: Send>(_: T) {}
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index abe21f2..c3de8c2 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -822,7 +822,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         .await
     }
 
-    pub async fn finish(mut self) -> io::Result<()> {
+    pub async fn finish(mut self, appendix_tail: Option<(LinkOffset, u64)>) -> io::Result<()> {
         let tail_bytes = self.finish_goodbye_table().await?;
         seq_write_pxar_entry(
             self.output.as_mut(),
@@ -832,6 +832,20 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         )
         .await?;
 
+        if let Some((link_offset, size)) = appendix_tail {
+            let mut appendix_tail = Vec::new();
+            appendix_tail.append(&mut format::PXAR_APPENDIX_TAIL.to_le_bytes().to_vec());
+            appendix_tail.append(&mut link_offset.raw().to_le_bytes().to_vec());
+            appendix_tail.append(&mut size.to_le_bytes().to_vec());
+            seq_write_pxar_entry(
+                self.output.as_mut(),
+                format::PXAR_GOODBYE,
+                &appendix_tail,
+                &mut self.state.write_position,
+            )
+            .await?;
+        }
+
         if let EncoderOutput::Owned(output) = &mut self.output {
             flush(output).await?;
         }
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index b3d7c44..f16d2d6 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -106,8 +106,8 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     }
 
     /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`.
-    pub fn finish(self) -> io::Result<()> {
-        poll_result_once(self.inner.finish())
+    pub fn finish(self, appendix_tail: Option<(LinkOffset, u64)>) -> io::Result<()> {
+        poll_result_once(self.inner.finish(appendix_tail))
     }
 
     /// Add size to encoders position and return new position.
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 8254df9..8016ab1 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -41,6 +41,8 @@
 //! are appended. They are NOT guaranteed to follow the full pxar structure and should only be
 //! used to extract the file payloads by given offset.
 //!   * `APPENDIX`          -- pxar archive fragments containing file payloads
+//!   * final goodbye table
+//!   * `APPENDIX_TAIL`     -- marks the end of an archive containing an APPENDIX section
 
 use std::cmp::Ordering;
 use std::ffi::{CStr, OsStr};
@@ -113,6 +115,8 @@ pub const PXAR_APPENDIX_REF: u64 = 0x849b4a17e0234f8e;
 pub const PXAR_GOODBYE: u64 = 0x2fec4fa642d5731d;
 /// The end marker used in the GOODBYE object
 pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0xef5eed5b753e1555;
+/// Marks the end of an archive containing an appendix section
+pub const PXAR_APPENDIX_TAIL: u64 = 0x5b1b9abb7ae454f1;
 
 #[derive(Debug, Endian)]
 #[repr(C)]
-- 
2.39.2





  parent reply	other threads:[~2023-09-22  7:16 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-22  7:16 [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 1/20] fix #3174: encoder: impl fn new for LinkOffset Christian Ebner
2023-09-27 12:08   ` Wolfgang Bumiller
2023-09-27 12:26     ` Christian Ebner
2023-09-28  6:49       ` Wolfgang Bumiller
2023-09-28  7:52         ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 2/20] fix #3174: decoder: factor out skip_bytes from skip_entry Christian Ebner
2023-09-27 11:32   ` Wolfgang Bumiller
2023-09-27 11:53     ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 3/20] fix #3174: decoder: impl skip_bytes for sync dec Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 4/20] fix #3174: metadata: impl fn to calc byte size Christian Ebner
2023-09-27 11:38   ` Wolfgang Bumiller
2023-09-27 11:55     ` Christian Ebner
2023-09-28  8:07       ` Christian Ebner
2023-09-28  9:00         ` Wolfgang Bumiller
2023-09-28  9:27           ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 5/20] fix #3174: enc/dec: impl PXAR_APPENDIX_REF entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 6/20] fix #3174: enc/dec: impl PXAR_APPENDIX entrytype Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC pxar 7/20] fix #3174: encoder: add helper to incr encoder pos Christian Ebner
2023-09-27 12:07   ` Wolfgang Bumiller
2023-09-27 12:20     ` Christian Ebner
2023-09-28  7:04       ` Wolfgang Bumiller
2023-09-28  7:50         ` Christian Ebner
2023-09-28  8:32           ` Wolfgang Bumiller
2023-09-22  7:16 ` Christian Ebner [this message]
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 09/20] fix #3174: index: add fn index list from start/end-offsets Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 10/20] fix #3174: index: add fn digest for DynamicEntry Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 11/20] fix #3174: api: double catalog upload size Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 12/20] fix #3174: catalog: incl pxar archives file offset Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 13/20] fix #3174: archiver/extractor: impl appendix ref Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 14/20] fix #3174: extractor: impl seq restore from appendix Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 15/20] fix #3174: archiver: store ref to previous backup Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 16/20] fix #3174: upload stream: impl reused chunk injector Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 17/20] fix #3174: chunker: add forced boundaries Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 18/20] fix #3174: backup writer: inject queued chunk in upload steam Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 19/20] fix #3174: archiver: reuse files with unchanged metadata Christian Ebner
2023-09-26  7:01   ` Christian Ebner
2023-09-22  7:16 ` [pbs-devel] [RFC proxmox-backup 20/20] fix #3174: client: Add incremental flag to backup creation Christian Ebner
2023-09-26  7:11   ` Christian Ebner
2023-09-26  7:15 ` [pbs-devel] [RFC pxar proxmox-backup 00/20] fix #3174: improve file-level backup Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230922071621.12670-9-c.ebner@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal