From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.ebner@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id EF68ADBC9
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:17:27 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 7CC7E6C91
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:55 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:52 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 288DF48792
 for <pbs-devel@lists.proxmox.com>; Fri, 22 Sep 2023 09:16:52 +0200 (CEST)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Fri, 22 Sep 2023 09:16:15 +0200
Message-Id: <20230922071621.12670-15-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20230922071621.12670-1-c.ebner@proxmox.com>
References: <20230922071621.12670-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.108 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pbs-devel] [RFC proxmox-backup 14/20] fix #3174: extractor: impl
 seq restore from appendix
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 22 Sep 2023 07:17:28 -0000

Restores the file payloads for all AppendixRef entries encountered
during the sequential restore of the pxar archive.
This is done by iterating over all the files listed in the corresponding
state variable, opening each of the parent directories while storing
their metadata for later restore, then creating the file and writing
its contents.

When leaving the directories, their metadata is restored.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-client/src/pxar/create.rs  |   4 +-
 pbs-client/src/pxar/extract.rs | 125 +++++++++++++++++++++++++++++++++
 pbs-client/src/pxar/tools.rs   |   1 +
 3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index 0468abe9..c0fc5e2d 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -43,7 +43,7 @@ pub struct PxarCreateOptions {
     pub skip_lost_and_found: bool,
 }
 
-fn detect_fs_type(fd: RawFd) -> Result<i64, Error> {
+pub fn detect_fs_type(fd: RawFd) -> Result<i64, Error> {
     let mut fs_stat = std::mem::MaybeUninit::uninit();
     let res = unsafe { libc::fstatfs(fd, fs_stat.as_mut_ptr()) };
     Errno::result(res)?;
@@ -776,7 +776,7 @@ impl Archiver {
     }
 }
 
-fn get_metadata(
+pub fn get_metadata(
     fd: RawFd,
     stat: &FileStat,
     flags: Flags,
diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs
index d2d42749..3570eb01 100644
--- a/pbs-client/src/pxar/extract.rs
+++ b/pbs-client/src/pxar/extract.rs
@@ -313,6 +313,131 @@ where
 
                 res
             }
+            (_, EntryKind::Appendix { total }) => {
+                // Bytes consumed in decoder since encountering the appendix marker
+                let mut consumed = 0;
+
+                for (path, offset, size) in &self.state.appendix_list {
+                    self.extractor.allow_existing_dirs = true;
+
+                    let components = match path.parent() {
+                        Some(components) => components,
+                        None => return Some(Err(format_err!("expected path with parent"))),
+                    };
+
+                    // Open dir path components, skipping the root component, get metadata
+                    for dir in components.iter().skip(1) {
+                        let parent_fd = match self.extractor.dir_stack.last_dir_fd(true) {
+                            Ok(parent_fd) => parent_fd,
+                            Err(err) => return Some(Err(err)),
+                        };
+                        let fs_magic = match crate::pxar::create::detect_fs_type(parent_fd.as_raw_fd()) {
+                            Ok(fs_magic) => fs_magic,
+                            Err(err) => return Some(Err(err)),
+                        };
+
+                        let mut fs_feature_flags = Flags::from_magic(fs_magic);
+                        let file_name = match CString::new(dir.as_bytes()) {
+                            Ok(file_name) => file_name,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+                        let fd = proxmox_sys::fd::openat(
+                            &parent_fd,
+                            file_name.as_ref(),
+                            OFlag::O_NOATIME,
+                            Mode::empty(),
+                        )
+                        .unwrap();
+                        let stat = nix::sys::stat::fstat(fd.as_raw_fd()).unwrap();
+                        let metadata = match crate::pxar::create::get_metadata(
+                            fd.as_raw_fd(),
+                            &stat,
+                            fs_feature_flags,
+                            fs_magic,
+                            &mut fs_feature_flags,
+                        ) {
+                            Ok(metadata) => metadata,
+                            Err(err) => return Some(Err(err)),
+                        };
+
+                        match self.extractor.enter_directory(dir.to_os_string(), metadata.clone(), true) {
+                            Ok(()) => (),
+                            Err(err) => return Some(Err(err)),
+                        };
+                    }
+
+                    let skip = *offset - consumed;
+                    match self.decoder.skip_bytes(skip) {
+                        Ok(()) => (),
+                        Err(err) => return Some(Err(err.into())),
+                    };
+
+                    let entry = match self.decoder.next() {
+                        Some(Ok(entry)) => entry,
+                        Some(Err(err)) => return Some(Err(err.into())),
+                        None => return Some(Err(format_err!("expected entry"))),
+                    };
+
+                    let file_name_os = entry.file_name();
+                    let file_name_bytes = file_name_os.as_bytes();
+
+                    let file_name = match CString::new(file_name_bytes) {
+                        Ok(file_name_ref) => file_name_ref,
+                        Err(err) => return Some(Err(format_err!(err))),
+                    };
+
+                    let metadata = entry.metadata();
+
+                    self.extractor.set_path(path.as_os_str().to_owned());
+
+                    let contents = self.decoder.contents();
+
+                    let result = if let Some(mut contents) = contents {
+                        self.extractor.extract_file(
+                            &file_name,
+                            metadata,
+                            *size,
+                            &mut contents,
+                            self.extractor
+                                .overwrite_flags
+                                .contains(OverwriteFlags::FILE),
+                        )
+                    } else {
+                        Err(format_err!(
+                            "found regular file entry without contents in archive"
+                        ))
+                    }
+                    .context(PxarExtractContext::ExtractFile);
+                    result.unwrap();
+
+                    // Iter over all dir path components, skipping the root component, set metadata
+                    for _dir in components.iter().skip(1) {
+                        match self.extractor.leave_directory() {
+                            Ok(()) => (),
+                            Err(err) => return Some(Err(err)),
+                        }
+                    }
+
+                    // Entry header
+                    let mut metadata_bytes = std::mem::size_of::<pxar::format::Header>();
+                    // Filename payload
+                    metadata_bytes += std::mem::size_of_val(file_name.as_bytes()) + 1;
+                    // Metadata with headers and payloads
+                    metadata_bytes += metadata.calculate_byte_len();
+                    // Payload header
+                    metadata_bytes += std::mem::size_of::<pxar::format::Header>();
+
+                    consumed += skip + metadata_bytes as u64 + *size;
+                }
+
+                let skip = *total - consumed;
+                match self.decoder.skip_bytes(skip) {
+                    Ok(()) => (),
+                    Err(err) => return Some(Err(err.into())),
+                }
+
+                Ok(())
+            }
             (true, EntryKind::Symlink(link)) => {
                 self.callback(entry.path());
                 self.extractor
diff --git a/pbs-client/src/pxar/tools.rs b/pbs-client/src/pxar/tools.rs
index aac5a1e7..174a7351 100644
--- a/pbs-client/src/pxar/tools.rs
+++ b/pbs-client/src/pxar/tools.rs
@@ -156,6 +156,7 @@ pub fn format_multi_line_entry(entry: &Entry) -> String {
 
     let (size, link, type_name) = match entry.kind() {
         EntryKind::File { size, .. } => (format!("{}", *size), String::new(), "file"),
+        EntryKind::Appendix { total } => (format!("{total}"), String::new(), "appendix"),
         EntryKind::AppendixRef {
             appendix_offset,
             file_size,
-- 
2.39.2