From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 15 Nov 2023 16:48:00 +0100
Message-Id: <20231115154813.281564-16-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20231115154813.281564-1-c.ebner@proxmox.com>
References: <20231115154813.281564-1-c.ebner@proxmox.com>
Subject: [pbs-devel] [PATCH v5 proxmox-backup 15/28] fix #3174: extractor: impl seq restore from appendix

Restores the file payloads for all AppendixRef entries encountered during
the sequential restore of the pxar archive. This is done by iterating over
all files listed in the corresponding state variable, opening each of the
parent directories while storing their metadata for successive restore,
creating the file and writing its contents to it. When leaving the
directories, their metadata is restored.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
Changes since version 4:
- Refactor and rename `byte_len` to `encoded_size`

Changes since version 3:
- Use BTreeMap and sorted insert instead of Vec and sorting afterwards

Changes since version 2:
- Sort entries by their appendix start offset for restore. Required since
  chunks are now normalized during upload.

Changes since version 1:
- Use the Encoder to get encoded metadata byte size
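
For illustration only (not part of the patch): a minimal, self-contained sketch
of the offset bookkeeping described in the commit message, assuming a
simplified `appendix_refs` map keyed by the appendix start offset. Paths,
sizes and the fixed `encoded_size` below are made-up values; the real code
additionally opens the parent directories, restores their metadata and
decodes the actual file entries.

use std::collections::BTreeMap;
use std::path::PathBuf;

fn main() {
    // Hypothetical appendix refs: appendix start offset -> (path, payload size).
    // A BTreeMap iterates in ascending key order, i.e. in appendix offset order.
    let mut appendix_refs: BTreeMap<u64, (PathBuf, u64)> = BTreeMap::new();
    appendix_refs.insert(0, (PathBuf::from("./root/a.txt"), 100));
    appendix_refs.insert(164, (PathBuf::from("./root/sub/b.txt"), 2048));

    // Total size of the appendix section (made-up, consistent with the refs).
    let total: u64 = 2300;

    // Bytes consumed since the start of the appendix section.
    let mut consumed: u64 = 0;
    for (offset, (path, size)) in &appendix_refs {
        // Skip over any bytes between the current position and this entry.
        let skip = offset - consumed;
        println!("skip {skip}, restore {} ({size} bytes)", path.display());

        // The real code decodes the entry here; its encoded metadata size is
        // what the Encoder reports plus the payload header. A made-up constant
        // stands in for that here.
        let encoded_size: u64 = 64;
        consumed += skip + encoded_size + size;
    }

    // Finally skip whatever remains of the appendix section.
    println!("skip remaining {} of {total} bytes", total - consumed);
}

Keying the refs by appendix start offset (see the extract.rs hunk below) means
they are visited in ascending offset order, so the decoder only ever needs to
skip forward.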

 pbs-client/src/pxar/create.rs  |   4 +-
 pbs-client/src/pxar/extract.rs | 147 +++++++++++++++++++++++++++++++--
 pbs-client/src/pxar/tools.rs   |   1 +
 3 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index 611d7421..50bba4e6 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -43,7 +43,7 @@ pub struct PxarCreateOptions {
     pub skip_lost_and_found: bool,
 }
 
-fn detect_fs_type(fd: RawFd) -> Result<i64, Error> {
+pub fn detect_fs_type(fd: RawFd) -> Result<i64, Error> {
     let mut fs_stat = std::mem::MaybeUninit::uninit();
     let res = unsafe { libc::fstatfs(fd, fs_stat.as_mut_ptr()) };
     Errno::result(res)?;
@@ -776,7 +776,7 @@ impl Archiver {
     }
 }
 
-fn get_metadata(
+pub fn get_metadata(
     fd: RawFd,
     stat: &FileStat,
     flags: Flags,
diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs
index d2d42749..aa6d4e4d 100644
--- a/pbs-client/src/pxar/extract.rs
+++ b/pbs-client/src/pxar/extract.rs
@@ -1,6 +1,6 @@
 //! Code for extraction of pxar contents onto the file system.
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::ffi::{CStr, CString, OsStr, OsString};
 use std::io;
 use std::os::unix::ffi::OsStrExt;
@@ -74,7 +74,7 @@ struct ExtractorIterState {
     err_path_stack: Vec,
     current_match: bool,
     end_reached: bool,
-    appendix_list: Vec<(PathBuf, u64, u64)>,
+    appendix_refs: BTreeMap<u64, (PathBuf, u64)>,
 }
 
 /// An [`Iterator`] that encapsulates the process of extraction in [extract_archive].
@@ -99,7 +99,7 @@ impl ExtractorIterState {
             err_path_stack: Vec::new(),
             current_match: options.extract_match_default,
             end_reached: false,
-            appendix_list: Vec::new(),
+            appendix_refs: BTreeMap::new(),
         }
     }
 }
@@ -313,6 +313,139 @@ where
                     res
                 }
 
+            (_, EntryKind::Appendix { total }) => {
+                // Bytes consumed in decoder since encountering the appendix marker
+                let mut consumed = 0;
+                for (offset, (path, size)) in &self.state.appendix_refs {
+                    self.extractor.allow_existing_dirs = true;
+
+                    // Open dir path components, skipping the root component, get metadata
+                    for dir in path.iter().skip(1) {
+                        let parent_fd = match self.extractor.dir_stack.last_dir_fd(true) {
+                            Ok(parent_fd) => parent_fd,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+                        let fs_magic =
+                            match crate::pxar::create::detect_fs_type(parent_fd.as_raw_fd()) {
+                                Ok(fs_magic) => fs_magic,
+                                Err(err) => return Some(Err(err.into())),
+                            };
+
+                        let mut fs_feature_flags = Flags::from_magic(fs_magic);
+                        let file_name = match CString::new(dir.as_bytes()) {
+                            Ok(file_name) => file_name,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+                        let fd = match proxmox_sys::fd::openat(
+                            &parent_fd,
+                            file_name.as_ref(),
+                            OFlag::O_NOATIME,
+                            Mode::empty(),
+                        ) {
+                            Ok(fd) => fd,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+
+                        let stat = match nix::sys::stat::fstat(fd.as_raw_fd()) {
+                            Ok(stat) => stat,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+                        let metadata = match crate::pxar::create::get_metadata(
+                            fd.as_raw_fd(),
+                            &stat,
+                            fs_feature_flags,
+                            fs_magic,
+                            &mut fs_feature_flags,
+                        ) {
+                            Ok(metadata) => metadata,
+                            Err(err) => return Some(Err(err)),
+                        };
+
+                        match self.extractor.enter_directory(
+                            dir.to_os_string(),
+                            metadata.clone(),
+                            true,
+                        ) {
+                            Ok(()) => (),
+                            Err(err) => return Some(Err(err)),
+                        };
+                    }
+
+                    let skip = *offset - consumed;
+                    match self.decoder.skip_bytes(skip) {
+                        Ok(()) => (),
+                        Err(err) => return Some(Err(err.into())),
+                    };
+
+                    let entry = match self.decoder.next() {
+                        Some(Ok(entry)) => entry,
+                        Some(Err(err)) => return Some(Err(err.into())),
+                        None => return Some(Err(format_err!("expected entry"))),
+                    };
+
+                    let file_name_os = entry.file_name();
+                    let file_name_bytes = file_name_os.as_bytes();
+
+                    let file_name = match CString::new(file_name_bytes) {
+                        Ok(file_name_ref) => file_name_ref,
+                        Err(err) => return Some(Err(err.into())),
+                    };
+
+                    let metadata = entry.metadata();
+
+                    self.extractor.set_path(path.as_os_str().to_owned());
+
+                    let contents = self.decoder.contents();
+                    match contents {
+                        None => {
+                            return Some(Err(format_err!(
+                                "found regular file entry without contents in archive"
+                            )))
+                        }
+                        Some(mut contents) => {
+                            let result = self
+                                .extractor
+                                .extract_file(
+                                    &file_name,
+                                    metadata,
+                                    *size,
+                                    &mut contents,
+                                    self.extractor
+                                        .overwrite_flags
+                                        .contains(OverwriteFlags::FILE),
+                                )
+                                .context(PxarExtractContext::ExtractFile);
+                            if let Err(err) = result {
+                                return Some(Err(err.into()));
+                            }
+                        }
+                    }
+
+                    // Iter over all dir path components, skipping the root component, set metadata
+                    for _dir in path.iter().skip(1) {
+                        if let Err(err) = self.extractor.leave_directory() {
+                            return Some(Err(err.into()));
+                        }
+                    }
+
+                    let mut bytes =
+                        match pxar::encoder::sync::encoded_size(file_name.as_c_str(), &metadata) {
+                            Ok(bytes) => bytes,
+                            Err(err) => return Some(Err(err.into())),
+                        };
+                    // payload header size
+                    bytes += std::mem::size_of::<pxar::format::Header>() as u64;
+
+                    consumed += skip + bytes + *size;
+                }
+
+                let skip = *total - consumed;
+                if let Err(err) = self.decoder.skip_bytes(skip) {
+                    return Some(Err(err.into()));
+                }
+
+                Ok(())
+            }
             (true, EntryKind::Symlink(link)) => {
                 self.callback(entry.path());
                 self.extractor
@@ -382,11 +515,9 @@ where
                     file_size,
                 },
             ) => {
-                self.state.appendix_list.push((
-                    entry.path().to_path_buf(),
-                    *appendix_offset,
-                    *file_size,
-                ));
+                self.state
+                    .appendix_refs
+                    .insert(*appendix_offset, (entry.path().to_path_buf(), *file_size));
                 Ok(())
             }
             (false, _) => Ok(()), // skip this
diff --git a/pbs-client/src/pxar/tools.rs b/pbs-client/src/pxar/tools.rs
index aac5a1e7..174a7351 100644
--- a/pbs-client/src/pxar/tools.rs
+++ b/pbs-client/src/pxar/tools.rs
@@ -156,6 +156,7 @@ pub fn format_multi_line_entry(entry: &Entry) -> String {
 
     let (size, link, type_name) = match entry.kind() {
         EntryKind::File { size, .. } => (format!("{}", *size), String::new(), "file"),
+        EntryKind::Appendix { total } => (format!("{total}"), String::new(), "appendix"),
         EntryKind::AppendixRef {
             appendix_offset,
             file_size,
-- 
2.39.2