From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id B9ADABC254 for ; Thu, 28 Mar 2024 13:38:35 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 902B5A027 for ; Thu, 28 Mar 2024 13:37:47 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 28 Mar 2024 13:37:45 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 77CF842A1D for ; Thu, 28 Mar 2024 13:37:44 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Thu, 28 Mar 2024 13:36:46 +0100 Message-Id: <20240328123707.336951-38-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240328123707.336951-1-c.ebner@proxmox.com> References: <20240328123707.336951-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.030 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: 
Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 28 Mar 2024 12:38:35 -0000 The helper method allows looking up the entries of a dynamic index which fully cover a given offset range. Further, the helper returns the start padding, i.e. the distance from the start offset of the first covering dynamic index entry to the start offset of the given range, and the end padding, i.e. the distance from the end offset of the given range to the end offset of the last covering dynamic index entry. This will be used to look up size and digest for chunks covering the payload range of a regular file in order to re-use found chunks by indexing them in the archive's index file instead of re-encoding the payload. Signed-off-by: Christian Ebner --- changes since version 2: - moved this from the dynamic index to the pxar create as suggested - refactored and optimized search, going for linear search to find the end entry - reworded commit message pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs index 2bb5a6253..e2d3954ca 100644 --- a/pbs-client/src/pxar/create.rs +++ b/pbs-client/src/pxar/create.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::ffi::{CStr, CString, OsStr}; use std::fmt; use std::io::{self, Read}; +use std::ops::Range; use std::os::unix::ffi::OsStrExt; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; use std::path::{Path, PathBuf}; @@ -16,6 +17,7 @@ use nix::fcntl::OFlag; use nix::sys::stat::{FileStat, Mode}; use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternFlag}; +use pbs_datastore::index::IndexFile; use proxmox_sys::error::SysError; use pxar::encoder::{LinkOffset, SeqWrite}; use pxar::Metadata; @@ -25,6 +27,7 @@ use proxmox_lang::c_str; use proxmox_sys::fs::{self, acl, xattr}; use pbs_datastore::catalog::BackupCatalogWriter; +use pbs_datastore::dynamic_index::DynamicIndexReader; use crate::pxar::metadata::errno_is_unsupported; use 
crate::pxar::tools::assert_single_path_component; @@ -791,6 +794,66 @@ impl Archiver { } } +/// Dynamic Entry reusable by payload references +#[derive(Clone, Debug)] +#[repr(C)] +pub struct ReusableDynamicEntry { + size_le: u64, + digest: [u8; 32], +} + +impl ReusableDynamicEntry { + #[inline] + pub fn size(&self) -> u64 { + u64::from_le(self.size_le) + } + + #[inline] + pub fn digest(&self) -> [u8; 32] { + self.digest + } +} + +/// List of dynamic entries containing the data given by an offset range +fn lookup_dynamic_entries( + index: &DynamicIndexReader, + range: Range<u64>, +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> { + let end_idx = index.index_count() - 1; + let chunk_end = index.chunk_end(end_idx); + let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?; + let mut end = start; + while end < end_idx { + if range.end < index.chunk_end(end) { + break; + } + end += 1; + } + + let offset_first = if start == 0 { + 0 + } else { + index.chunk_end(start - 1) + }; + + let padding_start = range.start - offset_first; + let padding_end = index.chunk_end(end) - range.end; + + let mut indices = Vec::new(); + let mut prev_end = offset_first; + for dynamic_entry in &index.index()[start..end + 1] { + let size = dynamic_entry.end() - prev_end; + let reusable_dynamic_entry = ReusableDynamicEntry { + size_le: size.to_le(), + digest: dynamic_entry.digest(), + }; + prev_end += size; + indices.push(reusable_dynamic_entry); + } + + Ok((indices, padding_start, padding_end)) +} + fn get_metadata( fd: RawFd, stat: &FileStat, -- 2.39.2