From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 875DC91F2C for ; Thu, 4 Apr 2024 19:14:01 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4795070EE for ; Thu, 4 Apr 2024 19:13:31 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 4 Apr 2024 19:13:30 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 02DD24555C for ; Thu, 4 Apr 2024 19:13:30 +0200 (CEST) Message-ID: <195ccd13-ff9f-4b11-ae0a-80bc73f2420f@proxmox.com> Date: Thu, 4 Apr 2024 19:13:28 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird To: Proxmox Backup Server development discussion , =?UTF-8?Q?Fabian_Gr=C3=BCnbichler?= References: <20240328123707.336951-1-c.ebner@proxmox.com> <20240328123707.336951-38-c.ebner@proxmox.com> <1712231610.9nc2oh19ik.astroid@yuna.none> Content-Language: en-US, de-DE From: Christian Ebner In-Reply-To: <1712231610.9nc2oh19ik.astroid@yuna.none> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.031 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 04 Apr 2024 17:14:01 -0000 On 4/4/24 14:54, Fabian Grünbichler wrote: > On March 28, 2024 1:36 pm, Christian Ebner wrote: >> The helper method allows to lookup the entries of a dynamic index >> which fully cover a given offset range. Further, the helper returns >> the start padding from the start offset of the dynamic index entry >> to the start offset of the given range and the end padding. >> >> This will be used to lookup size and digest for chunks covering the >> payload range of a regular file in order to re-use found chunks by >> indexing them in the archives index file instead of re-encoding the >> payload. >> >> Signed-off-by: Christian Ebner >> --- >> changes since version 2: >> - moved this from the dynamic index to the pxar create as suggested >> - refactored and optimized search, going for linear search to find the >> end entry >> - reworded commit message >> >> pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 63 insertions(+) >> >> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs >> index 2bb5a6253..e2d3954ca 100644 >> --- a/pbs-client/src/pxar/create.rs >> +++ b/pbs-client/src/pxar/create.rs >> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; >> use std::ffi::{CStr, CString, OsStr}; >> use std::fmt; >> use std::io::{self, Read}; >> +use std::ops::Range; >> use std::os::unix::ffi::OsStrExt; >> use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; >> use std::path::{Path, PathBuf}; >> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag; >> use nix::sys::stat::{FileStat, Mode}; >> >> use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternFlag}; >> +use pbs_datastore::index::IndexFile; >> use proxmox_sys::error::SysError; >> use pxar::encoder::{LinkOffset, SeqWrite}; >> use pxar::Metadata; >> @@ -25,6 +27,7 @@ use proxmox_lang::c_str; >> use proxmox_sys::fs::{self, acl, xattr}; >> >> use pbs_datastore::catalog::BackupCatalogWriter; >> +use pbs_datastore::dynamic_index::DynamicIndexReader; >> >> use crate::pxar::metadata::errno_is_unsupported; >> use crate::pxar::tools::assert_single_path_component; >> @@ -791,6 +794,66 @@ impl Archiver { >> } >> } >> >> +/// Dynamic Entry reusable by payload references >> +#[derive(Clone, Debug)] >> +#[repr(C)] >> +pub struct ReusableDynamicEntry { >> + size_le: u64, >> + digest: [u8; 32], >> +} >> + >> +impl ReusableDynamicEntry { >> + #[inline] >> + pub fn size(&self) -> u64 { >> + u64::from_le(self.size_le) >> + } >> + >> + #[inline] >> + pub fn digest(&self) -> [u8; 32] { >> + self.digest >> + } >> +} >> + >> +/// List of dynamic entries containing the data given by an offset range >> +fn lookup_dynamic_entries( >> + index: &DynamicIndexReader, >> + range: Range, >> +) -> Result<(Vec, u64, u64), Error> { >> + let end_idx = index.index_count() - 1; >> + let chunk_end = index.chunk_end(end_idx); >> + let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?; >> + let mut end = start; >> + while end < end_idx { >> + if range.end < index.chunk_end(end) { >> + break; >> + } >> + end += 1; >> + } > > this loop here > >> + >> + let offset_first = if start == 0 { >> + 0 >> + } else { >> + index.chunk_end(start - 1) >> + }; > > offset_first is prev_end, so maybe we could just name it like that from > the start? > >> + >> + let padding_start = range.start - offset_first; >> + let padding_end = index.chunk_end(end) - range.end; >> + >> + let mut indices = Vec::new(); >> + let mut prev_end = offset_first; >> + for dynamic_entry in &index.index()[start..end + 1] { >> + let size = dynamic_entry.end() - prev_end; >> + let reusable_dynamic_entry = ReusableDynamicEntry { >> + size_le: size.to_le(), >> + digest: dynamic_entry.digest(), >> + }; >> + prev_end += size; >> + indices.push(reusable_dynamic_entry); >> + } > > and this one here could probably be combined? > >> + >> + Ok((indices, padding_start, padding_end)) >> +} > > e.g., the whole thing could become something like (untested ;)): > > let end_idx = index.index_count() - 1; > let chunk_end = index.chunk_end(end_idx); > let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?; > > let mut prev_end = if start == 0 { > 0 > } else { > index.chunk_end(start - 1) > }; > let padding_start = range.start - prev_end; > let mut padding_end = 0; > > let mut indices = Vec::new(); > for dynamic_entry in &index.index()[start..] { > let end = dynamic_entry.end(); > if range.end < end { > padding_end = end - range.end; > break; > } > > let reusable_dynamic_entry = ReusableDynamicEntry { > size_le: (end - prev_end).to_le(), > digest: dynamic_entry.digest(), > }; > indices.push(reusable_dynamic_entry); > prev_end = end; > } > > Ok((indices, padding_start, padding_end)) Thanks for looking into this so deeply, unfortunately this version leads to missing injected chunks in my quick test. Will have a look on where the problem is tomorrow.