From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 60A38918F3 for ; Thu, 4 Apr 2024 14:55:02 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 3F860EDC for ; Thu, 4 Apr 2024 14:55:02 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 4 Apr 2024 14:55:00 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 6CF0D4530A for ; Thu, 4 Apr 2024 14:55:00 +0200 (CEST) Date: Thu, 04 Apr 2024 14:54:53 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240328123707.336951-1-c.ebner@proxmox.com> <20240328123707.336951-38-c.ebner@proxmox.com> In-Reply-To: <20240328123707.336951-38-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1712231610.9nc2oh19ik.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.059 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH v3 
proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 04 Apr 2024 12:55:02 -0000 On March 28, 2024 1:36 pm, Christian Ebner wrote: > The helper method allows to lookup the entries of a dynamic index > which fully cover a given offset range. Further, the helper returns > the start padding from the start offset of the dynamic index entry > to the start offset of the given range and the end padding. >=20 > This will be used to lookup size and digest for chunks covering the > payload range of a regular file in order to re-use found chunks by > indexing them in the archives index file instead of re-encoding the > payload. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 2: > - moved this from the dynamic index to the pxar create as suggested > - refactored and optimized search, going for linear search to find the > end entry > - reworded commit message >=20 > pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++ > 1 file changed, 63 insertions(+) >=20 > diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.r= s > index 2bb5a6253..e2d3954ca 100644 > --- a/pbs-client/src/pxar/create.rs > +++ b/pbs-client/src/pxar/create.rs > @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; > use std::ffi::{CStr, CString, OsStr}; > use std::fmt; > use std::io::{self, Read}; > +use std::ops::Range; > use std::os::unix::ffi::OsStrExt; > use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; > use std::path::{Path, PathBuf}; > @@ -16,6 +17,7 @@ use nix::fcntl::OFlag; > use nix::sys::stat::{FileStat, Mode}; > =20 > use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternF= lag}; > +use pbs_datastore::index::IndexFile; > use 
proxmox_sys::error::SysError; > use pxar::encoder::{LinkOffset, SeqWrite}; > use pxar::Metadata; > @@ -25,6 +27,7 @@ use proxmox_lang::c_str; > use proxmox_sys::fs::{self, acl, xattr}; > =20 > use pbs_datastore::catalog::BackupCatalogWriter; > +use pbs_datastore::dynamic_index::DynamicIndexReader; > =20 > use crate::pxar::metadata::errno_is_unsupported; > use crate::pxar::tools::assert_single_path_component; > @@ -791,6 +794,66 @@ impl Archiver { > } > } > =20 > +/// Dynamic Entry reusable by payload references > +#[derive(Clone, Debug)] > +#[repr(C)] > +pub struct ReusableDynamicEntry { > + size_le: u64, > + digest: [u8; 32], > +} > + > +impl ReusableDynamicEntry { > + #[inline] > + pub fn size(&self) -> u64 { > + u64::from_le(self.size_le) > + } > + > + #[inline] > + pub fn digest(&self) -> [u8; 32] { > + self.digest > + } > +} > + > +/// List of dynamic entries containing the data given by an offset range > +fn lookup_dynamic_entries( > + index: &DynamicIndexReader, > + range: Range<u64>, > +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> { > + let end_idx =3D index.index_count() - 1; > + let chunk_end =3D index.chunk_end(end_idx); > + let start =3D index.binary_search(0, 0, end_idx, chunk_end, range.st= art)?; > + let mut end =3D start; > + while end < end_idx { > + if range.end < index.chunk_end(end) { > + break; > + } > + end +=3D 1; > + } this loop here > + > + let offset_first =3D if start =3D=3D 0 { > + 0 > + } else { > + index.chunk_end(start - 1) > + }; offset_first is prev_end, so maybe we could just name it like that from the start? 
> + > + let padding_start =3D range.start - offset_first; > + let padding_end =3D index.chunk_end(end) - range.end; > + > + let mut indices =3D Vec::new(); > + let mut prev_end =3D offset_first; > + for dynamic_entry in &index.index()[start..end + 1] { > + let size =3D dynamic_entry.end() - prev_end; > + let reusable_dynamic_entry =3D ReusableDynamicEntry { > + size_le: size.to_le(), > + digest: dynamic_entry.digest(), > + }; > + prev_end +=3D size; > + indices.push(reusable_dynamic_entry); > + } and this one here could probably be combined? > + > + Ok((indices, padding_start, padding_end)) > +} e.g., the whole thing could become something like (untested ;)): let end_idx =3D index.index_count() - 1; let chunk_end =3D index.chunk_end(end_idx); let start =3D index.binary_search(0, 0, end_idx, chunk_end, range.start= )?; let mut prev_end =3D if start =3D=3D 0 { 0 } else { index.chunk_end(start - 1) }; let padding_start =3D range.start - prev_end; let mut padding_end =3D 0; let mut indices =3D Vec::new(); for dynamic_entry in &index.index()[start..] { let end =3D dynamic_entry.end(); if range.end < end { padding_end =3D end - range.end; break; } let reusable_dynamic_entry =3D ReusableDynamicEntry { size_le: (end - prev_end).to_le(), digest: dynamic_entry.digest(), }; indices.push(reusable_dynamic_entry); prev_end =3D end; } Ok((indices, padding_start, padding_end)) > + > fn get_metadata( > fd: RawFd, > stat: &FileStat, > --=20 > 2.39.2 >=20 >=20 >=20 > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >=20 >=20 >=20