From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id F1959B8FA6 for ; Tue, 12 Mar 2024 11:07:33 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id D17001557E for ; Tue, 12 Mar 2024 11:07:33 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Tue, 12 Mar 2024 11:07:32 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id C18CA43DD9 for ; Tue, 12 Mar 2024 11:07:31 +0100 (CET) Date: Tue, 12 Mar 2024 11:07:24 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240305092703.126906-1-c.ebner@proxmox.com> <20240305092703.126906-28-c.ebner@proxmox.com> In-Reply-To: <20240305092703.126906-28-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1710237573.g7a8c3ms4l.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.065 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - 
URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [mod.rs, main.rs, create.rs, proxmox.com] Subject: Re: [pbs-devel] [RFC v2 proxmox-backup 27/36] client: implement prepare reference method X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 12 Mar 2024 10:07:34 -0000 On March 5, 2024 10:26 am, Christian Ebner wrote: > Implement a method that prepares the decoder instance to access a > previous snapshots metadata index and payload index in order to > pass it to the pxar archiver. The archiver than can utilize these > to compare the metadata for files to the previous state and gather > reusable chunks. >=20 > Signed-off-by: Christian Ebner > --- > changes since version 1: > - no changes >=20 > pbs-client/src/pxar/create.rs | 13 ++++++ > pbs-client/src/pxar/mod.rs | 2 +- > proxmox-backup-client/src/main.rs | 71 ++++++++++++++++++++++++++++++- > 3 files changed, 83 insertions(+), 3 deletions(-) >=20 > diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.r= s > index 9ae84d37..cb0af29e 100644 > --- a/pbs-client/src/pxar/create.rs > +++ b/pbs-client/src/pxar/create.rs > @@ -17,6 +17,7 @@ use nix::sys::stat::{FileStat, Mode}; > =20 > use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternF= lag}; > use proxmox_sys::error::SysError; > +use pxar::accessor::aio::Accessor; > use pxar::encoder::{LinkOffset, SeqWrite}; > use pxar::Metadata; > =20 > @@ -24,7 +25,9 @@ use proxmox_io::vec; > use proxmox_lang::c_str; > use proxmox_sys::fs::{self, acl, xattr}; > =20 > +use crate::RemoteChunkReader; > use pbs_datastore::catalog::BackupCatalogWriter; > +use pbs_datastore::dynamic_index::{DynamicIndexReader, LocalDynamicReadA= t}; > =20 > use 
crate::inject_reused_chunks::InjectChunks; > use crate::pxar::metadata::errno_is_unsupported; > @@ -46,6 +49,16 @@ pub struct PxarCreateOptions { > pub skip_e2big_xattr: bool, > } > =20 > +/// Statefull information of previous backups snapshots for partial back= ups > +pub struct PxarPrevRef { > + /// Reference accessor for metadata comparison > + pub accessor: Accessor>, > + /// Reference index for reusing payload chunks > + pub payload_index: DynamicIndexReader, > + /// Reference archive name for partial backups > + pub archive_name: String, > +} > + > fn detect_fs_type(fd: RawFd) -> Result { > let mut fs_stat =3D std::mem::MaybeUninit::uninit(); > let res =3D unsafe { libc::fstatfs(fd, fs_stat.as_mut_ptr()) }; > diff --git a/pbs-client/src/pxar/mod.rs b/pbs-client/src/pxar/mod.rs > index 14674b9b..24315f5f 100644 > --- a/pbs-client/src/pxar/mod.rs > +++ b/pbs-client/src/pxar/mod.rs > @@ -56,7 +56,7 @@ pub(crate) mod tools; > mod flags; > pub use flags::Flags; > =20 > -pub use create::{create_archive, PxarCreateOptions}; > +pub use create::{create_archive, PxarCreateOptions, PxarPrevRef}; > pub use extract::{ > create_tar, create_zip, extract_archive, extract_sub_dir, extract_su= b_dir_seq, ErrorHandler, > OverwriteFlags, PxarExtractContext, PxarExtractOptions, > diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/sr= c/main.rs > index f077ddf6..8d657c15 100644 > --- a/proxmox-backup-client/src/main.rs > +++ b/proxmox-backup-client/src/main.rs > @@ -21,6 +21,7 @@ use proxmox_router::{cli::*, ApiMethod, RpcEnvironment}= ; > use proxmox_schema::api; > use proxmox_sys::fs::{file_get_json, image_size, replace_file, CreateOpt= ions}; > use proxmox_time::{epoch_i64, strftime_local}; > +use pxar::accessor::aio::Accessor; > use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation}; > =20 > use pbs_api_types::{ > @@ -30,7 +31,7 @@ use pbs_api_types::{ > BACKUP_TYPE_SCHEMA, TRAFFIC_CONTROL_BURST_SCHEMA, TRAFFIC_CONTROL_RA= TE_SCHEMA, > }; > use 
pbs_client::catalog_shell::Shell; > -use pbs_client::pxar::ErrorHandler as PxarErrorHandler; > +use pbs_client::pxar::{ErrorHandler as PxarErrorHandler, PxarPrevRef}; > use pbs_client::tools::{ > complete_archive_name, complete_auth_id, complete_backup_group, comp= lete_backup_snapshot, > complete_backup_source, complete_chunk_size, complete_group_or_snaps= hot, > @@ -50,7 +51,7 @@ use pbs_client::{ > }; > use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader, Catalog= Writer}; > use pbs_datastore::chunk_store::verify_chunk_size; > -use pbs_datastore::dynamic_index::{BufferedDynamicReader, DynamicIndexRe= ader}; > +use pbs_datastore::dynamic_index::{BufferedDynamicReader, DynamicIndexRe= ader, LocalDynamicReadAt}; > use pbs_datastore::fixed_index::FixedIndexReader; > use pbs_datastore::index::IndexFile; > use pbs_datastore::manifest::{ > @@ -1181,6 +1182,72 @@ async fn create_backup( > Ok(Value::Null) > } > =20 > +async fn prepare_reference( > + target_base: &str, > + extension: &str, > + manifest: Option>, > + backup_writer: &BackupWriter, > + backup_reader: Option>, > + crypt_config: Option>, > +) -> Result, Error> { > + let target =3D format!("{target_base}.meta.{extension}"); > + let payload_target =3D format!("{target_base}.pld.{extension}"); > + > + let manifest =3D if let Some(ref manifest) =3D manifest { > + manifest > + } else { > + return Ok(None); > + }; > + > + let backup_reader =3D if let Some(ref reader) =3D backup_reader { > + reader > + } else { > + return Ok(None); > + }; Couldn't these checks be done before/at the call site, so that this fn takes the manifest and reader without the Option wrapper? See my comments on the patch where this is used. 
> + > + let metadata_ref_index =3D if let Ok(index) =3D backup_reader > + .download_dynamic_index(&manifest, &target) > + .await > + { > + index > + } else { > + log::info!("No previous metadata index, fallback to regular enco= ding"); > + return Ok(None); > + }; > + > + let known_payload_chunks =3D Arc::new(Mutex::new(HashSet::new())); > + let payload_ref_index =3D if let Ok(index) =3D backup_writer > + .download_previous_dynamic_index(&payload_target, &manifest, kno= wn_payload_chunks) > + .await > + { > + index > + } else { > + log::info!("No previous payload index, fallback to regular encod= ing"); > + return Ok(None); > + }; For these two, it might make sense to differentiate between: - the previous manifest doesn't have that index -> no need to try the download, we can just skip - the previous manifest has that index -> we try to download it -> we need to handle any error (and show the user the error message - it might indicate a real problem after all!) > + > + log::info!("Using previous index as metadata reference for '{target}= '"); > + > + let most_used =3D metadata_ref_index.find_most_used_chunks(8); > + let file_info =3D manifest.lookup_file_info(&target)?; > + let chunk_reader =3D RemoteChunkReader::new( > + backup_reader.clone(), > + crypt_config.clone(), > + file_info.chunk_crypt_mode(), > + most_used, > + ); > + let reader =3D BufferedDynamicReader::new(metadata_ref_index, chunk_= reader); > + let archive_size =3D reader.archive_size(); > + let reader =3D LocalDynamicReadAt::new(reader); > + let accessor =3D Accessor::new(reader, archive_size).await?; > + > + Ok(Some(pbs_client::pxar::PxarPrevRef { > + accessor, > + payload_index: payload_ref_index, > + archive_name: target, > + })) > +} > + > async fn dump_image( > client: Arc, > crypt_config: Option>, > --=20 > 2.39.2 >=20 >=20 >=20 > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >=20 >=20 >=20