Date: Tue, 12 Mar 2024 13:12:04 +0100
From: Fabian Grünbichler
To: Proxmox Backup Server development discussion
Subject: Re: [pbs-devel] [RFC v2 proxmox-backup 29/36] client: pxar: add previous reference to archiver
Message-Id: <1710241045.h2ym0ilnyj.astroid@yuna.none>
In-Reply-To: <20240305092703.126906-30-c.ebner@proxmox.com>

On March 5, 2024 10:26 am, Christian Ebner wrote:
> Read the previous snapshot's manifest and check if a split archive
> with the same name is given. If so, create the accessor instance to
> read the previous archive entries, to be able to look up and compare
> the metadata for the entries, allowing to decide whether an entry is
> reusable or not.
>
> Signed-off-by: Christian Ebner
> ---
> changes since version 1:
> - refactor payload target archive name generation
>
>  pbs-client/src/pxar/create.rs                 | 45 ++++++++++++---
>  proxmox-backup-client/src/main.rs             | 57 +++++++++++++++++--
>  .../src/proxmox_restore_daemon/api.rs         |  1 +
>  pxar-bin/src/main.rs                          |  1 +
>  4 files changed, 92 insertions(+), 12 deletions(-)
>
> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
> index 66bdbce8..7d627079 100644
> --- a/pbs-client/src/pxar/create.rs
> +++ b/pbs-client/src/pxar/create.rs
> @@ -138,7 +138,7 @@ impl ReusedChunks {
>      }
>
>  /// Pxar options for creating a pxar archive/stream
> -#[derive(Default, Clone)]
> +#[derive(Default)]
>  pub struct PxarCreateOptions {
>      /// Device/mountpoint st_dev numbers that should be included. None for no limitation.
>      pub device_set: Option>,
> @@ -150,6 +150,8 @@ pub struct PxarCreateOptions {
>      pub skip_lost_and_found: bool,
>      /// Skip xattrs of files that return E2BIG error
>      pub skip_e2big_xattr: bool,
> +    /// Reference state for partial backups
> +    pub previous_ref: Option,

this goes here

>  }
>
>  /// Statefull information of previous backups snapshots for partial backups
> @@ -249,6 +251,7 @@ struct Archiver {
>      file_copy_buffer: Vec,
>      skip_e2big_xattr: bool,
>      reused_chunks: ReusedChunks,
> +    previous_payload_index: Option,
>      forced_boundaries: Arc>>,

but this goes here - couldn't these be combined with the forced_boundaries/...
part into a single parameter/field?
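e.g. something along these lines (untested sketch only - "Index" and "Boundary"
are placeholders, the concrete types are cut off in the quote above, and the
Arc/Mutex/VecDeque wrapping is an assumption based on the current
forced_boundaries field, not necessarily what the series uses):

use std::collections::VecDeque;
use std::sync::{Arc, Mutex};

/// Bundles everything the archiver needs from the previous snapshot, so that
/// PxarCreateOptions and Archiver each only grow one optional field instead
/// of spreading the state over two places.
pub struct PreviousInfo<Index, Boundary> {
    /// payload index of the matching split archive in the previous snapshot
    pub payload_index: Index,
    /// chunk boundaries that have to be forced when re-using payload chunks
    pub forced_boundaries: Arc<Mutex<VecDeque<Boundary>>>,
}

that way "is this a re-using/partial run?" is answered by a single Option in
both places.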
>  }
>
> @@ -305,6 +308,14 @@ where
>              MatchType::Exclude,
>          )?);
>      }
> +    let (previous_payload_index, accessor) = if let Some(refs) = options.previous_ref {

"accessor" is a bit broad, maybe sneak in what is actually accessed ;)

> +        (
> +            Some(refs.payload_index),
> +            refs.accessor.open_root().await.ok(),
> +        )
> +    } else {
> +        (None, None)
> +    };
>
>      let mut archiver = Archiver {
>          feature_flags,
> @@ -322,11 +333,12 @@ where
>          file_copy_buffer: vec::undefined(4 * 1024 * 1024),
>          skip_e2big_xattr: options.skip_e2big_xattr,
>          reused_chunks: ReusedChunks::new(),
> +        previous_payload_index,
>          forced_boundaries,
>      };
>
>      archiver
> -        .archive_dir_contents(&mut encoder, source_dir, true)
> +        .archive_dir_contents(&mut encoder, accessor, source_dir, true)
>          .await?;
>      encoder.finish().await?;
>      Ok(())
> @@ -356,6 +368,7 @@ impl Archiver {
>      fn archive_dir_contents<'a, T: SeqWrite + Send>(
>          &'a mut self,
>          encoder: &'a mut Encoder<'_, T>,
> +        mut accessor: Option>>,
>          mut dir: Dir,
>          is_root: bool,
>      ) -> BoxFuture<'a, Result<(), Error>> {
> @@ -390,9 +403,15 @@ impl Archiver {
>
>                  (self.callback)(&file_entry.path)?;
>                  self.path = file_entry.path;
> -                self.add_entry(encoder, dir_fd, &file_entry.name, &file_entry.stat)
> -                    .await
> -                    .map_err(|err| self.wrap_err(err))?;
> +                self.add_entry(
> +                    encoder,
> +                    &mut accessor,
> +                    dir_fd,
> +                    &file_entry.name,
> +                    &file_entry.stat,
> +                )
> +                .await
> +                .map_err(|err| self.wrap_err(err))?;
>              }
>              self.path = old_path;
>              self.entry_counter = entry_counter;
> @@ -640,6 +659,7 @@ impl Archiver {
>      async fn add_entry(
>          &mut self,
>          encoder: &mut Encoder<'_, T>,
> +        accessor: &mut Option>>,
>          parent: RawFd,
>          c_file_name: &CStr,
>          stat: &FileStat,
> @@ -729,7 +749,7 @@ impl Archiver {
>                  catalog.lock().unwrap().start_directory(c_file_name)?;
>              }
>              let result = self
> -                .add_directory(encoder, dir, c_file_name, &metadata, stat)
> +                .add_directory(encoder, accessor, dir, c_file_name, &metadata, stat)
>                  .await;
>              if let Some(ref catalog) = self.catalog {
>                  catalog.lock().unwrap().end_directory()?;
> @@ -782,6 +802,7 @@ impl Archiver {
>      async fn add_directory(
>          &mut self,
>          encoder: &mut Encoder<'_, T>,
> +        accessor: &mut Option>>,
>          dir: Dir,
>          dir_name: &CStr,
>          metadata: &Metadata,
> @@ -812,7 +833,17 @@ impl Archiver {
>              log::info!("skipping mount point: {:?}", self.path);
>              Ok(())
>          } else {
> -            self.archive_dir_contents(encoder, dir, false).await
> +            let mut dir_accessor = None;
> +            if let Some(accessor) = accessor.as_mut() {
> +                if let Some(file_entry) = accessor.lookup(dir_name).await? {
> +                    if file_entry.entry().is_dir() {
> +                        let dir = file_entry.enter_directory().await?;
> +                        dir_accessor = Some(dir);
> +                    }
> +                }
> +            }
> +            self.archive_dir_contents(encoder, dir_accessor, dir, false)
> +                .await
>          };
>
>          self.fs_magic = old_fs_magic;
> diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
> index 8d657c15..7c2c6983 100644
> --- a/proxmox-backup-client/src/main.rs
> +++ b/proxmox-backup-client/src/main.rs
> @@ -44,10 +44,10 @@ use pbs_client::tools::{
>      CHUNK_SIZE_SCHEMA, REPO_URL_SCHEMA,
>  };
>  use pbs_client::{
> -    delete_ticket_info, parse_backup_specification, view_task_result, BackupReader,
> -    BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, ChunkStream,
> -    FixedChunkStream, HttpClient, PxarBackupStream, RemoteChunkReader, UploadOptions,
> -    BACKUP_SOURCE_SCHEMA,
> +    delete_ticket_info, parse_backup_detection_mode_specification, parse_backup_specification,
> +    view_task_result, BackupReader, BackupRepository, BackupSpecificationType, BackupStats,
> +    BackupWriter, ChunkStream, FixedChunkStream, HttpClient, PxarBackupStream, RemoteChunkReader,
> +    UploadOptions, BACKUP_DETECTION_MODE_SPEC, BACKUP_SOURCE_SCHEMA,
>  };
>  use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader, CatalogWriter};
>  use pbs_datastore::chunk_store::verify_chunk_size;
> @@ -699,6 +699,10 @@ fn spawn_catalog_upload(
>                 schema: TRAFFIC_CONTROL_BURST_SCHEMA,
>                 optional: true,
>             },
> +           "change-detection-mode": {
> +               schema: BACKUP_DETECTION_MODE_SPEC,
> +               optional: true,
> +           },
>             "exclude": {
>                 type: Array,
>                 description: "List of paths or patterns for matching files to exclude.",
> @@ -893,6 +897,9 @@ async fn create_backup(
>
>      let backup_time = backup_time_opt.unwrap_or_else(epoch_i64);
>
> +    let detection_mode = param["change-detection-mode"].as_str().unwrap_or("data");
> +    let detection_mode = parse_backup_detection_mode_specification(detection_mode)?;
> +
>      let client = connect_rate_limited(&repo, rate_limit)?;
>      record_repository(&repo);
>
> @@ -944,6 +951,28 @@ async fn create_backup(
>          }
>      };
>
> +    let backup_reader = if detection_mode.is_metadata() {
> +        if let Ok(backup_dir) =
> +            api_datastore_latest_snapshot(&client, repo.store(), &backup_ns, snapshot.group.clone())
> +                .await
> +        {
> +            BackupReader::start(
> +                &client,
> +                crypt_config.clone(),
> +                repo.store(),
> +                &backup_ns,
> +                &backup_dir,
> +                true,
> +            )
> +            .await
> +            .ok()
> +        } else {
> +            None
> +        }
> +    } else {
> +        None
> +    };
> +

this reader should be started after the writer (else somebody else might be
- however unlikely - faster, and the reader then no longer refers to the
previous snapshot). upside: it can then be moved into the
download_previous_manifest arm - if the previous manifest was not downloadable
using the writer, or the key changed, or something else, we can already skip
re-using it based on those facts.
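i.e. roughly the following (untested sketch, argument lists abbreviated where
they are unchanged from the patch, fingerprint/key handling left out; only the
ordering and the placement inside the successful-download arm matter here):

    // writer first - the reader for the previous snapshot is only opened once
    // we know the previous manifest is actually usable
    let client = BackupWriter::start(client, crypt_config.clone(), /* ... as in the patch ... */).await?;

    let (previous_manifest, backup_reader) =
        if let Ok(manifest) = client.download_previous_manifest().await {
            // fingerprint/key checks as they happen today ...
            let reader = if detection_mode.is_metadata() {
                // `conn` and `backup_dir`: HttpClient connection and latest
                // snapshot of the group - that lookup itself could stay where
                // it is right now, only the reader start moves down here
                BackupReader::start(
                    &conn,
                    crypt_config.clone(),
                    repo.store(),
                    &backup_ns,
                    &backup_dir,
                    true,
                )
                .await
                .ok()
            } else {
                None
            };
            (Some(Arc::new(manifest)), reader)
        } else {
            // no usable previous manifest -> nothing to re-use, no reader needed
            (None, None)
        };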
>      let client = BackupWriter::start(
>          client,
>          crypt_config.clone(),
> @@ -1040,7 +1069,10 @@ async fn create_backup(
>                  manifest.add_file(target, stats.size, stats.csum, crypto.mode)?;
>              }
>              (BackupSpecificationType::PXAR, false) => {
> -                let metadata_mode = false; // Until enabled via param
> +                let archives = detection_mode.metadata_archive_names();
> +                let metadata_mode = detection_mode.is_metadata()
> +                    && (archives.contains(&target_base) || archives.is_empty());

I wonder - do we really need such fine-grained control here? wouldn't a simple
per-backup-job switch between metadata mode and regular mode be enough? (see
the sketch at the very end of this mail)

> +
>                  let (target, payload_target) = if metadata_mode {
>                      (
>                          format!("{target_base}.meta.{extension}"),
> @@ -1065,12 +1097,27 @@ async fn create_backup(
>                          .unwrap()
>                          .start_directory(std::ffi::CString::new(target.as_str())?.as_c_str())?;
>
> +                let previous_ref = if metadata_mode {
> +                    prepare_reference(
> +                        &target_base,
> +                        extension,
> +                        previous_manifest.clone(),
> +                        &client,
> +                        backup_reader.clone(),
> +                        crypt_config.clone(),
> +                    )
> +                    .await?
> +                } else {
> +                    None
> +                };
> +
>                  let pxar_options = pbs_client::pxar::PxarCreateOptions {
>                      device_set: devices.clone(),
>                      patterns: pattern_list.clone(),
>                      entries_max: entries_max as usize,
>                      skip_lost_and_found,
>                      skip_e2big_xattr,
> +                    previous_ref,
>                  };
>
>                  let upload_options = UploadOptions {
> diff --git a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
> index d912734c..449a7e4c 100644
> --- a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
> +++ b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
> @@ -355,6 +355,7 @@ fn extract(
>          patterns,
>          skip_lost_and_found: false,
>          skip_e2big_xattr: false,
> +        previous_ref: None,
>      };
>
>      let pxar_writer = TokioWriter::new(writer);
> diff --git a/pxar-bin/src/main.rs b/pxar-bin/src/main.rs
> index 74ee04f7..f3945801 100644
> --- a/pxar-bin/src/main.rs
> +++ b/pxar-bin/src/main.rs
> @@ -336,6 +336,7 @@ async fn create_archive(
>          patterns,
>          skip_lost_and_found: false,
>          skip_e2big_xattr: false,
> +        previous_ref: None,
>      };
>
>      let source = PathBuf::from(source);
> -- 
> 2.39.2
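(expanding on the detection-mode remark above - what I have in mind is just a
plain per-job switch instead of the optional list of archive names, roughly
like below. untested, and the names are only for illustration, they may or may
not match what the series already defines:)

/// coarse, job-wide change detection switch
#[derive(Clone, Copy, Default, PartialEq, Eq)]
enum BackupDetectionMode {
    /// read and re-chunk all file contents (current behaviour)
    #[default]
    Data,
    /// compare metadata against the previous snapshot and re-use unchanged
    /// payload chunks
    Metadata,
}

// in create_backup() the per-archive check above would then collapse to:
//     let metadata_mode = detection_mode == BackupDetectionMode::Metadata;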