From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 3D4DFB939B for ; Wed, 13 Mar 2024 12:12:43 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 1EF1234A61 for ; Wed, 13 Mar 2024 12:12:13 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 13 Mar 2024 12:12:09 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 665A4454E1 for ; Wed, 13 Mar 2024 12:12:09 +0100 (CET) Date: Wed, 13 Mar 2024 12:12:00 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20240305092703.126906-1-c.ebner@proxmox.com> <20240305092703.126906-35-c.ebner@proxmox.com> In-Reply-To: <20240305092703.126906-35-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1710325718.hwp7rvfzh8.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.065 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [proxmox.com, create.rs] Subject: Re: [pbs-devel] [RFC v2 proxmox-backup 34/36] fix #3174: client: pxar: enable caching and meta comparison X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Mar 2024 11:12:43 -0000 On March 5, 2024 10:27 am, Christian Ebner wrote: > Add the final glue logic to enable the look-ahead caching and > metadata comparison introduced in the preparatory patches. I have to say the call stacks here are not getting easier to follow with all the intermingled caching_enabled logic... create_archive -> archive_dir_contents --> loop over files -> add_entry --->.add_entry_to_archive or flush cache and cache or add_entry_to_archive -> flush_cached_to_archive -> encoder.finish maybe it does get a bit disentangled or easier if add_entry/flush_entry_to_archive are merged? > Signed-off-by: Christian Ebner > --- > changes since version 1: > - fix pxar exclude cli entry caching >=20 > pbs-client/src/pxar/create.rs | 121 +++++++++++++++++++++++++++++++--- > 1 file changed, 113 insertions(+), 8 deletions(-) >=20 > diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.r= s > index b2ce898f..bb4597bc 100644 > --- a/pbs-client/src/pxar/create.rs > +++ b/pbs-client/src/pxar/create.rs > @@ -32,10 +32,14 @@ use pbs_datastore::dynamic_index::{ > }; > =20 > use crate::inject_reused_chunks::InjectChunks; > +use crate::pxar::look_ahead_cache::{CacheEntry, CacheEntryData}; > use crate::pxar::metadata::errno_is_unsupported; > use crate::pxar::tools::assert_single_path_component; > use crate::pxar::Flags; > =20 > +const MAX_CACHE_SIZE: usize =3D 512; > +const CACHED_PAYLOAD_THRESHOLD: u64 =3D 2 * 1024 * 1024; > + > #[derive(Default)] > struct ReusedChunks { > start_boundary: PayloadOffset, > @@ -253,6 +257,9 @@ struct Archiver { > reused_chunks: ReusedChunks, > previous_payload_index: Option, > forced_boundaries: Arc>>, > + cached_entries: Vec, > + caching_enabled: bool, > + cached_payload_size: u64, you can probably already guess ;) this should be combined/refactored into some common "re-use enabled" struct. > } > =20 > type Encoder<'a, T> =3D pxar::encoder::aio::Encoder<'a, T>; > @@ -335,16 +342,32 @@ where > reused_chunks: ReusedChunks::new(), > previous_payload_index, > forced_boundaries, > + cached_entries: Vec::new(), > + caching_enabled: false, > + cached_payload_size: 0, > }; > =20 > archiver > .archive_dir_contents(&mut encoder, accessor, source_dir, true) > .await?; > + > + if let Some(last) =3D archiver.cached_entries.pop() { > + match last { > + // do not close final directory, this is done by the caller > + CacheEntry::DirEnd =3D> {} > + _ =3D> archiver.cached_entries.push(last), > + } > + } // do not close final directory, this is done by the caller if let Some(CacheEntry::DirEnd) =3D archiver.cached_entries.last() { archiver.cached_entries.pop(); } =20 should do the same but a bit cheaper / easier to read. but - "caller" is kind of misleading here, right? because it's not the caller of `create_archive` that handles the top-level DirEnd, it's the call to `encoder.finish()` right below.. it kinda seems like it would be an error to end up here with some other last cached element? if so, then maybe it would make sense to make this an invariant instead: match archiver.cached_entries.pop() { Some(CachEntry::DirEnd) | None =3D> { // OK }, Some(entry) =3D> { bail!("Finished creating archive with cache, but last cache element is {entry:?} instead of top-level directory end marker."); }, } OTOH, it's archive_dir_contents itself that adds that entry, and it knows whether it is called for the top-level dir or not, so it could just skip adding it in the first place in the root case? > + > + archiver > + .flush_cached_to_archive(&mut encoder, true, false) > + .await?; > + > encoder.finish().await?; > Ok(()) > } > =20 > -struct FileListEntry { > +pub(crate) struct FileListEntry { > name: CString, > path: PathBuf, > stat: FileStat, > @@ -396,8 +419,15 @@ impl Archiver { > let file_name =3D file_entry.name.to_bytes(); > =20 > if is_root && file_name =3D=3D b".pxarexclude-cli" { > - self.encode_pxarexclude_cli(encoder, &file_entry.nam= e, old_patterns_count) > - .await?; > + if self.caching_enabled { > + self.cached_entries.push(CacheEntry::PxarExclude= CliEntry( > + file_entry, > + old_patterns_count, > + )); > + } else { > + self.encode_pxarexclude_cli(encoder, &file_entry= .name, old_patterns_count) > + .await?; > + } > continue; > } > =20 > @@ -413,6 +443,11 @@ impl Archiver { > .await > .map_err(|err| self.wrap_err(err))?; > } > + > + if self.caching_enabled { > + self.cached_entries.push(CacheEntry::DirEnd); > + } > + > self.path =3D old_path; > self.entry_counter =3D entry_counter; > self.patterns.truncate(old_patterns_count); > @@ -693,8 +728,6 @@ impl Archiver { > c_file_name: &CStr, > stat: &FileStat, > ) -> Result<(), Error> { > - use pxar::format::mode; > - > let file_mode =3D stat.st_mode & libc::S_IFMT; > let open_mode =3D if file_mode =3D=3D libc::S_IFREG || file_mode= =3D=3D libc::S_IFDIR { > OFlag::empty() > @@ -732,6 +765,71 @@ impl Archiver { > self.skip_e2big_xattr, > )?; > =20 > + if self.previous_payload_index.is_none() { > + return self > + .add_entry_to_archive(encoder, accessor, c_file_name, st= at, fd, &metadata) > + .await; > + } > + > + // Avoid having to many open file handles in cached entries > + if self.cached_entries.len() > MAX_CACHE_SIZE { > + self.flush_cached_to_archive(encoder, false, true).await?; > + } > + > + if metadata.is_regular_file() { > + self.cache_or_flush_entries(encoder, accessor, c_file_name, = stat, fd, &metadata) > + .await > + } else { > + if self.caching_enabled { > + if stat.st_mode & libc::S_IFMT =3D=3D libc::S_IFDIR { > + let fd_clone =3D fd.try_clone()?; > + let cache_entry =3D CacheEntry::DirEntry(CacheEntryD= ata::new( > + fd, > + c_file_name.into(), > + stat.clone(), > + metadata.clone(), > + PayloadOffset::default(), > + )); > + self.cached_entries.push(cache_entry); > + > + let dir =3D Dir::from_fd(fd_clone.into_raw_fd())?; > + self.add_directory(encoder, accessor, dir, c_file_na= me, &metadata, stat) > + .await?; > + > + if let Some(ref catalog) =3D self.catalog { > + if !self.caching_enabled { > + catalog.lock().unwrap().end_directory()?; > + } > + } > + } else { > + let cache_entry =3D CacheEntry::RegEntry(CacheEntryD= ata::new( > + fd, > + c_file_name.into(), > + stat.clone(), > + metadata, > + PayloadOffset::default(), > + )); > + self.cached_entries.push(cache_entry); > + } > + Ok(()) > + } else { > + self.add_entry_to_archive(encoder, accessor, c_file_name= , stat, fd, &metadata) > + .await > + } > + } > + } > + > + async fn add_entry_to_archive( > + &mut self, > + encoder: &mut Encoder<'_, T>, > + accessor: &mut Option>>, > + c_file_name: &CStr, > + stat: &FileStat, > + fd: OwnedFd, > + metadata: &Metadata, > + ) -> Result<(), Error> { > + use pxar::format::mode; > + > let file_name: &Path =3D OsStr::from_bytes(c_file_name.to_bytes(= )).as_ref(); > match metadata.file_type() { > mode::IFREG =3D> { > @@ -781,7 +879,9 @@ impl Archiver { > .add_directory(encoder, accessor, dir, c_file_name, = &metadata, stat) > .await; > if let Some(ref catalog) =3D self.catalog { > - catalog.lock().unwrap().end_directory()?; > + if !self.caching_enabled { > + catalog.lock().unwrap().end_directory()?; > + } > } > result > } > @@ -1132,7 +1232,9 @@ impl Archiver { > ) -> Result<(), Error> { > let dir_name =3D OsStr::from_bytes(dir_name.to_bytes()); > =20 > - encoder.create_directory(dir_name, metadata).await?; > + if !self.caching_enabled { > + encoder.create_directory(dir_name, metadata).await?; > + } > =20 > let old_fs_magic =3D self.fs_magic; > let old_fs_feature_flags =3D self.fs_feature_flags; > @@ -1172,7 +1274,10 @@ impl Archiver { > self.fs_feature_flags =3D old_fs_feature_flags; > self.current_st_dev =3D old_st_dev; > =20 > - encoder.finish().await?; > + if !self.caching_enabled { > + encoder.finish().await?; > + } > + > result > } > =20 > --=20 > 2.39.2 >=20 >=20 >=20 > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >=20 >=20 >=20