From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id CD7ADD797 for ; Thu, 1 Dec 2022 14:29:49 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8E2BF29F67 for ; Thu, 1 Dec 2022 14:29:19 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 1 Dec 2022 14:29:17 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 544BC432F2 for ; Thu, 1 Dec 2022 14:29:17 +0100 (CET) Message-ID: Date: Thu, 1 Dec 2022 14:29:15 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.5.0 Content-Language: en-US To: pbs-devel@lists.proxmox.com References: <20221129141730.740199-1-l.wagner@proxmox.com> <20221129141730.740199-3-l.wagner@proxmox.com> From: Stefan Hanreich In-Reply-To: <20221129141730.740199-3-l.wagner@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.717 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -0.257 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [diff.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup 2/2] debug cli: add 'compare-content' flag to `diff archive` command X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Dec 2022 13:29:49 -0000 On 11/29/22 15:17, Lukas Wagner wrote: > When --compare-content is set, the command will compare the > file content instead on relying on mtime to detect modified files. > > Signed-off-by: Lukas Wagner > --- > src/bin/proxmox_backup_debug/diff.rs | 141 +++++++++++++++++++++++++-- > 1 file changed, 135 insertions(+), 6 deletions(-) > > diff --git a/src/bin/proxmox_backup_debug/diff.rs b/src/bin/proxmox_backup_debug/diff.rs > index d5a4a1fe..9b916760 100644 > --- a/src/bin/proxmox_backup_debug/diff.rs > +++ b/src/bin/proxmox_backup_debug/diff.rs > @@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param; > use pxar::accessor::ReadAt; > use pxar::EntryKind; > use serde_json::Value; > +use tokio::io::AsyncReadExt; > > type ChunkDigest = [u8; 32]; > type FileEntry = pxar::accessor::aio::FileEntry>; > type Accessor = pxar::accessor::aio::Accessor>; > type Directory = pxar::accessor::aio::Directory>; > > +const BUFFERSIZE: usize = 1024; > + > pub fn diff_commands() -> CommandLineInterface { > let cmd_def = CliCommandMap::new().insert( > "archive", > @@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface { > schema: KEYFD_SCHEMA, > optional: true, > }, > + "compare-content": { > + optional: true, > + type: bool, > + description: "Compare file content rather than solely relying on mtime for detecting modified files.", > + }, > } > } > )] > /// Diff an archive in two snapshots. The command will output a list of added, modified and deleted files. > -/// For modified files, only the file metadata (e.g. mtime, size, etc.) will be considered. The actual > -/// file contents will not be compared. > +/// For modified files, the file metadata (e.g. mode, uid, gid, size, etc.) will be considered. For detecting > +/// modification of file content, only mtime will be used by default. If the --compare-content flag is provided, > +/// mtime is ignored and file content will be compared. > async fn diff_archive_cmd(param: Value) -> Result<(), Error> { > let repo = extract_repository_from_value(¶m)?; > let snapshot_a = required_string_param(¶m, "prev-snapshot")?; > let snapshot_b = required_string_param(¶m, "snapshot")?; > let archive_name = required_string_param(¶m, "archive-name")?; > > + let compare_contents = match param.get("compare-content") { > + Some(Value::Bool(value)) => *value, > + Some(_) => bail!("invalid flag for compare-content"), > + None => false, > + }; > + > let namespace = match param.get("ns") { > Some(Value::String(ns)) => ns.parse()?, > Some(_) => bail!("invalid namespace parameter"), > @@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> Result<(), Error> { > > if archive_name.ends_with(".pxar") { > let file_name = format!("{}.didx", archive_name); > - diff_archive(snapshot_a, snapshot_b, &file_name, &repo_params).await?; > + diff_archive( > + snapshot_a, > + snapshot_b, > + &file_name, > + &repo_params, > + compare_contents, > + ) > + .await?; > } else { > bail!("Only .pxar files are supported"); > } > @@ -133,6 +155,7 @@ async fn diff_archive( > snapshot_b: &str, > file_name: &str, > repo_params: &RepoParams, > + compare_contents: bool, > ) -> Result<(), Error> { > let (index_a, accessor_a) = open_dynamic_index(snapshot_a, file_name, repo_params).await?; > let (index_b, accessor_b) = open_dynamic_index(snapshot_b, file_name, repo_params).await?; > @@ -184,7 +207,7 @@ async fn diff_archive( > > // ... so we compare the file metadata/contents to narrow the selection down to files > // which where *really* modified. > - let modified_files = compare_files(potentially_modified).await?; > + let modified_files = compare_files(potentially_modified, compare_contents).await?; > > show_file_list(&added_files, &deleted_files, &modified_files); > > @@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>( > /// Check if files were actually modified > async fn compare_files<'a>( > files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>, > + compare_contents: bool, > ) -> Result, Error> { > let mut modified_files = HashMap::new(); > > for (path, (entry_a, entry_b)) in files { > - if let Some(changed) = compare_file(entry_a, entry_b).await? { > + if let Some(changed) = compare_file(entry_a, entry_b, compare_contents).await? { > modified_files.insert(path, (entry_b, changed)); > } > } > @@ -367,6 +391,7 @@ async fn compare_files<'a>( > async fn compare_file( > file_a: &FileEntry, > file_b: &FileEntry, > + compare_contents: bool, > ) -> Result, Error> { > let mut changed = ChangedProperties::default(); > > @@ -388,6 +413,15 @@ async fn compare_file( > if size_a != size_b { > changed.size = true; > changed.content = true; > + } else if compare_contents { > + let content_identical = compare_file_contents(file_a, file_b).await?; > + if content_identical && !changed.any_without_mtime() { > + // If the content is identical and nothing, exluding mtime, > + // has changed, we don't consider the entry as modified. > + changed.mtime = false; > + } > + > + changed.content = !content_identical; > }; > } > (EntryKind::Directory, EntryKind::Directory) => {} > @@ -405,6 +439,44 @@ async fn compare_file( > } > } > > +async fn compare_file_contents(file_a: &FileEntry, file_b: &FileEntry) -> Result { > + let mut contents_a = file_a.contents().await?; > + let mut contents_b = file_b.contents().await?; > + > + compare_readers(&mut contents_a, &mut contents_b).await > +} > + > +async fn compare_readers(reader_a: &mut T, reader_b: &mut T) -> Result > +where > + T: AsyncReadExt + Unpin, > +{ > + let mut buf_a = vec![0u8; BUFFERSIZE]; > + let mut buf_b = vec![0u8; BUFFERSIZE]; > + > + loop { > + // normally, a join! would be nice here, but this leads to > + // some 'higher-order lifetime error' for which I'm not smart enough to solve. > + // let (bytes_read_a, bytes_read_b) = > + // tokio::try_join!(reader_a.read(&mut buf_a), reader_b.read(&mut buf_b))?; > + let bytes_read_a = reader_a.read(&mut buf_a).await?; > + let bytes_read_b = reader_b.read(&mut buf_b).await?; > + > + if bytes_read_a != bytes_read_b { > + return Ok(false); > + } > + > + if bytes_read_a == 0 { > + break; > + } > + > + if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] { > + return Ok(false); > + } > + } > + > + Ok(true) > +} > + > #[derive(Copy, Clone, Default)] > struct ChangedProperties { > entry_type: bool, > @@ -438,8 +510,11 @@ impl ChangedProperties { > } > > fn any(&self) -> bool { > + self.any_without_mtime() || self.mtime > + } > + > + fn any_without_mtime(&self) -> bool { > self.entry_type > - || self.mtime > || self.acl > || self.xattrs > || self.fcaps > @@ -553,3 +628,57 @@ fn show_file_list( > println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} {size:>10} {mtime:11} {name}"); > } > } > + > +#[cfg(test)] > +mod tests { > + use super::*; > + > + use std::{ > + io::Cursor, > + pin::Pin, > + task::{Context, Poll}, > + }; > + use tokio::io::AsyncRead; > + > + struct MockedAsyncReader(Cursor>); > + > + impl AsyncRead for MockedAsyncReader { > + fn poll_read( > + self: Pin<&mut Self>, > + _cx: &mut Context<'_>, > + read_buf: &mut tokio::io::ReadBuf<'_>, > + ) -> std::task::Poll> { very minor nit: Poll already gets imported above, so it could be removed > + let mut buf = vec![0u8; 100]; > + > + let res = std::io::Read::read(&mut self.get_mut().0, &mut buf); > + > + if let Ok(bytes) = res { > + read_buf.put_slice(&buf[..bytes]); > + } > + > + Poll::Ready(res.map(|_| ())) > + } > + } > + > + #[test] > + fn test_do_compare_file_contents() { > + fn compare(a: Vec, b: Vec) -> Result { > + let mut mock_a = MockedAsyncReader(Cursor::new(a)); > + let mut mock_b = MockedAsyncReader(Cursor::new(b)); > + > + proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut mock_b)) > + } > + > + assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true))); > + assert!(matches!(compare(vec![0; 15], vec![0; 14]), Ok(false))); > + assert!(matches!(compare(vec![0; 15], vec![1; 15]), Ok(false))); > + > + let mut buf = vec![1u8; 2 * BUFFERSIZE]; > + buf[BUFFERSIZE] = 0; > + assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false))); > + > + let mut buf = vec![1u8; 2 * BUFFERSIZE]; > + buf[2 * BUFFERSIZE - 1] = 0; > + assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false))); > + } > +}