From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id F252DC745 for ; Tue, 29 Nov 2022 15:17:43 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id DABCD709F for ; Tue, 29 Nov 2022 15:17:43 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Tue, 29 Nov 2022 15:17:42 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 223D344D09 for ; Tue, 29 Nov 2022 15:17:42 +0100 (CET) From: Lukas Wagner To: pbs-devel@lists.proxmox.com Date: Tue, 29 Nov 2022 15:17:30 +0100 Message-Id: <20221129141730.740199-3-l.wagner@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20221129141730.740199-1-l.wagner@proxmox.com> References: <20221129141730.740199-1-l.wagner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.000 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [diff.rs] Subject: [pbs-devel] [PATCH proxmox-backup 2/2] debug cli: add 'compare-content' flag to `diff archive` command X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 29 Nov 2022 14:17:44 -0000 When --compare-content is set, the command will compare the file content instead on relying on mtime to detect modified files. Signed-off-by: Lukas Wagner --- src/bin/proxmox_backup_debug/diff.rs | 141 +++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 6 deletions(-) diff --git a/src/bin/proxmox_backup_debug/diff.rs b/src/bin/proxmox_backup_debug/diff.rs index d5a4a1fe..9b916760 100644 --- a/src/bin/proxmox_backup_debug/diff.rs +++ b/src/bin/proxmox_backup_debug/diff.rs @@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param; use pxar::accessor::ReadAt; use pxar::EntryKind; use serde_json::Value; +use tokio::io::AsyncReadExt; type ChunkDigest = [u8; 32]; type FileEntry = pxar::accessor::aio::FileEntry>; type Accessor = pxar::accessor::aio::Accessor>; type Directory = pxar::accessor::aio::Directory>; +const BUFFERSIZE: usize = 1024; + pub fn diff_commands() -> CommandLineInterface { let cmd_def = CliCommandMap::new().insert( "archive", @@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface { schema: KEYFD_SCHEMA, optional: true, }, + "compare-content": { + optional: true, + type: bool, + description: "Compare file content rather than solely relying on mtime for detecting modified files.", + }, } } )] /// Diff an archive in two snapshots. The command will output a list of added, modified and deleted files. -/// For modified files, only the file metadata (e.g. mtime, size, etc.) will be considered. The actual -/// file contents will not be compared. +/// For modified files, the file metadata (e.g. mode, uid, gid, size, etc.) will be considered. For detecting +/// modification of file content, only mtime will be used by default. If the --compare-content flag is provided, +/// mtime is ignored and file content will be compared. async fn diff_archive_cmd(param: Value) -> Result<(), Error> { let repo = extract_repository_from_value(¶m)?; let snapshot_a = required_string_param(¶m, "prev-snapshot")?; let snapshot_b = required_string_param(¶m, "snapshot")?; let archive_name = required_string_param(¶m, "archive-name")?; + let compare_contents = match param.get("compare-content") { + Some(Value::Bool(value)) => *value, + Some(_) => bail!("invalid flag for compare-content"), + None => false, + }; + let namespace = match param.get("ns") { Some(Value::String(ns)) => ns.parse()?, Some(_) => bail!("invalid namespace parameter"), @@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> Result<(), Error> { if archive_name.ends_with(".pxar") { let file_name = format!("{}.didx", archive_name); - diff_archive(snapshot_a, snapshot_b, &file_name, &repo_params).await?; + diff_archive( + snapshot_a, + snapshot_b, + &file_name, + &repo_params, + compare_contents, + ) + .await?; } else { bail!("Only .pxar files are supported"); } @@ -133,6 +155,7 @@ async fn diff_archive( snapshot_b: &str, file_name: &str, repo_params: &RepoParams, + compare_contents: bool, ) -> Result<(), Error> { let (index_a, accessor_a) = open_dynamic_index(snapshot_a, file_name, repo_params).await?; let (index_b, accessor_b) = open_dynamic_index(snapshot_b, file_name, repo_params).await?; @@ -184,7 +207,7 @@ async fn diff_archive( // ... so we compare the file metadata/contents to narrow the selection down to files // which where *really* modified. - let modified_files = compare_files(potentially_modified).await?; + let modified_files = compare_files(potentially_modified, compare_contents).await?; show_file_list(&added_files, &deleted_files, &modified_files); @@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>( /// Check if files were actually modified async fn compare_files<'a>( files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>, + compare_contents: bool, ) -> Result, Error> { let mut modified_files = HashMap::new(); for (path, (entry_a, entry_b)) in files { - if let Some(changed) = compare_file(entry_a, entry_b).await? { + if let Some(changed) = compare_file(entry_a, entry_b, compare_contents).await? { modified_files.insert(path, (entry_b, changed)); } } @@ -367,6 +391,7 @@ async fn compare_files<'a>( async fn compare_file( file_a: &FileEntry, file_b: &FileEntry, + compare_contents: bool, ) -> Result, Error> { let mut changed = ChangedProperties::default(); @@ -388,6 +413,15 @@ async fn compare_file( if size_a != size_b { changed.size = true; changed.content = true; + } else if compare_contents { + let content_identical = compare_file_contents(file_a, file_b).await?; + if content_identical && !changed.any_without_mtime() { + // If the content is identical and nothing, exluding mtime, + // has changed, we don't consider the entry as modified. + changed.mtime = false; + } + + changed.content = !content_identical; }; } (EntryKind::Directory, EntryKind::Directory) => {} @@ -405,6 +439,44 @@ async fn compare_file( } } +async fn compare_file_contents(file_a: &FileEntry, file_b: &FileEntry) -> Result { + let mut contents_a = file_a.contents().await?; + let mut contents_b = file_b.contents().await?; + + compare_readers(&mut contents_a, &mut contents_b).await +} + +async fn compare_readers(reader_a: &mut T, reader_b: &mut T) -> Result +where + T: AsyncReadExt + Unpin, +{ + let mut buf_a = vec![0u8; BUFFERSIZE]; + let mut buf_b = vec![0u8; BUFFERSIZE]; + + loop { + // normally, a join! would be nice here, but this leads to + // some 'higher-order lifetime error' for which I'm not smart enough to solve. + // let (bytes_read_a, bytes_read_b) = + // tokio::try_join!(reader_a.read(&mut buf_a), reader_b.read(&mut buf_b))?; + let bytes_read_a = reader_a.read(&mut buf_a).await?; + let bytes_read_b = reader_b.read(&mut buf_b).await?; + + if bytes_read_a != bytes_read_b { + return Ok(false); + } + + if bytes_read_a == 0 { + break; + } + + if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] { + return Ok(false); + } + } + + Ok(true) +} + #[derive(Copy, Clone, Default)] struct ChangedProperties { entry_type: bool, @@ -438,8 +510,11 @@ impl ChangedProperties { } fn any(&self) -> bool { + self.any_without_mtime() || self.mtime + } + + fn any_without_mtime(&self) -> bool { self.entry_type - || self.mtime || self.acl || self.xattrs || self.fcaps @@ -553,3 +628,57 @@ fn show_file_list( println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} {size:>10} {mtime:11} {name}"); } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::{ + io::Cursor, + pin::Pin, + task::{Context, Poll}, + }; + use tokio::io::AsyncRead; + + struct MockedAsyncReader(Cursor>); + + impl AsyncRead for MockedAsyncReader { + fn poll_read( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + read_buf: &mut tokio::io::ReadBuf<'_>, + ) -> std::task::Poll> { + let mut buf = vec![0u8; 100]; + + let res = std::io::Read::read(&mut self.get_mut().0, &mut buf); + + if let Ok(bytes) = res { + read_buf.put_slice(&buf[..bytes]); + } + + Poll::Ready(res.map(|_| ())) + } + } + + #[test] + fn test_do_compare_file_contents() { + fn compare(a: Vec, b: Vec) -> Result { + let mut mock_a = MockedAsyncReader(Cursor::new(a)); + let mut mock_b = MockedAsyncReader(Cursor::new(b)); + + proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut mock_b)) + } + + assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true))); + assert!(matches!(compare(vec![0; 15], vec![0; 14]), Ok(false))); + assert!(matches!(compare(vec![0; 15], vec![1; 15]), Ok(false))); + + let mut buf = vec![1u8; 2 * BUFFERSIZE]; + buf[BUFFERSIZE] = 0; + assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false))); + + let mut buf = vec![1u8; 2 * BUFFERSIZE]; + buf[2 * BUFFERSIZE - 1] = 0; + assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false))); + } +} -- 2.30.2