From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id A82AED6E2 for ; Thu, 1 Dec 2022 14:31:01 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 7B8142A1E1 for ; Thu, 1 Dec 2022 14:31:01 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 1 Dec 2022 14:30:57 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 37DC143324 for ; Thu, 1 Dec 2022 14:30:57 +0100 (CET) Message-ID: Date: Thu, 1 Dec 2022 14:30:55 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.5.0 Content-Language: en-US From: Stefan Hanreich To: pbs-devel@lists.proxmox.com Reply-To: Proxmox Backup Server development discussion References: <20221129141730.740199-1-l.wagner@proxmox.com> <20221129141730.740199-3-l.wagner@proxmox.com> In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.699 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -0.257 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [proxmox.com, diff.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup 2/2] debug cli: add 'compare-content' flag to `diff archive` command X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Dec 2022 13:31:01 -0000 On 12/1/22 14:29, Stefan Hanreich wrote: > > > On 11/29/22 15:17, Lukas Wagner wrote: >> When --compare-content is set, the command will compare the >> file content instead on relying on mtime to detect modified files. >> >> Signed-off-by: Lukas Wagner >> --- >>   src/bin/proxmox_backup_debug/diff.rs | 141 +++++++++++++++++++++++++-- >>   1 file changed, 135 insertions(+), 6 deletions(-) >> >> diff --git a/src/bin/proxmox_backup_debug/diff.rs >> b/src/bin/proxmox_backup_debug/diff.rs >> index d5a4a1fe..9b916760 100644 >> --- a/src/bin/proxmox_backup_debug/diff.rs >> +++ b/src/bin/proxmox_backup_debug/diff.rs >> @@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param; >>   use pxar::accessor::ReadAt; >>   use pxar::EntryKind; >>   use serde_json::Value; >> +use tokio::io::AsyncReadExt; >>     type ChunkDigest = [u8; 32]; >>   type FileEntry = pxar::accessor::aio::FileEntry> Send + Sync>>; >>   type Accessor = pxar::accessor::aio::Accessor> + Sync>>; >>   type Directory = pxar::accessor::aio::Directory> Send + Sync>>; >>   +const BUFFERSIZE: usize = 1024; >> + >>   pub fn diff_commands() -> CommandLineInterface { >>       let cmd_def = CliCommandMap::new().insert( >>           "archive", >> @@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface { >>                   schema: KEYFD_SCHEMA, >>                   optional: true, >>               }, >> +            "compare-content": { >> +                optional: true, >> +                type: bool, >> +                description: "Compare file content rather than >> solely relying on mtime for detecting modified files.", >> +            }, >>           } >>       } >>   )] >>   /// Diff an archive in two snapshots. The command will output a >> list of added, modified and deleted files. >> -/// For modified files, only the file metadata (e.g. mtime, size, >> etc.) will be considered. The actual >> -/// file contents will not be compared. >> +/// For modified files, the file metadata (e.g. mode, uid, gid, >> size, etc.) will be considered. For detecting >> +/// modification of file content, only mtime will be used by >> default. If the --compare-content flag is provided, >> +/// mtime is ignored and file content will be compared. >>   async fn diff_archive_cmd(param: Value) -> Result<(), Error> { >>       let repo = extract_repository_from_value(¶m)?; >>       let snapshot_a = required_string_param(¶m, "prev-snapshot")?; >>       let snapshot_b = required_string_param(¶m, "snapshot")?; >>       let archive_name = required_string_param(¶m, "archive-name")?; >>   +    let compare_contents = match param.get("compare-content") { >> +        Some(Value::Bool(value)) => *value, >> +        Some(_) => bail!("invalid flag for compare-content"), >> +        None => false, >> +    }; >> + >>       let namespace = match param.get("ns") { >>           Some(Value::String(ns)) => ns.parse()?, >>           Some(_) => bail!("invalid namespace parameter"), >> @@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> >> Result<(), Error> { >>         if archive_name.ends_with(".pxar") { >>           let file_name = format!("{}.didx", archive_name); >> -        diff_archive(snapshot_a, snapshot_b, &file_name, >> &repo_params).await?; >> +        diff_archive( >> +            snapshot_a, >> +            snapshot_b, >> +            &file_name, >> +            &repo_params, >> +            compare_contents, >> +        ) >> +        .await?; >>       } else { >>           bail!("Only .pxar files are supported"); >>       } >> @@ -133,6 +155,7 @@ async fn diff_archive( >>       snapshot_b: &str, >>       file_name: &str, >>       repo_params: &RepoParams, >> +    compare_contents: bool, >>   ) -> Result<(), Error> { >>       let (index_a, accessor_a) = open_dynamic_index(snapshot_a, >> file_name, repo_params).await?; >>       let (index_b, accessor_b) = open_dynamic_index(snapshot_b, >> file_name, repo_params).await?; >> @@ -184,7 +207,7 @@ async fn diff_archive( >>         // ... so we compare the file metadata/contents to narrow the >> selection down to files >>       // which where *really* modified. >> -    let modified_files = compare_files(potentially_modified).await?; >> +    let modified_files = compare_files(potentially_modified, >> compare_contents).await?; >>         show_file_list(&added_files, &deleted_files, &modified_files); >>   @@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>( >>   /// Check if files were actually modified >>   async fn compare_files<'a>( >>       files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>, >> +    compare_contents: bool, >>   ) -> Result, >> Error> { >>       let mut modified_files = HashMap::new(); >>         for (path, (entry_a, entry_b)) in files { >> -        if let Some(changed) = compare_file(entry_a, entry_b).await? { >> +        if let Some(changed) = compare_file(entry_a, entry_b, >> compare_contents).await? { >>               modified_files.insert(path, (entry_b, changed)); >>           } >>       } >> @@ -367,6 +391,7 @@ async fn compare_files<'a>( >>   async fn compare_file( >>       file_a: &FileEntry, >>       file_b: &FileEntry, >> +    compare_contents: bool, >>   ) -> Result, Error> { >>       let mut changed = ChangedProperties::default(); >>   @@ -388,6 +413,15 @@ async fn compare_file( >>               if size_a != size_b { >>                   changed.size = true; >>                   changed.content = true; >> +            } else if compare_contents { >> +                let content_identical = >> compare_file_contents(file_a, file_b).await?; >> +                if content_identical && !changed.any_without_mtime() { >> +                    // If the content is identical and nothing, >> exluding mtime, >> +                    // has changed, we don't consider the entry as >> modified. >> +                    changed.mtime = false; >> +                } >> + >> +                changed.content = !content_identical; >>               }; >>           } >>           (EntryKind::Directory, EntryKind::Directory) => {} >> @@ -405,6 +439,44 @@ async fn compare_file( >>       } >>   } >>   +async fn compare_file_contents(file_a: &FileEntry, file_b: >> &FileEntry) -> Result { >> +    let mut contents_a = file_a.contents().await?; >> +    let mut contents_b = file_b.contents().await?; >> + >> +    compare_readers(&mut contents_a, &mut contents_b).await >> +} >> + >> +async fn compare_readers(reader_a: &mut T, reader_b: &mut T) -> >> Result >> +where >> +    T: AsyncReadExt + Unpin, >> +{ >> +    let mut buf_a = vec![0u8; BUFFERSIZE]; >> +    let mut buf_b = vec![0u8; BUFFERSIZE]; >> + >> +    loop { >> +        // normally, a join! would be nice here, but this leads to >> +        // some 'higher-order lifetime error' for which I'm not >> smart enough to solve. >> +        // let (bytes_read_a, bytes_read_b) = >> +        //     tokio::try_join!(reader_a.read(&mut buf_a), >> reader_b.read(&mut buf_b))?; >> +        let bytes_read_a = reader_a.read(&mut buf_a).await?; >> +        let bytes_read_b = reader_b.read(&mut buf_b).await?; >> + >> +        if bytes_read_a != bytes_read_b { >> +            return Ok(false); >> +        } >> + >> +        if bytes_read_a == 0 { >> +            break; >> +        } >> + >> +        if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] { >> +            return Ok(false); >> +        } >> +    } >> + >> +    Ok(true) >> +} >> + >>   #[derive(Copy, Clone, Default)] >>   struct ChangedProperties { >>       entry_type: bool, >> @@ -438,8 +510,11 @@ impl ChangedProperties { >>       } >>         fn any(&self) -> bool { >> +        self.any_without_mtime() || self.mtime >> +    } >> + >> +    fn any_without_mtime(&self) -> bool { >>           self.entry_type >> -            || self.mtime >>               || self.acl >>               || self.xattrs >>               || self.fcaps >> @@ -553,3 +628,57 @@ fn show_file_list( >>           println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} >> {size:>10} {mtime:11} {name}"); >>       } >>   } >> + >> +#[cfg(test)] >> +mod tests { >> +    use super::*; >> + >> +    use std::{ >> +        io::Cursor, >> +        pin::Pin, >> +        task::{Context, Poll}, >> +    }; >> +    use tokio::io::AsyncRead; >> + >> +    struct MockedAsyncReader(Cursor>); >> + >> +    impl AsyncRead for MockedAsyncReader { >> +        fn poll_read( >> +            self: Pin<&mut Self>, >> +            _cx: &mut Context<'_>, >> +            read_buf: &mut tokio::io::ReadBuf<'_>, >> +        ) -> std::task::Poll> { > > very minor nit: Poll already gets imported above, so it could be removed I mean std::task:: of course > >> +            let mut buf = vec![0u8; 100]; >> + >> +            let res = std::io::Read::read(&mut self.get_mut().0, >> &mut buf); >> + >> +            if let Ok(bytes) = res { >> +                read_buf.put_slice(&buf[..bytes]); >> +            } >> + >> +            Poll::Ready(res.map(|_| ())) >> +        } >> +    } >> + >> +    #[test] >> +    fn test_do_compare_file_contents() { >> +        fn compare(a: Vec, b: Vec) -> Result { >> +            let mut mock_a = MockedAsyncReader(Cursor::new(a)); >> +            let mut mock_b = MockedAsyncReader(Cursor::new(b)); >> + >> + proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut >> mock_b)) >> +        } >> + >> +        assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true))); >> +        assert!(matches!(compare(vec![0; 15], vec![0; 14]), >> Ok(false))); >> +        assert!(matches!(compare(vec![0; 15], vec![1; 15]), >> Ok(false))); >> + >> +        let mut buf = vec![1u8; 2 * BUFFERSIZE]; >> +        buf[BUFFERSIZE] = 0; >> +        assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), >> Ok(false))); >> + >> +        let mut buf = vec![1u8; 2 * BUFFERSIZE]; >> +        buf[2 * BUFFERSIZE - 1] = 0; >> +        assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), >> Ok(false))); >> +    } >> +} > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > >