From: Lukas Wagner <l.wagner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 2/2] debug cli: add 'compare-content' flag to `diff archive` command
Date: Tue, 29 Nov 2022 15:17:30 +0100 [thread overview]
Message-ID: <20221129141730.740199-3-l.wagner@proxmox.com> (raw)
In-Reply-To: <20221129141730.740199-1-l.wagner@proxmox.com>
When --compare-content is set, the command will compare the
file content instead on relying on mtime to detect modified files.
Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
---
src/bin/proxmox_backup_debug/diff.rs | 141 +++++++++++++++++++++++++--
1 file changed, 135 insertions(+), 6 deletions(-)
diff --git a/src/bin/proxmox_backup_debug/diff.rs b/src/bin/proxmox_backup_debug/diff.rs
index d5a4a1fe..9b916760 100644
--- a/src/bin/proxmox_backup_debug/diff.rs
+++ b/src/bin/proxmox_backup_debug/diff.rs
@@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param;
use pxar::accessor::ReadAt;
use pxar::EntryKind;
use serde_json::Value;
+use tokio::io::AsyncReadExt;
type ChunkDigest = [u8; 32];
type FileEntry = pxar::accessor::aio::FileEntry<Arc<dyn ReadAt + Send + Sync>>;
type Accessor = pxar::accessor::aio::Accessor<Arc<dyn ReadAt + Send + Sync>>;
type Directory = pxar::accessor::aio::Directory<Arc<dyn ReadAt + Send + Sync>>;
+const BUFFERSIZE: usize = 1024;
+
pub fn diff_commands() -> CommandLineInterface {
let cmd_def = CliCommandMap::new().insert(
"archive",
@@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface {
schema: KEYFD_SCHEMA,
optional: true,
},
+ "compare-content": {
+ optional: true,
+ type: bool,
+ description: "Compare file content rather than solely relying on mtime for detecting modified files.",
+ },
}
}
)]
/// Diff an archive in two snapshots. The command will output a list of added, modified and deleted files.
-/// For modified files, only the file metadata (e.g. mtime, size, etc.) will be considered. The actual
-/// file contents will not be compared.
+/// For modified files, the file metadata (e.g. mode, uid, gid, size, etc.) will be considered. For detecting
+/// modification of file content, only mtime will be used by default. If the --compare-content flag is provided,
+/// mtime is ignored and file content will be compared.
async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
let repo = extract_repository_from_value(¶m)?;
let snapshot_a = required_string_param(¶m, "prev-snapshot")?;
let snapshot_b = required_string_param(¶m, "snapshot")?;
let archive_name = required_string_param(¶m, "archive-name")?;
+ let compare_contents = match param.get("compare-content") {
+ Some(Value::Bool(value)) => *value,
+ Some(_) => bail!("invalid flag for compare-content"),
+ None => false,
+ };
+
let namespace = match param.get("ns") {
Some(Value::String(ns)) => ns.parse()?,
Some(_) => bail!("invalid namespace parameter"),
@@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
if archive_name.ends_with(".pxar") {
let file_name = format!("{}.didx", archive_name);
- diff_archive(snapshot_a, snapshot_b, &file_name, &repo_params).await?;
+ diff_archive(
+ snapshot_a,
+ snapshot_b,
+ &file_name,
+ &repo_params,
+ compare_contents,
+ )
+ .await?;
} else {
bail!("Only .pxar files are supported");
}
@@ -133,6 +155,7 @@ async fn diff_archive(
snapshot_b: &str,
file_name: &str,
repo_params: &RepoParams,
+ compare_contents: bool,
) -> Result<(), Error> {
let (index_a, accessor_a) = open_dynamic_index(snapshot_a, file_name, repo_params).await?;
let (index_b, accessor_b) = open_dynamic_index(snapshot_b, file_name, repo_params).await?;
@@ -184,7 +207,7 @@ async fn diff_archive(
// ... so we compare the file metadata/contents to narrow the selection down to files
// which where *really* modified.
- let modified_files = compare_files(potentially_modified).await?;
+ let modified_files = compare_files(potentially_modified, compare_contents).await?;
show_file_list(&added_files, &deleted_files, &modified_files);
@@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>(
/// Check if files were actually modified
async fn compare_files<'a>(
files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>,
+ compare_contents: bool,
) -> Result<HashMap<&'a OsStr, (&'a FileEntry, ChangedProperties)>, Error> {
let mut modified_files = HashMap::new();
for (path, (entry_a, entry_b)) in files {
- if let Some(changed) = compare_file(entry_a, entry_b).await? {
+ if let Some(changed) = compare_file(entry_a, entry_b, compare_contents).await? {
modified_files.insert(path, (entry_b, changed));
}
}
@@ -367,6 +391,7 @@ async fn compare_files<'a>(
async fn compare_file(
file_a: &FileEntry,
file_b: &FileEntry,
+ compare_contents: bool,
) -> Result<Option<ChangedProperties>, Error> {
let mut changed = ChangedProperties::default();
@@ -388,6 +413,15 @@ async fn compare_file(
if size_a != size_b {
changed.size = true;
changed.content = true;
+ } else if compare_contents {
+ let content_identical = compare_file_contents(file_a, file_b).await?;
+ if content_identical && !changed.any_without_mtime() {
+ // If the content is identical and nothing, exluding mtime,
+ // has changed, we don't consider the entry as modified.
+ changed.mtime = false;
+ }
+
+ changed.content = !content_identical;
};
}
(EntryKind::Directory, EntryKind::Directory) => {}
@@ -405,6 +439,44 @@ async fn compare_file(
}
}
+async fn compare_file_contents(file_a: &FileEntry, file_b: &FileEntry) -> Result<bool, Error> {
+ let mut contents_a = file_a.contents().await?;
+ let mut contents_b = file_b.contents().await?;
+
+ compare_readers(&mut contents_a, &mut contents_b).await
+}
+
+async fn compare_readers<T>(reader_a: &mut T, reader_b: &mut T) -> Result<bool, Error>
+where
+ T: AsyncReadExt + Unpin,
+{
+ let mut buf_a = vec![0u8; BUFFERSIZE];
+ let mut buf_b = vec![0u8; BUFFERSIZE];
+
+ loop {
+ // normally, a join! would be nice here, but this leads to
+ // some 'higher-order lifetime error' for which I'm not smart enough to solve.
+ // let (bytes_read_a, bytes_read_b) =
+ // tokio::try_join!(reader_a.read(&mut buf_a), reader_b.read(&mut buf_b))?;
+ let bytes_read_a = reader_a.read(&mut buf_a).await?;
+ let bytes_read_b = reader_b.read(&mut buf_b).await?;
+
+ if bytes_read_a != bytes_read_b {
+ return Ok(false);
+ }
+
+ if bytes_read_a == 0 {
+ break;
+ }
+
+ if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] {
+ return Ok(false);
+ }
+ }
+
+ Ok(true)
+}
+
#[derive(Copy, Clone, Default)]
struct ChangedProperties {
entry_type: bool,
@@ -438,8 +510,11 @@ impl ChangedProperties {
}
fn any(&self) -> bool {
+ self.any_without_mtime() || self.mtime
+ }
+
+ fn any_without_mtime(&self) -> bool {
self.entry_type
- || self.mtime
|| self.acl
|| self.xattrs
|| self.fcaps
@@ -553,3 +628,57 @@ fn show_file_list(
println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} {size:>10} {mtime:11} {name}");
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use std::{
+ io::Cursor,
+ pin::Pin,
+ task::{Context, Poll},
+ };
+ use tokio::io::AsyncRead;
+
+ struct MockedAsyncReader(Cursor<Vec<u8>>);
+
+ impl AsyncRead for MockedAsyncReader {
+ fn poll_read(
+ self: Pin<&mut Self>,
+ _cx: &mut Context<'_>,
+ read_buf: &mut tokio::io::ReadBuf<'_>,
+ ) -> std::task::Poll<std::io::Result<()>> {
+ let mut buf = vec![0u8; 100];
+
+ let res = std::io::Read::read(&mut self.get_mut().0, &mut buf);
+
+ if let Ok(bytes) = res {
+ read_buf.put_slice(&buf[..bytes]);
+ }
+
+ Poll::Ready(res.map(|_| ()))
+ }
+ }
+
+ #[test]
+ fn test_do_compare_file_contents() {
+ fn compare(a: Vec<u8>, b: Vec<u8>) -> Result<bool, Error> {
+ let mut mock_a = MockedAsyncReader(Cursor::new(a));
+ let mut mock_b = MockedAsyncReader(Cursor::new(b));
+
+ proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut mock_b))
+ }
+
+ assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true)));
+ assert!(matches!(compare(vec![0; 15], vec![0; 14]), Ok(false)));
+ assert!(matches!(compare(vec![0; 15], vec![1; 15]), Ok(false)));
+
+ let mut buf = vec![1u8; 2 * BUFFERSIZE];
+ buf[BUFFERSIZE] = 0;
+ assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+
+ let mut buf = vec![1u8; 2 * BUFFERSIZE];
+ buf[2 * BUFFERSIZE - 1] = 0;
+ assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+ }
+}
--
2.30.2
next prev parent reply other threads:[~2022-11-29 14:17 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-29 14:17 [pbs-devel] [PATCH proxmox-backup 0/2] debug cli: improve output, optionally compare file content for `diff archive` Lukas Wagner
2022-11-29 14:17 ` [pbs-devel] [PATCH proxmox-backup 1/2] debug cli: show more file attributes for `diff archive` command Lukas Wagner
2022-11-29 14:17 ` Lukas Wagner [this message]
2022-12-01 13:29 ` [pbs-devel] [PATCH proxmox-backup 2/2] debug cli: add 'compare-content' flag to " Stefan Hanreich
2022-12-01 13:30 ` Stefan Hanreich
2022-11-30 16:27 ` [pbs-devel] [PATCH proxmox-backup 0/2] debug cli: improve output, optionally compare file content for `diff archive` Thomas Lamprecht
2022-12-01 7:30 ` Lukas Wagner
2022-12-01 8:02 ` Thomas Lamprecht
2022-12-01 13:29 ` Stefan Hanreich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221129141730.740199-3-l.wagner@proxmox.com \
--to=l.wagner@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.