From: Lukas Wagner <l.wagner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH v2 proxmox-backup 2/3] debug cli: add 'compare-content' flag to `diff archive` command
Date: Mon, 5 Dec 2022 15:55:13 +0100 [thread overview]
Message-ID: <20221205145514.645111-3-l.wagner@proxmox.com> (raw)
In-Reply-To: <20221205145514.645111-1-l.wagner@proxmox.com>
When --compare-content is set, the command will compare the
file content instead on relying on mtime to detect modified files.
Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
---
Changes from v1:
- Made `changed indicator for file content` a bit less confusing.
For regular files, it is now only displayed if
- the --comapare-content flag is set and
- the file contents *actually* differ
- Removed unnecessesary namespace prefix for std::task::Pin in unit tests
src/bin/proxmox_backup_debug/diff.rs | 157 +++++++++++++++++++++++++--
1 file changed, 145 insertions(+), 12 deletions(-)
diff --git a/src/bin/proxmox_backup_debug/diff.rs b/src/bin/proxmox_backup_debug/diff.rs
index d5a4a1fe..b5ecd721 100644
--- a/src/bin/proxmox_backup_debug/diff.rs
+++ b/src/bin/proxmox_backup_debug/diff.rs
@@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param;
use pxar::accessor::ReadAt;
use pxar::EntryKind;
use serde_json::Value;
+use tokio::io::AsyncReadExt;
type ChunkDigest = [u8; 32];
type FileEntry = pxar::accessor::aio::FileEntry<Arc<dyn ReadAt + Send + Sync>>;
type Accessor = pxar::accessor::aio::Accessor<Arc<dyn ReadAt + Send + Sync>>;
type Directory = pxar::accessor::aio::Directory<Arc<dyn ReadAt + Send + Sync>>;
+const BUFFERSIZE: usize = 1024;
+
pub fn diff_commands() -> CommandLineInterface {
let cmd_def = CliCommandMap::new().insert(
"archive",
@@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface {
schema: KEYFD_SCHEMA,
optional: true,
},
+ "compare-content": {
+ optional: true,
+ type: bool,
+ description: "Compare file content rather than solely relying on mtime for detecting modified files.",
+ },
}
}
)]
/// Diff an archive in two snapshots. The command will output a list of added, modified and deleted files.
-/// For modified files, only the file metadata (e.g. mtime, size, etc.) will be considered. The actual
-/// file contents will not be compared.
+/// For modified files, the file metadata (e.g. mode, uid, gid, size, etc.) will be considered. For detecting
+/// modification of file content, only mtime will be used by default. If the --compare-content flag is provided,
+/// mtime is ignored and file content will be compared.
async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
let repo = extract_repository_from_value(¶m)?;
let snapshot_a = required_string_param(¶m, "prev-snapshot")?;
let snapshot_b = required_string_param(¶m, "snapshot")?;
let archive_name = required_string_param(¶m, "archive-name")?;
+ let compare_contents = match param.get("compare-content") {
+ Some(Value::Bool(value)) => *value,
+ Some(_) => bail!("invalid flag for compare-content"),
+ None => false,
+ };
+
let namespace = match param.get("ns") {
Some(Value::String(ns)) => ns.parse()?,
Some(_) => bail!("invalid namespace parameter"),
@@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
if archive_name.ends_with(".pxar") {
let file_name = format!("{}.didx", archive_name);
- diff_archive(snapshot_a, snapshot_b, &file_name, &repo_params).await?;
+ diff_archive(
+ snapshot_a,
+ snapshot_b,
+ &file_name,
+ &repo_params,
+ compare_contents,
+ )
+ .await?;
} else {
bail!("Only .pxar files are supported");
}
@@ -133,6 +155,7 @@ async fn diff_archive(
snapshot_b: &str,
file_name: &str,
repo_params: &RepoParams,
+ compare_contents: bool,
) -> Result<(), Error> {
let (index_a, accessor_a) = open_dynamic_index(snapshot_a, file_name, repo_params).await?;
let (index_b, accessor_b) = open_dynamic_index(snapshot_b, file_name, repo_params).await?;
@@ -184,7 +207,7 @@ async fn diff_archive(
// ... so we compare the file metadata/contents to narrow the selection down to files
// which where *really* modified.
- let modified_files = compare_files(potentially_modified).await?;
+ let modified_files = compare_files(potentially_modified, compare_contents).await?;
show_file_list(&added_files, &deleted_files, &modified_files);
@@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>(
/// Check if files were actually modified
async fn compare_files<'a>(
files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>,
+ compare_contents: bool,
) -> Result<HashMap<&'a OsStr, (&'a FileEntry, ChangedProperties)>, Error> {
let mut modified_files = HashMap::new();
for (path, (entry_a, entry_b)) in files {
- if let Some(changed) = compare_file(entry_a, entry_b).await? {
+ if let Some(changed) = compare_file(entry_a, entry_b, compare_contents).await? {
modified_files.insert(path, (entry_b, changed));
}
}
@@ -367,6 +391,7 @@ async fn compare_files<'a>(
async fn compare_file(
file_a: &FileEntry,
file_b: &FileEntry,
+ compare_contents: bool,
) -> Result<Option<ChangedProperties>, Error> {
let mut changed = ChangedProperties::default();
@@ -385,10 +410,22 @@ async fn compare_file(
changed.content = a.major != b.major || a.minor != b.minor
}
(EntryKind::File { size: size_a, .. }, EntryKind::File { size: size_b, .. }) => {
- if size_a != size_b {
- changed.size = true;
- changed.content = true;
- };
+ changed.size = size_a != size_b;
+
+ if compare_contents {
+ if changed.size {
+ changed.content = true;
+ } else {
+ let content_identical = compare_file_contents(file_a, file_b).await?;
+ if content_identical && !changed.any_without_mtime() {
+ // If the content is identical and nothing, exluding mtime,
+ // has changed, we don't consider the entry as modified.
+ changed.mtime = false;
+ }
+
+ changed.content = !content_identical;
+ }
+ }
}
(EntryKind::Directory, EntryKind::Directory) => {}
(EntryKind::Socket, EntryKind::Socket) => {}
@@ -405,6 +442,44 @@ async fn compare_file(
}
}
+async fn compare_file_contents(file_a: &FileEntry, file_b: &FileEntry) -> Result<bool, Error> {
+ let mut contents_a = file_a.contents().await?;
+ let mut contents_b = file_b.contents().await?;
+
+ compare_readers(&mut contents_a, &mut contents_b).await
+}
+
+async fn compare_readers<T>(reader_a: &mut T, reader_b: &mut T) -> Result<bool, Error>
+where
+ T: AsyncReadExt + Unpin,
+{
+ let mut buf_a = vec![0u8; BUFFERSIZE];
+ let mut buf_b = vec![0u8; BUFFERSIZE];
+
+ loop {
+ // normally, a join! would be nice here, but this leads to
+ // some 'higher-order lifetime error' for which I'm not smart enough to solve.
+ // let (bytes_read_a, bytes_read_b) =
+ // tokio::try_join!(reader_a.read(&mut buf_a), reader_b.read(&mut buf_b))?;
+ let bytes_read_a = reader_a.read(&mut buf_a).await?;
+ let bytes_read_b = reader_b.read(&mut buf_b).await?;
+
+ if bytes_read_a != bytes_read_b {
+ return Ok(false);
+ }
+
+ if bytes_read_a == 0 {
+ break;
+ }
+
+ if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] {
+ return Ok(false);
+ }
+ }
+
+ Ok(true)
+}
+
#[derive(Copy, Clone, Default)]
struct ChangedProperties {
entry_type: bool,
@@ -438,8 +513,11 @@ impl ChangedProperties {
}
fn any(&self) -> bool {
+ self.any_without_mtime() || self.mtime
+ }
+
+ fn any_without_mtime(&self) -> bool {
self.entry_type
- || self.mtime
|| self.acl
|| self.xattrs
|| self.fcaps
@@ -475,9 +553,10 @@ fn format_filesize(entry: &FileEntry, changed: bool) -> String {
fn format_mtime(entry: &FileEntry, changed: bool) -> String {
let mtime = &entry.metadata().stat.mtime;
- let format = if changed { "*%F %T" } else { " %F %T" };
+ let mut format = change_indicator(changed).to_owned();
+ format.push_str("%F %T");
- proxmox_time::strftime_local(format, mtime.secs).unwrap_or_default()
+ proxmox_time::strftime_local(&format, mtime.secs).unwrap_or_default()
}
fn format_mode(entry: &FileEntry, changed: bool) -> String {
@@ -553,3 +632,57 @@ fn show_file_list(
println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} {size:>10} {mtime:11} {name}");
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use std::{
+ io::Cursor,
+ pin::Pin,
+ task::{Context, Poll},
+ };
+ use tokio::io::{AsyncRead, ReadBuf};
+
+ struct MockedAsyncReader(Cursor<Vec<u8>>);
+
+ impl AsyncRead for MockedAsyncReader {
+ fn poll_read(
+ self: Pin<&mut Self>,
+ _cx: &mut Context<'_>,
+ read_buf: &mut ReadBuf<'_>,
+ ) -> Poll<std::io::Result<()>> {
+ let mut buf = vec![0u8; 100];
+
+ let res = std::io::Read::read(&mut self.get_mut().0, &mut buf);
+
+ if let Ok(bytes) = res {
+ read_buf.put_slice(&buf[..bytes]);
+ }
+
+ Poll::Ready(res.map(|_| ()))
+ }
+ }
+
+ #[test]
+ fn test_do_compare_file_contents() {
+ fn compare(a: Vec<u8>, b: Vec<u8>) -> Result<bool, Error> {
+ let mut mock_a = MockedAsyncReader(Cursor::new(a));
+ let mut mock_b = MockedAsyncReader(Cursor::new(b));
+
+ proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut mock_b))
+ }
+
+ assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true)));
+ assert!(matches!(compare(vec![0; 15], vec![0; 14]), Ok(false)));
+ assert!(matches!(compare(vec![0; 15], vec![1; 15]), Ok(false)));
+
+ let mut buf = vec![1u8; 2 * BUFFERSIZE];
+ buf[BUFFERSIZE] = 0;
+ assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+
+ let mut buf = vec![1u8; 2 * BUFFERSIZE];
+ buf[2 * BUFFERSIZE - 1] = 0;
+ assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+ }
+}
--
2.30.2
next prev parent reply other threads:[~2022-12-05 14:56 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-05 14:55 [pbs-devel] [PATCH v2 proxmox-backup 0/3] debug cli: improve output, optionally compare file content for `diff archive` Lukas Wagner
2022-12-05 14:55 ` [pbs-devel] [PATCH v2 proxmox-backup 1/3] debug cli: show more file attributes for `diff archive` command Lukas Wagner
2022-12-05 14:55 ` Lukas Wagner [this message]
2022-12-06 11:39 ` [pbs-devel] [PATCH v2 proxmox-backup 2/3] debug cli: add 'compare-content' flag to " Wolfgang Bumiller
2022-12-06 14:44 ` Lukas Wagner
2022-12-06 15:01 ` Wolfgang Bumiller
2022-12-05 14:55 ` [pbs-devel] [PATCH v2 proxmox-backup 3/3] debug cli: add colored output " Lukas Wagner
2022-12-06 11:49 ` Wolfgang Bumiller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221205145514.645111-3-l.wagner@proxmox.com \
--to=l.wagner@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox