all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Lukas Wagner <l.wagner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH v3 proxmox-backup 2/4] debug cli: add 'compare-content' flag to `diff archive` command
Date: Wed,  7 Dec 2022 10:38:17 +0100	[thread overview]
Message-ID: <20221207093819.75847-3-l.wagner@proxmox.com> (raw)
In-Reply-To: <20221207093819.75847-1-l.wagner@proxmox.com>

When --compare-content is set, the command will compare the
file content instead on relying on mtime to detect modified files.

Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
---
Changes from v2:
  - Increase buffersize to 4K
  - Added workaround for weird issue with tokio::try_join! and #[api]


 src/bin/proxmox_backup_debug/diff.rs   | 158 +++++++++++++++++++++++--
 src/bin/proxmox_backup_manager/ldap.rs | 102 ++++++++++++++++
 2 files changed, 248 insertions(+), 12 deletions(-)
 create mode 100644 src/bin/proxmox_backup_manager/ldap.rs

diff --git a/src/bin/proxmox_backup_debug/diff.rs b/src/bin/proxmox_backup_debug/diff.rs
index d5a4a1fe..a8a4e08d 100644
--- a/src/bin/proxmox_backup_debug/diff.rs
+++ b/src/bin/proxmox_backup_debug/diff.rs
@@ -28,12 +28,15 @@ use pbs_tools::json::required_string_param;
 use pxar::accessor::ReadAt;
 use pxar::EntryKind;
 use serde_json::Value;
+use tokio::io::AsyncReadExt;
 
 type ChunkDigest = [u8; 32];
 type FileEntry = pxar::accessor::aio::FileEntry<Arc<dyn ReadAt + Send + Sync>>;
 type Accessor = pxar::accessor::aio::Accessor<Arc<dyn ReadAt + Send + Sync>>;
 type Directory = pxar::accessor::aio::Directory<Arc<dyn ReadAt + Send + Sync>>;
 
+const BUFFERSIZE: usize = 4096;
+
 pub fn diff_commands() -> CommandLineInterface {
     let cmd_def = CliCommandMap::new().insert(
         "archive",
@@ -79,18 +82,30 @@ pub fn diff_commands() -> CommandLineInterface {
                 schema: KEYFD_SCHEMA,
                 optional: true,
             },
+            "compare-content": {
+                optional: true,
+                type: bool,
+                description: "Compare file content rather than solely relying on mtime for detecting modified files.",
+            },
         }
     }
 )]
 /// Diff an archive in two snapshots. The command will output a list of added, modified and deleted files.
-/// For modified files, only the file metadata (e.g. mtime, size, etc.) will be considered. The actual
-/// file contents will not be compared.
+/// For modified files, the file metadata (e.g. mode, uid, gid, size, etc.) will be considered. For detecting
+/// modification of file content, only mtime will be used by default. If the --compare-content flag is provided,
+/// mtime is ignored and file content will be compared.
 async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
     let repo = extract_repository_from_value(&param)?;
     let snapshot_a = required_string_param(&param, "prev-snapshot")?;
     let snapshot_b = required_string_param(&param, "snapshot")?;
     let archive_name = required_string_param(&param, "archive-name")?;
 
+    let compare_contents = match param.get("compare-content") {
+        Some(Value::Bool(value)) => *value,
+        Some(_) => bail!("invalid flag for compare-content"),
+        None => false,
+    };
+
     let namespace = match param.get("ns") {
         Some(Value::String(ns)) => ns.parse()?,
         Some(_) => bail!("invalid namespace parameter"),
@@ -120,7 +135,14 @@ async fn diff_archive_cmd(param: Value) -> Result<(), Error> {
 
     if archive_name.ends_with(".pxar") {
         let file_name = format!("{}.didx", archive_name);
-        diff_archive(snapshot_a, snapshot_b, &file_name, &repo_params).await?;
+        diff_archive(
+            snapshot_a,
+            snapshot_b,
+            &file_name,
+            &repo_params,
+            compare_contents,
+        )
+        .await?;
     } else {
         bail!("Only .pxar files are supported");
     }
@@ -133,6 +155,7 @@ async fn diff_archive(
     snapshot_b: &str,
     file_name: &str,
     repo_params: &RepoParams,
+    compare_contents: bool,
 ) -> Result<(), Error> {
     let (index_a, accessor_a) = open_dynamic_index(snapshot_a, file_name, repo_params).await?;
     let (index_b, accessor_b) = open_dynamic_index(snapshot_b, file_name, repo_params).await?;
@@ -184,7 +207,7 @@ async fn diff_archive(
 
     // ... so we compare the file metadata/contents to narrow the selection down to files
     // which where *really* modified.
-    let modified_files = compare_files(potentially_modified).await?;
+    let modified_files = compare_files(potentially_modified, compare_contents).await?;
 
     show_file_list(&added_files, &deleted_files, &modified_files);
 
@@ -352,11 +375,12 @@ fn visit_directory<'f, 'c>(
 /// Check if files were actually modified
 async fn compare_files<'a>(
     files: HashMap<&'a OsStr, (&'a FileEntry, &'a FileEntry)>,
+    compare_contents: bool,
 ) -> Result<HashMap<&'a OsStr, (&'a FileEntry, ChangedProperties)>, Error> {
     let mut modified_files = HashMap::new();
 
     for (path, (entry_a, entry_b)) in files {
-        if let Some(changed) = compare_file(entry_a, entry_b).await? {
+        if let Some(changed) = compare_file(entry_a, entry_b, compare_contents).await? {
             modified_files.insert(path, (entry_b, changed));
         }
     }
@@ -367,6 +391,7 @@ async fn compare_files<'a>(
 async fn compare_file(
     file_a: &FileEntry,
     file_b: &FileEntry,
+    compare_contents: bool,
 ) -> Result<Option<ChangedProperties>, Error> {
     let mut changed = ChangedProperties::default();
 
@@ -385,10 +410,22 @@ async fn compare_file(
             changed.content = a.major != b.major || a.minor != b.minor
         }
         (EntryKind::File { size: size_a, .. }, EntryKind::File { size: size_b, .. }) => {
-            if size_a != size_b {
-                changed.size = true;
-                changed.content = true;
-            };
+            changed.size = size_a != size_b;
+
+            if compare_contents {
+                if changed.size {
+                    changed.content = true;
+                } else {
+                    let content_identical = compare_file_contents(file_a, file_b).await?;
+                    if content_identical && !changed.any_without_mtime() {
+                        // If the content is identical and nothing, exluding mtime,
+                        // has changed, we don't consider the entry as modified.
+                        changed.mtime = false;
+                    }
+
+                    changed.content = !content_identical;
+                }
+            }
         }
         (EntryKind::Directory, EntryKind::Directory) => {}
         (EntryKind::Socket, EntryKind::Socket) => {}
@@ -405,6 +442,45 @@ async fn compare_file(
     }
 }
 
+async fn compare_file_contents(file_a: &FileEntry, file_b: &FileEntry) -> Result<bool, Error> {
+    let mut contents_a = file_a.contents().await?;
+    let mut contents_b = file_b.contents().await?;
+
+    compare_readers(&mut contents_a, &mut contents_b).await
+}
+
+async fn compare_readers<T>(reader_a: &mut T, reader_b: &mut T) -> Result<bool, Error>
+where
+    T: AsyncReadExt + Unpin,
+{
+    let mut buf_a = Box::new([0u8; BUFFERSIZE]);
+    let mut buf_b = Box::new([0u8; BUFFERSIZE]);
+
+    loop {
+        // Put the both read calls into their own async blocks, otherwise
+        // tokio::try_join! in combination with our #[api] macro leads to some
+        // weird `higher-order lifetime error`
+        let read_fut_a = async { reader_a.read(buf_a.as_mut_slice()).await };
+        let read_fut_b = async { reader_b.read(buf_b.as_mut_slice()).await };
+
+        let (bytes_read_a, bytes_read_b) = tokio::try_join!(read_fut_a, read_fut_b)?;
+
+        if bytes_read_a != bytes_read_b {
+            return Ok(false);
+        }
+
+        if bytes_read_a == 0 {
+            break;
+        }
+
+        if buf_a[..bytes_read_a] != buf_b[..bytes_read_b] {
+            return Ok(false);
+        }
+    }
+
+    Ok(true)
+}
+
 #[derive(Copy, Clone, Default)]
 struct ChangedProperties {
     entry_type: bool,
@@ -438,8 +514,11 @@ impl ChangedProperties {
     }
 
     fn any(&self) -> bool {
+        self.any_without_mtime() || self.mtime
+    }
+
+    fn any_without_mtime(&self) -> bool {
         self.entry_type
-            || self.mtime
             || self.acl
             || self.xattrs
             || self.fcaps
@@ -475,9 +554,10 @@ fn format_filesize(entry: &FileEntry, changed: bool) -> String {
 fn format_mtime(entry: &FileEntry, changed: bool) -> String {
     let mtime = &entry.metadata().stat.mtime;
 
-    let format = if changed { "*%F %T" } else { " %F %T" };
+    let mut format = change_indicator(changed).to_owned();
+    format.push_str("%F %T");
 
-    proxmox_time::strftime_local(format, mtime.secs).unwrap_or_default()
+    proxmox_time::strftime_local(&format, mtime.secs).unwrap_or_default()
 }
 
 fn format_mode(entry: &FileEntry, changed: bool) -> String {
@@ -553,3 +633,57 @@ fn show_file_list(
         println!("{op} {entry_type:>2} {mode:>5} {uid:>6} {gid:>6} {size:>10} {mtime:11} {name}");
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::{
+        io::Cursor,
+        pin::Pin,
+        task::{Context, Poll},
+    };
+    use tokio::io::{AsyncRead, ReadBuf};
+
+    struct MockedAsyncReader(Cursor<Vec<u8>>);
+
+    impl AsyncRead for MockedAsyncReader {
+        fn poll_read(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+            read_buf: &mut ReadBuf<'_>,
+        ) -> Poll<std::io::Result<()>> {
+            let mut buf = vec![0u8; 100];
+
+            let res = std::io::Read::read(&mut self.get_mut().0, &mut buf);
+
+            if let Ok(bytes) = res {
+                read_buf.put_slice(&buf[..bytes]);
+            }
+
+            Poll::Ready(res.map(|_| ()))
+        }
+    }
+
+    #[test]
+    fn test_do_compare_file_contents() {
+        fn compare(a: Vec<u8>, b: Vec<u8>) -> Result<bool, Error> {
+            let mut mock_a = MockedAsyncReader(Cursor::new(a));
+            let mut mock_b = MockedAsyncReader(Cursor::new(b));
+
+            proxmox_async::runtime::block_on(compare_readers(&mut mock_a, &mut mock_b))
+        }
+
+        assert!(matches!(compare(vec![0; 15], vec![0; 15]), Ok(true)));
+        assert!(matches!(compare(vec![0; 15], vec![0; 14]), Ok(false)));
+        assert!(matches!(compare(vec![0; 15], vec![1; 15]), Ok(false)));
+
+        let mut buf = vec![1u8; 2 * BUFFERSIZE];
+        buf[BUFFERSIZE] = 0;
+        assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+
+        let mut buf = vec![1u8; 2 * BUFFERSIZE];
+        buf[2 * BUFFERSIZE - 1] = 0;
+        assert!(matches!(compare(vec![1u8; 2 * BUFFERSIZE], buf), Ok(false)));
+    }
+}
diff --git a/src/bin/proxmox_backup_manager/ldap.rs b/src/bin/proxmox_backup_manager/ldap.rs
new file mode 100644
index 00000000..a2f10f66
--- /dev/null
+++ b/src/bin/proxmox_backup_manager/ldap.rs
@@ -0,0 +1,102 @@
+use anyhow::Error;
+use serde_json::Value;
+
+use proxmox_router::{cli::*, ApiHandler, RpcEnvironment};
+use proxmox_schema::api;
+
+use pbs_api_types::REALM_ID_SCHEMA;
+
+use proxmox_backup::api2;
+
+#[api(
+    input: {
+        properties: {
+            "output-format": {
+                schema: OUTPUT_FORMAT,
+                optional: true,
+            },
+        }
+    }
+)]
+/// List configured LDAP realms
+fn list_ldap_realms(param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
+    let output_format = get_output_format(&param);
+
+    let info = &api2::config::access::ldap::API_METHOD_LIST_LDAP_REALMS;
+    let mut data = match info.handler {
+        ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?,
+        _ => unreachable!(),
+    };
+
+    let options = default_table_format_options()
+        .column(ColumnConfig::new("realm"))
+        // .column(ColumnConfig::new("issuer-url"))
+        .column(ColumnConfig::new("comment"));
+
+    format_and_print_result_full(&mut data, &info.returns, &output_format, &options);
+
+    Ok(Value::Null)
+}
+#[api(
+    input: {
+        properties: {
+            realm: {
+                schema: REALM_ID_SCHEMA,
+            },
+            "output-format": {
+                schema: OUTPUT_FORMAT,
+                optional: true,
+            },
+        }
+    }
+)]
+
+/// Show LDAP realm configuration
+fn show_ldap_realm(param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
+    let output_format = get_output_format(&param);
+
+    let info = &api2::config::access::ldap::API_METHOD_READ_LDAP_REALM;
+    let mut data = match info.handler {
+        ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?,
+        _ => unreachable!(),
+    };
+
+    let options = default_table_format_options();
+    format_and_print_result_full(&mut data, &info.returns, &output_format, &options);
+
+    Ok(Value::Null)
+}
+
+pub fn ldap_commands() -> CommandLineInterface {
+    let cmd_def = CliCommandMap::new()
+        .insert("list", CliCommand::new(&API_METHOD_LIST_LDAP_REALMS))
+        .insert(
+            "show",
+            CliCommand::new(&API_METHOD_SHOW_LDAP_REALM)
+                .arg_param(&["realm"])
+                .completion_cb("realm", pbs_config::domains::complete_ldap_realm_name),
+        )
+        .insert(
+            "create",
+            CliCommand::new(&api2::config::access::ldap::API_METHOD_CREATE_LDAP_REALM)
+                .arg_param(&["realm"])
+                .arg_param(&["realm"])
+                .completion_cb("realm", pbs_config::domains::complete_ldap_realm_name),
+        )
+        .insert(
+            "update",
+            CliCommand::new(&api2::config::access::ldap::API_METHOD_UPDATE_LDAP_REALM)
+                .arg_param(&["realm"])
+                .arg_param(&["realm"])
+                .completion_cb("realm", pbs_config::domains::complete_ldap_realm_name),
+        )
+        .insert(
+            "delete",
+            CliCommand::new(&api2::config::access::ldap::API_METHOD_DELETE_LDAP_REALM)
+                .arg_param(&["realm"])
+                .arg_param(&["realm"])
+                .completion_cb("realm", pbs_config::domains::complete_ldap_realm_name),
+        );
+
+    cmd_def.into()
+}
-- 
2.30.2





  parent reply	other threads:[~2022-12-07  9:38 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-07  9:38 [pbs-devel] [PATCH v3 proxmox-backup 0/4] debug cli: improve output, optionally compare file content for `diff archive` Lukas Wagner
2022-12-07  9:38 ` [pbs-devel] [PATCH v3 proxmox-backup 1/4] debug cli: show more file attributes for `diff archive` command Lukas Wagner
2022-12-09  9:13   ` Wolfgang Bumiller
2022-12-09 10:45     ` Lukas Wagner
2022-12-07  9:38 ` Lukas Wagner [this message]
2022-12-07  9:38 ` [pbs-devel] [PATCH v3 proxmox-backup 3/4] debug cli: add colored output for `diff archive` Lukas Wagner
2022-12-07  9:38 ` [pbs-devel] [PATCH v3 proxmox-backup 4/4] debug cli: move parameters into the function signature Lukas Wagner
2022-12-09  9:22   ` Wolfgang Bumiller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221207093819.75847-3-l.wagner@proxmox.com \
    --to=l.wagner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal