public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH proxmox-offline-mirror 1/4] pool: add diff and list helpers
Date: Wed, 21 Sep 2022 10:12:39 +0200	[thread overview]
Message-ID: <20220921081242.1139249-2-f.gruenbichler@proxmox.com> (raw)
In-Reply-To: <20220921081242.1139249-1-f.gruenbichler@proxmox.com>

Add three helpers: one for diffing two relative paths within a pool
(e.g., for comparing snapshots), one for diffing two pools (e.g., for
diffing a mirror and its copy on a medium), and one for listing paths.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
We could extend Diff with a list of error paths and make most of the
errors here non-fatal; not sure whether that would be nicer?

 src/pool.rs  | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/types.rs |  16 ++++-
 2 files changed, 181 insertions(+), 2 deletions(-)

diff --git a/src/pool.rs b/src/pool.rs
index b6047b8..5dba775 100644
--- a/src/pool.rs
+++ b/src/pool.rs
@@ -1,7 +1,7 @@
 use std::{
     cmp::max,
     collections::{hash_map::Entry, HashMap},
-    fs::{hard_link, remove_dir, File},
+    fs::{hard_link, remove_dir, File, Metadata},
     ops::Deref,
     os::linux::fs::MetadataExt,
     path::{Path, PathBuf},
@@ -15,6 +15,8 @@ use proxmox_sys::fs::{create_path, file_get_contents, replace_file, CreateOption
 use proxmox_time::epoch_i64;
 use walkdir::WalkDir;
 
+use crate::types::Diff;
+
 #[derive(Debug)]
 /// Pool consisting of two (possibly overlapping) directory trees:
 /// - pool_dir contains checksum files added by `add_file`
@@ -542,6 +544,169 @@ impl PoolLockGuard<'_> {
         std::fs::rename(&abs_from, &abs_to)
             .map_err(|err| format_err!("Failed to rename {abs_from:?} to {abs_to:?} - {err}"))
     }
+
+    /// Calculate diff between two pool dirs
+    pub(crate) fn diff_dirs(&self, path: &Path, other_path: &Path) -> Result<Diff, Error> {
+        let mut diff = Diff::default();
+
+        let handle_entry = |entry: Result<walkdir::DirEntry, walkdir::Error>,
+                            base: &Path,
+                            other_base: &Path,
+                            changed: Option<&mut Vec<(PathBuf, u64)>>,
+                            missing: &mut Vec<(PathBuf, u64)>|
+         -> Result<(), Error> {
+            let path = entry?.into_path();
+
+            let meta = path.metadata()?;
+            if !meta.is_file() {
+                return Ok(());
+            };
+
+            let relative = path.strip_prefix(base)?;
+            let mut absolute = other_base.to_path_buf();
+            absolute.push(relative);
+            if absolute.exists() {
+                if let Some(changed) = changed {
+                    let other_meta = absolute.metadata()?;
+                    if other_meta.st_ino() != meta.st_ino() {
+                        changed.push((
+                            relative.to_path_buf(),
+                            meta.st_size().abs_diff(other_meta.st_size()),
+                        ));
+                    }
+                }
+            } else {
+                missing.push((relative.to_path_buf(), meta.st_size()));
+            }
+
+            Ok(())
+        };
+
+        let path = self.get_path(path)?;
+        let other_path = self.get_path(other_path)?;
+
+        WalkDir::new(&path).into_iter().try_for_each(|entry| {
+            handle_entry(
+                entry,
+                &path,
+                &other_path,
+                Some(&mut diff.changed.paths),
+                &mut diff.removed.paths,
+            )
+        })?;
+        WalkDir::new(&other_path)
+            .into_iter()
+            .try_for_each(|entry| {
+                handle_entry(entry, &other_path, &path, None, &mut diff.added.paths)
+            })?;
+
+        Ok(diff)
+    }
+
+    /// Calculate diff between two pools
+    pub(crate) fn diff_pools(&self, other: &Pool) -> Result<Diff, Error> {
+        let mut diff = Diff::default();
+
+        let handle_entry = |entry: Result<walkdir::DirEntry, walkdir::Error>,
+                            pool: &Pool,
+                            pool_csums: &HashMap<u64, CheckSums>,
+                            other_pool: &Pool,
+                            other_csums: &HashMap<u64, CheckSums>,
+                            changed: Option<&mut Vec<(PathBuf, u64)>>,
+                            missing: &mut Vec<(PathBuf, u64)>|
+         -> Result<(), Error> {
+            let path = entry?.into_path();
+
+            let meta = path.metadata()?;
+            if !meta.is_file() {
+                return Ok(());
+            };
+
+            let base = &pool.link_dir;
+
+            let relative = path.strip_prefix(base)?;
+            let absolute = other_pool.get_path(relative)?;
+            if absolute.exists() {
+                if let Some(changed) = changed {
+                    let csum = match pool_csums.get(&meta.st_ino()) {
+                        Some(csum) => csum,
+                        None => {
+                            eprintln!("{path:?} path not registered with pool.");
+                            changed.push((relative.to_path_buf(), 0)); // TODO add warning/error field?
+                            return Ok(());
+                        }
+                    };
+                    let other_meta = absolute.metadata()?;
+                    let other_csum = match other_csums.get(&other_meta.st_ino()) {
+                        Some(csum) => csum,
+                        None => {
+                            eprintln!("{absolute:?} path not registered with pool.");
+                            changed.push((relative.to_path_buf(), 0)); // TODO add warning/error field?
+                            return Ok(());
+                        }
+                    };
+                    if csum != other_csum {
+                        changed.push((
+                            relative.to_path_buf(),
+                            meta.st_size().abs_diff(other_meta.st_size()),
+                        ));
+                    }
+                }
+            } else {
+                missing.push((relative.to_path_buf(), meta.st_size()));
+            }
+
+            Ok(())
+        };
+
+        let other = other.lock()?;
+        let (csums, _) = self.get_inode_csum_map()?;
+        let (other_csums, _) = other.get_inode_csum_map()?;
+
+        WalkDir::new(&self.link_dir)
+            .into_iter()
+            .try_for_each(|entry| {
+                handle_entry(
+                    entry,
+                    self,
+                    &csums,
+                    &other,
+                    &other_csums,
+                    Some(&mut diff.changed.paths),
+                    &mut diff.removed.paths,
+                )
+            })?;
+        WalkDir::new(&other.link_dir)
+            .into_iter()
+            .try_for_each(|entry| {
+                handle_entry(
+                    entry,
+                    &other,
+                    &other_csums,
+                    self,
+                    &csums,
+                    None,
+                    &mut diff.added.paths,
+                )
+            })?;
+
+        Ok(diff)
+    }
+
+    pub(crate) fn list_files(&self) -> Result<Vec<(PathBuf, Metadata)>, Error> {
+        let mut file_list = Vec::new();
+        WalkDir::new(&self.link_dir)
+            .into_iter()
+            .try_for_each(|entry| -> Result<(), Error> {
+                let path = entry?.into_path();
+                let meta = path.metadata()?;
+                let relative = path.strip_prefix(&self.link_dir)?;
+
+                file_list.push((relative.to_path_buf(), meta));
+                Ok(())
+            })?;
+        Ok(file_list)
+    }
 }
 
 fn link_file_do(source: &Path, target: &Path) -> Result<bool, Error> {
diff --git a/src/types.rs b/src/types.rs
index 7a1348a..3098a8d 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1,4 +1,4 @@
-use std::{fmt::Display, str::FromStr};
+use std::{fmt::Display, path::PathBuf, str::FromStr};
 
 use anyhow::Error;
 use proxmox_schema::{api, const_regex, ApiStringFormat, Schema, StringSchema, Updater};
@@ -140,3 +140,17 @@ impl FromStr for ProductType {
         }
     }
 }
+
+/// Entries of Diff
+#[derive(Default)]
+pub struct DiffMember {
+    pub paths: Vec<(PathBuf, u64)>,
+}
+
+/// Differences between two pools or pool directories
+#[derive(Default)]
+pub struct Diff {
+    pub added: DiffMember,
+    pub changed: DiffMember,
+    pub removed: DiffMember,
+}
-- 
2.30.2





  reply	other threads:[~2022-09-21  8:13 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-21  8:12 [pve-devel] [PATCH proxmox-offline-mirror 0/4] extend/add commands Fabian Grünbichler
2022-09-21  8:12 ` Fabian Grünbichler [this message]
2022-09-21  8:12 ` [pve-devel] [PATCH proxmox-offline-mirror 2/4] snapshots: add diff command Fabian Grünbichler
2022-09-21  8:12 ` [pve-devel] [PATCH proxmox-offline-mirror 3/4] medium: " Fabian Grünbichler
2022-09-21  8:12 ` [pve-devel] [PATCH proxmox-offline-mirror 4/4] cli: allow listing snapshots of all mirrors Fabian Grünbichler
2022-09-26  7:51 ` [pve-devel] applied-series: [PATCH proxmox-offline-mirror 0/4] extend/add commands Wolfgang Bumiller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220921081242.1139249-2-f.gruenbichler@proxmox.com \
    --to=f.gruenbichler@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal