From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pbs-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id E5F7C1FF199
	for <inbox@lore.proxmox.com>; Mon, 10 Mar 2025 12:17:32 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 509E2179F0;
	Mon, 10 Mar 2025 12:17:24 +0100 (CET)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Mon, 10 Mar 2025 12:16:34 +0100
Message-Id: <20250310111634.162156-5-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.5
In-Reply-To: <20250310111634.162156-1-c.ebner@proxmox.com>
References: <20250310111634.162156-1-c.ebner@proxmox.com>
MIME-Version: 1.0
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.031 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pbs-devel] [PATCH v2 proxmox-backup 4/4] fix #5331: garbage
 collection: avoid multiple chunk atime updates
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
Reply-To: Proxmox Backup Server development discussion
 <pbs-devel@lists.proxmox.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: pbs-devel-bounces@lists.proxmox.com
Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com>

Reduce the number of atime updates on the same chunk by logically
iterating over image index files, following the incremental backup
logic. By inserting paths for encountered images during
`list_images` using the GroupedImageList structure, the iteration
happens now for the same image filenames in the same image namespace
and group in a order based on the snapshot timestamp. For each image,
keep track of the encountered chunk digests, and remember these as
seen for the next snapshot. Chunks which have been encountered in the
previous image index, but are not present anymore are removed from
the list after each image, in order to reduce memory footprint.

Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5331
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
changes since version 1:
- Use pre-existing datastore iterator helpers, following the logic other
  datastore operations take.
- Chunks are now remembered for all archives per snapshot, not just a
  single archive per snapshot as previously, this mimics more closely
  the backup behaviour.

 pbs-datastore/src/datastore.rs | 117 +++++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 33 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index fdbb33a98..a80343d9b 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -25,7 +25,7 @@ use pbs_api_types::{
     MaintenanceMode, MaintenanceType, Operation, UPID,
 };
 
-use crate::backup_info::{BackupDir, BackupGroup};
+use crate::backup_info::{BackupDir, BackupGroup, BackupInfo};
 use crate::chunk_store::ChunkStore;
 use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
@@ -970,10 +970,10 @@ impl DataStore {
         ListGroups::new(Arc::clone(self), ns)?.collect()
     }
 
-    fn list_images(&self) -> Result<Vec<PathBuf>, Error> {
+    fn list_images(&self) -> Result<HashSet<PathBuf>, Error> {
         let base = self.base_path();
 
-        let mut list = vec![];
+        let mut list = HashSet::new();
 
         use walkdir::WalkDir;
 
@@ -1021,7 +1021,7 @@ impl DataStore {
                 if archive_type == ArchiveType::FixedIndex
                     || archive_type == ArchiveType::DynamicIndex
                 {
-                    list.push(path);
+                    list.insert(path);
                 }
             }
         }
@@ -1071,6 +1071,7 @@ impl DataStore {
         &self,
         index: Box<dyn IndexFile>,
         file_name: &Path, // only used for error reporting
+        touched_chunks: &mut TouchedChunks,
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
     ) -> Result<(), Error> {
@@ -1081,6 +1082,12 @@ impl DataStore {
             worker.check_abort()?;
             worker.fail_on_shutdown()?;
             let digest = index.index_digest(pos).unwrap();
+
+            // Avoid multiple expensive atime updates by utimensat
+            if touched_chunks.insert(*digest) {
+                continue;
+            }
+
             if !self.inner.chunk_store.cond_touch_chunk(digest, false)? {
                 let hex = hex::encode(digest);
                 warn!(
@@ -1107,43 +1114,87 @@ impl DataStore {
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
     ) -> Result<(), Error> {
-        let image_list = self.list_images()?;
-        let image_count = image_list.len();
-
+        // Iterate twice over the datastore to fetch images, even if this comes with an additional
+        // runtime cost:
+        // - First iteration to find all index files, no matter if they are in a location expected
+        //   by the datastore's hierarchy
+        // - Iterate using the datastore's helpers, so the namespaces, groups and snapshots are
+        //   looked up given the expected hierarchy and iterator logic
+        //
+        // By this it is assured that all index files are used, even if they would not have been
+        // seen by the regular logic and the user is informed by the garbage collection run about
+        // the detected index files not following the iterators logic.
+
+        let mut unprocessed_image_list = self.list_images()?;
+        let image_count = unprocessed_image_list.len();
+
+        // Optimize for avoiding updates of chunks atime in same group for consecutive
+        // snapshots multiple times.
+        let mut touched_chunks = TouchedChunks::new();
+        let mut processed_images = 0;
         let mut last_percentage: usize = 0;
 
-        let mut strange_paths_count: u64 = 0;
-
-        for (i, img) in image_list.into_iter().enumerate() {
-            worker.check_abort()?;
-            worker.fail_on_shutdown()?;
-
-            if let Some(backup_dir_path) = img.parent() {
-                let backup_dir_path = backup_dir_path.strip_prefix(self.base_path())?;
-                if let Some(backup_dir_str) = backup_dir_path.to_str() {
-                    if pbs_api_types::parse_ns_and_snapshot(backup_dir_str).is_err() {
-                        strange_paths_count += 1;
+        let arc_self = Arc::new(self.clone());
+        for namespace in arc_self
+            .recursive_iter_backup_ns(BackupNamespace::root())
+            .context("creating namespace iterator failed")?
+        {
+            let namespace = namespace.context("iterating namespaces failed")?;
+            for group in arc_self.iter_backup_groups(namespace)? {
+                let group = group.context("iterating backup groups failed")?;
+                let mut snapshots = group.list_backups().context("listing snapshots failed")?;
+                // Sort by snapshot timestamp to iterate over consecutive snapshots for each image.
+                BackupInfo::sort_list(&mut snapshots, true);
+                for snapshot in snapshots {
+                    for file in snapshot.files {
+                        worker.check_abort()?;
+                        worker.fail_on_shutdown()?;
+
+                        let mut path = snapshot.backup_dir.full_path();
+                        path.push(file);
+
+                        let index = match self.open_index_reader(&path)? {
+                            Some(index) => index,
+                            None => continue,
+                        };
+                        self.index_mark_used_chunks(
+                            index,
+                            &path,
+                            &mut touched_chunks,
+                            status,
+                            worker,
+                        )?;
+
+                        unprocessed_image_list.remove(&path);
+
+                        let percentage = (processed_images + 1) * 100 / image_count;
+                        if percentage > last_percentage {
+                            info!(
+                                "marked {percentage}% ({} of {image_count} index files)",
+                                processed_images + 1,
+                            );
+                            last_percentage = percentage;
+                        }
+                        processed_images += 1;
                     }
+                    touched_chunks.reset();
                 }
             }
-
-            if let Some(index) = self.open_index_reader(&img)? {
-                self.index_mark_used_chunks(index, &img, status, worker)?;
-            }
-
-            let percentage = (i + 1) * 100 / image_count;
-            if percentage > last_percentage {
-                info!(
-                    "marked {percentage}% ({} of {image_count} index files)",
-                    i + 1,
-                );
-                last_percentage = percentage;
-            }
         }
 
+        let strange_paths_count = unprocessed_image_list.len();
         if strange_paths_count > 0 {
-            info!(
-                "found (and marked) {strange_paths_count} index files outside of expected directory scheme"
+            warn!("found {strange_paths_count} index files outside of expected directory scheme");
+        }
+        for path in unprocessed_image_list {
+            let index = match self.open_index_reader(&path)? {
+                Some(index) => index,
+                None => continue,
+            };
+            self.index_mark_used_chunks(index, &path, &mut touched_chunks, status, worker)?;
+            warn!(
+                "Marked chunks for unexpected index file at '{}'",
+                path.to_string_lossy()
             );
         }
 
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel