From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id DC8AB73780
 for <pbs-devel@lists.proxmox.com>; Fri, 18 Jun 2021 11:29:18 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id C92742558A
 for <pbs-devel@lists.proxmox.com>; Fri, 18 Jun 2021 11:29:18 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id A3FB725576
 for <pbs-devel@lists.proxmox.com>; Fri, 18 Jun 2021 11:29:17 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 6F99B44153
 for <pbs-devel@lists.proxmox.com>; Fri, 18 Jun 2021 11:29:11 +0200 (CEST)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Fri, 18 Jun 2021 11:29:09 +0200
Message-Id: <20210618092910.4518-1-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.20.1
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.877 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [datastore.rs, verify.rs]
Subject: [pbs-devel] [PATCH proxmox-backup 1/2] backup/datastore: refactor
 chunk inode sorting to the datastore
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 18 Jun 2021 09:29:18 -0000

Move the chunk inode sorting into the datastore, so that we can reuse that information.

Removing the code that added chunks to the corrupted list on a failed
stat is ok: 'get_chunks_in_order' returns chunks that could not be
stat'ed at the end of the list, and 'verify_index_chunks' already marks
a chunk as corrupt if loading it fails later, so we still mark them
corrupt (assuming that the load will fail if the stat does).

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
Alternatively, we could return two lists, or check for the u64::MAX
value after the loop, but IMHO it's ok this way.

 src/backup/datastore.rs | 38 ++++++++++++++++++++++++++++++++++++++
 src/backup/verify.rs    | 38 +++++++-------------------------------
 2 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index a0cf50b2..116d2441 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -825,4 +825,42 @@ impl DataStore {
     pub fn verify_new(&self) -> bool {
         self.verify_new
     }
+
+    /// Returns `(index position, inode number)` pairs for the chunks of `index`, sorted by inode number.
+    /// Chunks for which `skip_chunk` returns true are omitted; chunks that could not be stat'ed sort last.
+    pub fn get_chunks_in_order<F, A>(
+        &self,
+        index: &Box<dyn IndexFile + Send>,
+        skip_chunk: F,
+        check_abort: A,
+    ) -> Result<Vec<(usize, u64)>, Error>
+    where
+        F: Fn(&[u8; 32]) -> bool,
+        A: Fn(usize) -> Result<(), Error>,
+    {
+        let index_count = index.index_count();
+        let mut chunk_list = Vec::with_capacity(index_count);
+        use std::os::unix::fs::MetadataExt;
+        for pos in 0..index_count {
+            check_abort(pos)?; // any error from the callback is returned to the caller right away
+
+            let info = index.chunk_info(pos).unwrap(); // panics if the index yields no info for `pos`
+
+            if skip_chunk(&info.digest) {
+                continue; // caller asked to leave this chunk out of the list
+            }
+
+            let ino = match self.stat_chunk(&info.digest) {
+                Err(_) => u64::MAX, // could not stat: the sentinel inode moves the chunk to the end of the list
+                Ok(metadata) => metadata.ino(),
+            };
+
+            chunk_list.push((pos, ino));
+        }
+
+        // Sorting by inode improves data locality, which makes it a lot faster on spinning disks.
+        chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(&ino_b));
+
+        Ok(chunk_list)
+    }
 }
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index a1b1e6dd..74b0cfe7 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -179,42 +179,18 @@ fn verify_index_chunks(
         }
     };
 
-    let index_count = index.index_count();
-    let mut chunk_list = Vec::with_capacity(index_count);
-
-    use std::os::unix::fs::MetadataExt;
-
-    for pos in 0..index_count {
+    let check_abort = |pos: usize| -> Result<(), Error> {
         if pos & 1023 == 0 {
             verify_worker.worker.check_abort()?;
             crate::tools::fail_on_shutdown()?;
         }
+        Ok(())
+    };
 
-        let info = index.chunk_info(pos).unwrap();
-
-        if skip_chunk(&info.digest) {
-            continue; // already verified or marked corrupt
-        }
-
-        match verify_worker.datastore.stat_chunk(&info.digest) {
-            Err(err) => {
-                verify_worker.corrupt_chunks.lock().unwrap().insert(info.digest);
-                task_log!(verify_worker.worker, "can't verify chunk, stat failed - {}", err);
-                errors.fetch_add(1, Ordering::SeqCst);
-                rename_corrupted_chunk(
-                    verify_worker.datastore.clone(),
-                    &info.digest,
-                    &verify_worker.worker,
-                );
-            }
-            Ok(metadata) => {
-                chunk_list.push((pos, metadata.ino()));
-            }
-        }
-    }
-
-    // sorting by inode improves data locality, which makes it lots faster on spinners
-    chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(&ino_b));
+    let chunk_list =
+        verify_worker
+            .datastore
+            .get_chunks_in_order(&index, skip_chunk, check_abort)?;
 
     for (pos, _) in chunk_list {
         verify_worker.worker.check_abort()?;
-- 
2.20.1