From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pbs-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id 2EBE31FF389
	for <inbox@lore.proxmox.com>; Tue,  7 May 2024 17:54:19 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id C86251276B;
	Tue,  7 May 2024 17:54:13 +0200 (CEST)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Tue,  7 May 2024 17:52:20 +0200
Message-Id: <20240507155244.793819-39-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20240507155244.793819-1-c.ebner@proxmox.com>
References: <20240507155244.793819-1-c.ebner@proxmox.com>
MIME-Version: 1.0
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.027 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pbs-devel] [PATCH v5 proxmox-backup 38/62] client: pxar: helper
 for lookup of reusable dynamic entries
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
Reply-To: Proxmox Backup Server development discussion
 <pbs-devel@lists.proxmox.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: pbs-devel-bounces@lists.proxmox.com
Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com>

The helper method allows looking up the entries of a dynamic index
which fully cover a given offset range. Further, the helper returns
the start padding, from the start offset of the first dynamic index
entry to the start offset of the given range, and the end padding,
from the end offset of the given range to the end offset of the last
dynamic index entry.

This will be used to look up the size and digest of the chunks covering
the payload range of a regular file, in order to re-use the found chunks
by indexing them in the archive's index file instead of re-encoding the
payload.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
changes since version 4:
- no changes

 pbs-client/src/pxar/create.rs | 70 +++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index 2bb5a6253..0f32efcce 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
 use std::ffi::{CStr, CString, OsStr};
 use std::fmt;
 use std::io::{self, Read};
+use std::ops::Range;
 use std::os::unix::ffi::OsStrExt;
 use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
 use std::path::{Path, PathBuf};
@@ -25,6 +26,8 @@ use proxmox_lang::c_str;
 use proxmox_sys::fs::{self, acl, xattr};
 
 use pbs_datastore::catalog::BackupCatalogWriter;
+use pbs_datastore::dynamic_index::DynamicIndexReader;
+use pbs_datastore::index::IndexFile;
 
 use crate::pxar::metadata::errno_is_unsupported;
 use crate::pxar::tools::assert_single_path_component;
@@ -791,6 +794,73 @@ impl Archiver {
     }
 }
 
+/// Dynamic index entry (size, padding and digest) reusable by payload references
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct ReusableDynamicEntry {
+    size: u64, // entry length: distance from the previous entry's end to this entry's end
+    padding: u64, // bytes of this entry outside of the looked-up range (start and/or end)
+    digest: [u8; 32], // digest taken over from the dynamic index entry
+}
+
+impl ReusableDynamicEntry {
+    #[inline]
+    pub fn size(&self) -> u64 {
+        self.size
+    }
+
+    #[inline]
+    pub fn digest(&self) -> [u8; 32] {
+        self.digest
+    }
+}
+
+/// Collect the index entries covering `range`; returns `(entries, start padding, end padding)`
+fn lookup_dynamic_entries(
+    index: &DynamicIndexReader,
+    range: Range<u64>,
+) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
+    let end_idx = index.index_count() - 1; // NOTE(review): would underflow on an empty index
+    let chunk_end = index.chunk_end(end_idx);
+    let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
+
+    let mut prev_end = if start == 0 {
+        0
+    } else {
+        index.chunk_end(start - 1)
+    };
+    let padding_start = range.start - prev_end; // distance from previous chunk end to range.start
+    let mut padding_end = 0; // stays 0 if no visited entry ends past range.end
+
+    let mut indices = Vec::new();
+    for dynamic_entry in &index.index()[start..] {
+        let end = dynamic_entry.end();
+
+        let reusable_dynamic_entry = ReusableDynamicEntry {
+            size: (end - prev_end),
+            padding: 0,
+            digest: dynamic_entry.digest(),
+        };
+        indices.push(reusable_dynamic_entry);
+
+        if range.end < end { // entry reaches past the range: record the overshoot and stop
+            padding_end = end - range.end;
+            break;
+        }
+        prev_end = end;
+    }
+
+    if let Some(first) = indices.first_mut() {
+        first.padding += padding_start;
+    }
+
+    if let Some(last) = indices.last_mut() { // same entry as `first` if only one was collected
+        last.padding += padding_end;
+    }
+
+    Ok((indices, padding_start, padding_end))
+}
+
 fn get_metadata(
     fd: RawFd,
     stat: &FileStat,
-- 
2.39.2



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel