From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.ebner@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 9E5229EEA1
 for <pbs-devel@lists.proxmox.com>; Fri,  3 Nov 2023 14:38:06 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id B2A5A1E3A0
 for <pbs-devel@lists.proxmox.com>; Fri,  3 Nov 2023 14:38:05 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pbs-devel@lists.proxmox.com>; Fri,  3 Nov 2023 14:38:01 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 7224044311
 for <pbs-devel@lists.proxmox.com>; Fri,  3 Nov 2023 14:38:01 +0100 (CET)
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Fri,  3 Nov 2023 14:37:26 +0100
Message-Id: <20231103133729.2252730-22-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20231103133729.2252730-1-c.ebner@proxmox.com>
References: <20231103133729.2252730-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.071 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pbs-devel] [PATCH v3 proxmox-backup 21/24] fix #3174: client: Add
 detection mode to backup creation
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 03 Nov 2023 13:38:06 -0000

Introduces the `change-detection-mode` parameter to change file
encoding behavior.

When set to `metadata`, the catalog for the previous backup run and
the corresponding index file are fetched from the server and used as
reference during pxar archive creation.
This allows the archiver to skip encoding the file payloads of
unchanged regular files and instead reference their existing chunks
for inclusion in the new backup's index file, creating a pxar archive
with an appendix section containing the payloads as a concatenation
of chunks.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
Changes since version 2:
- Fix issue with reference catalog and index download when either the
  backup group contains no snapshots or the snapshot does not contain an
  archive with the given name.

Changes since version 1:
- Replace `incremental` flag with `change-detection-mode` param

 proxmox-backup-client/src/main.rs | 133 ++++++++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 8 deletions(-)

diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index cbdd9f43..e986716b 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -1,5 +1,6 @@
 use std::collections::{HashSet, VecDeque};
 use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
@@ -43,10 +44,10 @@ use pbs_client::tools::{
     CHUNK_SIZE_SCHEMA, REPO_URL_SCHEMA,
 };
 use pbs_client::{
-    delete_ticket_info, parse_backup_specification, view_task_result, BackupReader,
-    BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, ChunkStream,
-    FixedChunkStream, HttpClient, PxarBackupStream, RemoteChunkReader, UploadOptions,
-    BACKUP_SOURCE_SCHEMA,
+    delete_ticket_info, parse_backup_detection_mode_specification, parse_backup_specification,
+    view_task_result, BackupDetectionMode, BackupReader, BackupRepository, BackupSpecificationType,
+    BackupStats, BackupWriter, ChunkStream, FixedChunkStream, HttpClient, PxarBackupStream,
+    RemoteChunkReader, UploadOptions, BACKUP_DETECTION_MODE_SPEC, BACKUP_SOURCE_SCHEMA,
 };
 use pbs_datastore::catalog::{CatalogReader, CatalogWriter};
 use pbs_datastore::chunk_store::verify_chunk_size;
@@ -666,6 +667,10 @@ fn spawn_catalog_upload(
                schema: TRAFFIC_CONTROL_BURST_SCHEMA,
                optional: true,
            },
+           "change-detection-mode": {
+               schema: BACKUP_DETECTION_MODE_SPEC,
+               optional: true,
+           },
            "exclude": {
                type: Array,
                description: "List of paths or patterns for matching files to exclude.",
@@ -849,7 +854,20 @@ async fn create_backup(
 
     let backup_time = backup_time_opt.unwrap_or_else(epoch_i64);
 
-    let client = connect_rate_limited(&repo, rate_limit)?;
+    let cd_mode = param["change-detection-mode"].as_str().unwrap_or("data");
+    let detection_mode = parse_backup_detection_mode_specification(cd_mode)?;
+
+    let client = connect_rate_limited(&repo, rate_limit.clone())?;
+    let backup_group = BackupGroup::new(backup_type, backup_id);
+
+    let previous_snapshot = if let BackupDetectionMode::Metadata(_) = detection_mode {
+        api_datastore_latest_snapshot(&client, &repo.store(), &backup_ns, backup_group)
+            .await
+            .ok()
+    } else {
+        None
+    };
+
     record_repository(&repo);
 
     let snapshot = BackupDir::from((backup_type, backup_id.to_owned(), backup_time));
@@ -959,8 +977,8 @@ async fn create_backup(
         log::info!("{} {} '{}' to '{}' as {}", what, desc, file, repo, target);
     };
 
-    for (backup_type, filename, target, size) in upload_list {
-        match (backup_type, dry_run) {
+    for (backup_spec_type, filename, target, size) in upload_list {
+        match (backup_spec_type, dry_run) {
             // dry-run
             (BackupSpecificationType::CONFIG, true) => log_file("config file", &filename, &target),
             (BackupSpecificationType::LOGFILE, true) => log_file("log file", &filename, &target),
@@ -1006,12 +1024,62 @@ async fn create_backup(
 
                 log_file("directory", &filename, &target);
 
+                let known_chunks = Arc::new(Mutex::new(HashSet::new()));
+                let previous_ref =
+                    if let BackupDetectionMode::Metadata(ref archives) = detection_mode {
+                        if archives.is_empty() || archives.contains(&target) {
+                            match previous_manifest {
+                                None => {
+                                    log::info!("No previous manifest, fallback to regular mode");
+                                    None
+                                }
+                                Some(ref manifest) => {
+                                    let reference_index = client
+                                        .download_previous_dynamic_index(
+                                            &target,
+                                            &manifest,
+                                            known_chunks.clone(),
+                                        )
+                                        .await
+                                        .ok();
+                                    let reference_catalog = download_reference_catalog(
+                                        &repo,
+                                        previous_snapshot.as_ref().unwrap(),
+                                        &backup_ns,
+                                        crypt_config.clone(),
+                                    )
+                                    .await
+                                    .ok();
+
+                                    match  (reference_index, reference_catalog) {
+                                        (Some(reference_index), Some(reference_catalog)) => {
+                                            log::info!(
+                                                "Using previous catalog as metadata reference for '{target}'"
+                                            );
+
+                                            Some(pbs_client::pxar::PxarPrevRef {
+                                                index: reference_index,
+                                                catalog: reference_catalog,
+                                                archive_name: target.clone(),
+                                            })
+                                        }
+                                        _ => None,
+                                    }
+                                }
+                            }
+                        } else {
+                            None
+                        }
+                    } else {
+                        None
+                    };
+
                 let pxar_options = pbs_client::pxar::PxarCreateOptions {
                     device_set: devices.clone(),
                     patterns: pattern_list.clone(),
                     entries_max: entries_max as usize,
                     skip_lost_and_found,
-                    previous_ref: None,
+                    previous_ref,
                     archive_name: Some(std::ffi::CString::new(target.as_str())?),
                 };
 
@@ -1112,6 +1180,71 @@ async fn create_backup(
     Ok(Value::Null)
 }
 
+/// Download the catalog of `previous_snapshot` into an anonymous temporary file.
+///
+/// Opens a separate reader connection to `repo`, checks the fingerprint of the
+/// snapshot's manifest against `crypt_config` and copies the `CATALOG_NAME` archive
+/// into an unnamed `O_TMPFILE` file below `/tmp`. The file is rewound to offset 0
+/// before it is handed to the returned [`CatalogReader`].
+///
+/// # Errors
+///
+/// Returns an error if the connection or reader session cannot be established, the
+/// manifest or catalog index cannot be fetched, the fingerprint check fails, the
+/// manifest has no `CATALOG_NAME` entry, or the temporary file cannot be created,
+/// written or rewound.
+async fn download_reference_catalog(
+    repo: &BackupRepository,
+    previous_snapshot: &BackupDir,
+    backup_ns: &BackupNamespace,
+    crypt_config: Option<Arc<CryptConfig>>,
+) -> Result<CatalogReader<std::fs::File>, Error> {
+    let http_reader_client = connect(repo)?;
+    let backup_reader = BackupReader::start(
+        http_reader_client,
+        crypt_config.clone(),
+        repo.store(),
+        backup_ns,
+        previous_snapshot,
+        true,
+    )
+    .await?;
+
+    let (manifest, _) = backup_reader.download_manifest().await?;
+    manifest.check_fingerprint(crypt_config.as_ref().map(Arc::as_ref))?;
+
+    let index = backup_reader
+        .download_dynamic_index(&manifest, CATALOG_NAME)
+        .await?;
+    let most_used = index.find_most_used_chunks(8);
+    let file_info = manifest.lookup_file_info(CATALOG_NAME)?;
+
+    // Last use of `crypt_config`: move it instead of cloning the `Arc` again.
+    let chunk_reader = RemoteChunkReader::new(
+        backup_reader,
+        crypt_config,
+        file_info.chunk_crypt_mode(),
+        most_used,
+    );
+
+    let mut reader = BufferedDynamicReader::new(index, chunk_reader);
+
+    // Unnamed temporary file: `O_TMPFILE` takes the directory to create it in.
+    let mut catalogfile = std::fs::OpenOptions::new()
+        .write(true)
+        .read(true)
+        .custom_flags(libc::O_TMPFILE)
+        .open("/tmp")?;
+
+    std::io::copy(&mut reader, &mut catalogfile)
+        .map_err(|err| format_err!("failed to download reference catalog - {}", err))?;
+
+    // Rewind so the catalog reader starts at the beginning of the file.
+    catalogfile.seek(SeekFrom::Start(0))?;
+
+    Ok(CatalogReader::new(catalogfile))
+}
+
 async fn dump_image<W: Write>(
     client: Arc<BackupReader>,
     crypt_config: Option<Arc<CryptConfig>>,
-- 
2.39.2