public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH v3 proxmox-backup 21/24] fix #3174: client: Add detection mode to backup creation
Date: Fri,  3 Nov 2023 14:37:26 +0100	[thread overview]
Message-ID: <20231103133729.2252730-22-c.ebner@proxmox.com> (raw)
In-Reply-To: <20231103133729.2252730-1-c.ebner@proxmox.com>

Introduces the `change-detection-mode` parameter to change file
encoding behavior.

When set to `metadata`, the catalog for the previous backup run and
the corresponding index file are fetched from the server and used as
a reference during pxar archive creation.
This allows the archiver to skip encoding the file payloads of
unchanged regular files and to reference their existing chunks in the
new backup's index file instead, creating a pxar archive with an
appendix section that contains the payloads as a concatenation of
chunks.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
Changes since version 2:
- Fix issue with reference catalog and index download when either the
  backup group contains no snapshots or the snapshot does not contain an
  archive with the given name.

Changes since version 1:
- Replace `incremental` flag with `change-detection-mode` param

 proxmox-backup-client/src/main.rs | 133 ++++++++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 8 deletions(-)

diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index cbdd9f43..e986716b 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -1,5 +1,6 @@
 use std::collections::{HashSet, VecDeque};
 use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
@@ -43,10 +44,10 @@ use pbs_client::tools::{
     CHUNK_SIZE_SCHEMA, REPO_URL_SCHEMA,
 };
 use pbs_client::{
-    delete_ticket_info, parse_backup_specification, view_task_result, BackupReader,
-    BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, ChunkStream,
-    FixedChunkStream, HttpClient, PxarBackupStream, RemoteChunkReader, UploadOptions,
-    BACKUP_SOURCE_SCHEMA,
+    delete_ticket_info, parse_backup_detection_mode_specification, parse_backup_specification,
+    view_task_result, BackupDetectionMode, BackupReader, BackupRepository, BackupSpecificationType,
+    BackupStats, BackupWriter, ChunkStream, FixedChunkStream, HttpClient, PxarBackupStream,
+    RemoteChunkReader, UploadOptions, BACKUP_DETECTION_MODE_SPEC, BACKUP_SOURCE_SCHEMA,
 };
 use pbs_datastore::catalog::{CatalogReader, CatalogWriter};
 use pbs_datastore::chunk_store::verify_chunk_size;
@@ -666,6 +667,10 @@ fn spawn_catalog_upload(
                schema: TRAFFIC_CONTROL_BURST_SCHEMA,
                optional: true,
            },
+           "change-detection-mode": {
+               schema: BACKUP_DETECTION_MODE_SPEC,
+               optional: true,
+           },
            "exclude": {
                type: Array,
                description: "List of paths or patterns for matching files to exclude.",
@@ -849,7 +854,20 @@ async fn create_backup(
 
     let backup_time = backup_time_opt.unwrap_or_else(epoch_i64);
 
-    let client = connect_rate_limited(&repo, rate_limit)?;
+    let cd_mode = param["change-detection-mode"].as_str().unwrap_or("data");
+    let detection_mode = parse_backup_detection_mode_specification(cd_mode)?;
+
+    let client = connect_rate_limited(&repo, rate_limit.clone())?;
+    let backup_group = BackupGroup::new(backup_type, backup_id);
+
+    let previous_snapshot = if let BackupDetectionMode::Metadata(_) = detection_mode {
+        api_datastore_latest_snapshot(&client, &repo.store(), &backup_ns, backup_group)
+            .await
+            .ok()
+    } else {
+        None
+    };
+
     record_repository(&repo);
 
     let snapshot = BackupDir::from((backup_type, backup_id.to_owned(), backup_time));
@@ -959,8 +977,8 @@ async fn create_backup(
         log::info!("{} {} '{}' to '{}' as {}", what, desc, file, repo, target);
     };
 
-    for (backup_type, filename, target, size) in upload_list {
-        match (backup_type, dry_run) {
+    for (backup_spec_type, filename, target, size) in upload_list {
+        match (backup_spec_type, dry_run) {
             // dry-run
             (BackupSpecificationType::CONFIG, true) => log_file("config file", &filename, &target),
             (BackupSpecificationType::LOGFILE, true) => log_file("log file", &filename, &target),
@@ -1006,12 +1024,62 @@ async fn create_backup(
 
                 log_file("directory", &filename, &target);
 
+                let known_chunks = Arc::new(Mutex::new(HashSet::new()));
+                let previous_ref =
+                    if let BackupDetectionMode::Metadata(ref archives) = detection_mode {
+                        if archives.is_empty() || archives.contains(&target) {
+                            match previous_manifest {
+                                None => {
+                                    log::info!("No previous manifest, fallback to regular mode");
+                                    None
+                                }
+                                Some(ref manifest) => {
+                                    let reference_index = client
+                                        .download_previous_dynamic_index(
+                                            &target,
+                                            &manifest,
+                                            known_chunks.clone(),
+                                        )
+                                        .await
+                                        .ok();
+                                    let reference_catalog = download_reference_catalog(
+                                        &repo,
+                                        previous_snapshot.as_ref().unwrap(),
+                                        &backup_ns,
+                                        crypt_config.clone(),
+                                    )
+                                    .await
+                                    .ok();
+
+                                    match  (reference_index, reference_catalog) {
+                                        (Some(reference_index), Some(reference_catalog)) => {
+                                            log::info!(
+                                                "Using previous catalog as metadata reference for '{target}'"
+                                            );
+
+                                            Some(pbs_client::pxar::PxarPrevRef {
+                                                index: reference_index,
+                                                catalog: reference_catalog,
+                                                archive_name: target.clone(),
+                                            })
+                                        }
+                                        _ => None,
+                                    }
+                                }
+                            }
+                        } else {
+                            None
+                        }
+                    } else {
+                        None
+                    };
+
                 let pxar_options = pbs_client::pxar::PxarCreateOptions {
                     device_set: devices.clone(),
                     patterns: pattern_list.clone(),
                     entries_max: entries_max as usize,
                     skip_lost_and_found,
-                    previous_ref: None,
+                    previous_ref,
                     archive_name: Some(std::ffi::CString::new(target.as_str())?),
                 };
 
@@ -1112,6 +1180,55 @@ async fn create_backup(
     Ok(Value::Null)
 }
 
+async fn download_reference_catalog(
+    repo: &BackupRepository,
+    previous_snapshot: &BackupDir,
+    backup_ns: &BackupNamespace,
+    crypt_config: Option<Arc<CryptConfig>>,
+) -> Result<CatalogReader<std::fs::File>, Error> {
+    let http_reader_client = connect(&repo)?;
+    let backup_reader = BackupReader::start(
+        http_reader_client,
+        crypt_config.clone(),
+        repo.store(),
+        &backup_ns,
+        &previous_snapshot,
+        true,
+    )
+    .await?;
+
+    let (manifest, _) = backup_reader.download_manifest().await?;
+    manifest.check_fingerprint(crypt_config.as_ref().map(Arc::as_ref))?;
+
+    let index = backup_reader
+        .download_dynamic_index(&manifest, CATALOG_NAME)
+        .await?;
+    let most_used = index.find_most_used_chunks(8);
+    let file_info = manifest.lookup_file_info(CATALOG_NAME)?;
+
+    let chunk_reader = RemoteChunkReader::new(
+        backup_reader,
+        crypt_config.clone(),
+        file_info.chunk_crypt_mode(),
+        most_used,
+    );
+
+    let mut reader = BufferedDynamicReader::new(index, chunk_reader);
+
+    let mut catalogfile = std::fs::OpenOptions::new()
+        .write(true)
+        .read(true)
+        .custom_flags(libc::O_TMPFILE)
+        .open("/tmp")?;
+
+    std::io::copy(&mut reader, &mut catalogfile)
+        .map_err(|err| format_err!("failed to download reference catalog - {}", err))?;
+
+    catalogfile.seek(SeekFrom::Start(0))?;
+
+    Ok(CatalogReader::new(catalogfile))
+}
+
 async fn dump_image<W: Write>(
     client: Arc<BackupReader>,
     crypt_config: Option<Arc<CryptConfig>>,
-- 
2.39.2





  parent reply	other threads:[~2023-11-03 13:38 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-03 13:37 [pbs-devel] [PATCH-SERIES v3 pxar proxmox-backup proxmox-widget-toolkit 00/24] fix #3174: improve file-level backup Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 1/24] fix #3174: decoder: factor out skip_bytes from skip_entry Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 2/24] fix #3174: decoder: impl skip_bytes for sync dec Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 3/24] fix #3174: encoder: calc filename + metadata byte size Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 4/24] fix #3174: enc/dec: impl PXAR_APPENDIX_REF entrytype Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 5/24] fix #3174: enc/dec: impl PXAR_APPENDIX entrytype Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 6/24] fix #3174: encoder: helper to add to encoder position Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 pxar 7/24] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 08/24] fix #3174: index: add fn index list from start/end-offsets Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 09/24] fix #3174: index: add fn digest for DynamicEntry Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 10/24] fix #3174: api: double catalog upload size Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 11/24] fix #3174: catalog: introduce extended format v2 Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 12/24] fix #3174: archiver/extractor: impl appendix ref Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 13/24] fix #3174: catalog: add specialized Archive entry Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 14/24] fix #3174: extractor: impl seq restore from appendix Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 15/24] fix #3174: archiver: store ref to previous backup Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 16/24] fix #3174: upload stream: impl reused chunk injector Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 17/24] fix #3174: chunker: add forced boundaries Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 18/24] fix #3174: backup writer: inject queued chunk in upload steam Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 19/24] fix #3174: archiver: reuse files with unchanged metadata Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 20/24] fix #3174: specs: add backup detection mode specification Christian Ebner
2023-11-03 13:37 ` Christian Ebner [this message]
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 22/24] test-suite: add detection mode change benchmark Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-backup 23/24] test-suite: Add bin to deb, add shell completions Christian Ebner
2023-11-03 13:37 ` [pbs-devel] [PATCH v3 proxmox-widget-toolkit 24/24] file-browser: support pxar archive and fileref types Christian Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231103133729.2252730-22-c.ebner@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal