public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Filip Schauer <f.schauer@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH v2 vma-to-pbs 2/5] add support for bulk import of a dump directory
Date: Tue,  8 Oct 2024 16:34:01 +0200	[thread overview]
Message-ID: <20241008143404.193345-3-f.schauer@proxmox.com> (raw)
In-Reply-To: <20241008143404.193345-1-f.schauer@proxmox.com>

When a path to a directory is provided in the vma_file argument, try to
upload all VMA backups in the directory. This also handles compressed
VMA files, notes and logs. If a vmid is specified with --vmid, only the
backups of that particular vmid are uploaded.

This is intended for use on a dump directory:

PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \
        --repository 'user@realm!token@server:port:datastore' \
        /var/lib/vz/dump

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 Cargo.toml     |   3 ++
 src/main.rs    | 124 ++++++++++++++++++++++++++++++++++++++++++++-----
 src/vma2pbs.rs |  64 ++++++++++++++++++++++---
 3 files changed, 173 insertions(+), 18 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cd13426..fcbc2a3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,9 +7,12 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 bincode = "1.3"
+chrono = "0.4"
 hyper = "0.14.5"
+itertools = "0.10"
 pico-args = "0.5"
 md5 = "0.7.0"
+regex = "1.7"
 scopeguard = "1.1.0"
 serde = "1.0"
 serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
index 3e25591..d001d81 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,26 +1,33 @@
 use std::ffi::OsString;
+use std::fs::read_dir;
+use std::path::PathBuf;
 
 use anyhow::{bail, Context, Error};
+use chrono::NaiveDateTime;
+use itertools::Itertools;
 use proxmox_sys::linux::tty;
 use proxmox_time::epoch_i64;
+use regex::Regex;
 
 mod vma;
 mod vma2pbs;
-use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs};
+use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs};
 
 const CMD_HELP: &str = "\
 Usage: vma-to-pbs [OPTIONS] --repository <auth_id@host:port:datastore> --vmid <VMID> [vma_file]
 
 Arguments:
-  [vma_file]
+  [vma_file | dump_directory]
 
 Options:
       --repository <auth_id@host:port:datastore>
           Repository URL
       [--ns <NAMESPACE>]
           Namespace
-      --vmid <VMID>
+      [--vmid <VMID>]
           Backup ID
+          If not specified, bulk import all VMA backups in the provided directory.
+          If specified with a dump directory, only import backups of the specified vmid.
       [--backup-time <EPOCH>]
           Backup timestamp
       --fingerprint <FINGERPRINT>
@@ -87,7 +94,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
 
     let pbs_repository = args.value_from_str("--repository")?;
     let namespace = args.opt_value_from_str("--ns")?;
-    let vmid = args.value_from_str("--vmid")?;
+    let vmid = args.opt_value_from_str("--vmid")?;
     let backup_time: Option<i64> = args.opt_value_from_str("--backup-time")?;
     let backup_time = backup_time.unwrap_or_else(epoch_i64);
     let fingerprint = args.opt_value_from_str("--fingerprint")?;
@@ -196,15 +203,110 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
         encrypt,
     };
 
-    let vma_args = VmaBackupArgs {
-        vma_file_path: vma_file_path.cloned(),
-        backup_id: vmid,
-        backup_time,
-        notes,
-        log_file_path,
+    let mut grouped_vmas = Vec::new();
+
+    let bulk = if let Some(vma_file_path) = vma_file_path {
+        PathBuf::from(vma_file_path).is_dir()
+    } else {
+        false
     };
 
-    let options = BackupVmaToPbsArgs { pbs_args, vma_args };
+    if bulk {
+        let dump_dir_path =
+            PathBuf::from(vma_file_path.expect("no directory specified for bulk import"));
+
+        let re = Regex::new(
+            r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$",
+        )?;
+
+        let mut vmas = Vec::new();
+
+        for entry in read_dir(dump_dir_path)? {
+            let entry = entry?;
+            let path = entry.path();
+
+            if !path.is_file() {
+                continue;
+            }
+
+            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
+                let Some((_, [backup_id, timestr, ext])) =
+                    re.captures(file_name).map(|c| c.extract())
+                else {
+                    // Skip the file, since it is not a VMA backup
+                    continue;
+                };
+
+                if let Some(ref vmid) = vmid {
+                    if backup_id != vmid {
+                        // Skip the backup, since it does not match the specified vmid
+                        continue;
+                    }
+                }
+
+                let compression = match ext {
+                    "" => None,
+                    ".zst" => Some(Compression::Zstd),
+                    ".lzo" => Some(Compression::Lzo),
+                    ".gz" => Some(Compression::GZip),
+                    _ => continue,
+                };
+
+                let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")?
+                    .and_utc()
+                    .timestamp();
+
+                let mut notes_path_os_string: OsString = path.clone().into();
+                notes_path_os_string.push(".notes");
+                let notes_path: PathBuf = notes_path_os_string.into();
+                let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?;
+
+                let mut log_path_os_string: OsString = path.clone().into();
+                log_path_os_string.push(".log");
+                let log_path: PathBuf = log_path_os_string.into();
+                let log_file_path = if log_path.exists() {
+                    Some(log_path.to_path_buf().into_os_string())
+                } else {
+                    None
+                };
+
+                let backup_args = VmaBackupArgs {
+                    vma_file_path: Some(path.clone().into()),
+                    compression,
+                    backup_id: backup_id.to_string(),
+                    backup_time,
+                    notes,
+                    log_file_path,
+                };
+                vmas.push(backup_args);
+            }
+        }
+
+        vmas.sort_by_key(|d| d.backup_time);
+        grouped_vmas = vmas
+            .into_iter()
+            .into_group_map_by(|d| d.backup_id.clone())
+            .into_values()
+            .collect();
+        grouped_vmas.sort_by_key(|d| d[0].backup_id.clone());
+    } else if let Some(vmid) = vmid {
+        let vmas = vec![VmaBackupArgs {
+            vma_file_path: vma_file_path.cloned(),
+            compression: None,
+            backup_id: vmid,
+            backup_time,
+            notes,
+            log_file_path,
+        }];
+        grouped_vmas.push(vmas);
+    } else {
+        bail!("No vmid specified for single backup file");
+    }
+
+    let options = BackupVmaToPbsArgs {
+        pbs_args,
+        grouped_vmas,
+    };
 
     Ok(options)
 }
diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs
index 37ea308..05294f3 100644
--- a/src/vma2pbs.rs
+++ b/src/vma2pbs.rs
@@ -4,6 +4,7 @@ use std::collections::HashMap;
 use std::ffi::{c_char, CStr, CString, OsString};
 use std::fs::File;
 use std::io::{stdin, BufRead, BufReader, Read};
+use std::process::{Command, Stdio};
 use std::ptr;
 use std::time::SystemTime;
 
@@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536;
 
 pub struct BackupVmaToPbsArgs {
     pub pbs_args: PbsArgs,
-    pub vma_args: VmaBackupArgs,
+    pub grouped_vmas: Vec<Vec<VmaBackupArgs>>,
 }
 
 pub struct PbsArgs {
@@ -45,8 +46,15 @@ pub struct PbsArgs {
     pub encrypt: bool,
 }
 
+pub enum Compression {
+    Zstd,
+    Lzo,
+    GZip,
+}
+
 pub struct VmaBackupArgs {
     pub vma_file_path: Option<OsString>,
+    pub compression: Option<Compression>,
     pub backup_id: String,
     pub backup_time: i64,
     pub notes: Option<String>,
@@ -468,7 +476,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 
     let start_transfer_time = SystemTime::now();
 
-    upload_vma_file(pbs_args, &args.vma_args)?;
+    for vma_group in args.grouped_vmas {
+        for backup_args in vma_group {
+            if let Err(e) = upload_vma_file(pbs_args, &backup_args) {
+                eprintln!(
+                    "Failed to upload vma file at {:?} - {}",
+                    backup_args.vma_file_path.expect("missing VMA file path"),
+                    e
+                );
+                println!("Skipping VMID {}", backup_args.backup_id);
+                break;
+            }
+        }
+    }
 
     let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?;
     let total_seconds = transfer_duration.as_secs();
@@ -481,13 +501,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 }
 
 fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> {
-    let vma_file: Box<dyn BufRead> = match &backup_args.vma_file_path {
-        Some(vma_file_path) => match File::open(vma_file_path) {
-            Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
-            Ok(file) => Box::new(BufReader::new(file)),
+    match &backup_args.vma_file_path {
+        Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path),
+        None => println!("Uploading VMA backup from (stdin)"),
+    };
+
+    let vma_file: Box<dyn BufRead> = match &backup_args.compression {
+        Some(compression) => {
+            let vma_file_path = backup_args
+                .vma_file_path
+                .as_ref()
+                .expect("No VMA file path provided");
+            let mut cmd = match compression {
+                Compression::Zstd => {
+                    let mut cmd = Command::new("zstd");
+                    cmd.args(["-q", "-d", "-c"]);
+                    cmd
+                }
+                Compression::Lzo => {
+                    let mut cmd = Command::new("lzop");
+                    cmd.args(["-d", "-c"]);
+                    cmd
+                }
+                Compression::GZip => Command::new("zcat"),
+            };
+            let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?;
+            let stdout = process.stdout.expect("Failed to capture stdout");
+            Box::new(BufReader::new(stdout))
+        }
+        None => match &backup_args.vma_file_path {
+            Some(vma_file_path) => match File::open(vma_file_path) {
+                Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
+                Ok(file) => Box::new(BufReader::new(file)),
+            },
+            None => Box::new(BufReader::new(stdin())),
         },
-        None => Box::new(BufReader::new(stdin())),
     };
+
     let vma_reader = VmaReader::new(vma_file)?;
 
     let pbs = create_pbs_backup_task(pbs_args, backup_args)?;
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


  parent reply	other threads:[~2024-10-08 14:33 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-08 14:33 [pbs-devel] [PATCH v2 vma-to-pbs 0/5] " Filip Schauer
2024-10-08 14:34 ` [pbs-devel] [PATCH v2 vma-to-pbs 1/5] split BackupVmaToPbsArgs into PbsArgs and VmaBackupArgs Filip Schauer
2024-10-08 14:34 ` Filip Schauer [this message]
2024-10-08 14:34 ` [pbs-devel] [PATCH v2 vma-to-pbs 3/5] remove hard coded values Filip Schauer
2024-10-08 14:34 ` [pbs-devel] [PATCH v2 vma-to-pbs 4/5] use level-based logging instead of println Filip Schauer
2024-10-08 14:34 ` [pbs-devel] [PATCH v2 vma-to-pbs 5/5] log device upload progress as a percentage Filip Schauer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241008143404.193345-3-f.schauer@proxmox.com \
    --to=f.schauer@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal