public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Filip Schauer <f.schauer@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory
Date: Wed, 30 Oct 2024 14:55:33 +0100	[thread overview]
Message-ID: <20241030135537.92595-3-f.schauer@proxmox.com> (raw)
In-Reply-To: <20241030135537.92595-1-f.schauer@proxmox.com>

When a path to a directory is provided in the vma_file argument, try to
upload all VMA backups in the directory. This also handles compressed
VMA files, notes and logs. If a vmid is specified with --vmid, only the
backups of that particular vmid are uploaded.

This is intended for use on a dump directory:

PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \
        --repository 'user@realm!token@server:port:datastore' \
        /var/lib/vz/dump

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 Cargo.toml     |   3 +
 src/main.rs    | 161 +++++++++++++++++++++++++++++++++++++++++++++----
 src/vma2pbs.rs |  64 +++++++++++++++++---
 3 files changed, 209 insertions(+), 19 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cd13426..5c6a175 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,9 +7,12 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 bincode = "1.3"
+chrono = "0.4"
 hyper = "0.14.5"
+itertools = "0.13"
 pico-args = "0.5"
 md5 = "0.7.0"
+regex = "1.7"
 scopeguard = "1.1.0"
 serde = "1.0"
 serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
index 3e25591..4c5135b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,26 +1,35 @@
 use std::ffi::OsString;
+use std::fs::read_dir;
+use std::io::{BufRead, BufReader};
+use std::path::PathBuf;
 
 use anyhow::{bail, Context, Error};
+use chrono::NaiveDateTime;
+use itertools::Itertools;
 use proxmox_sys::linux::tty;
 use proxmox_time::epoch_i64;
+use regex::Regex;
 
 mod vma;
 mod vma2pbs;
-use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs};
+use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs};
 
 const CMD_HELP: &str = "\
 Usage: vma-to-pbs [OPTIONS] --repository <auth_id@host:port:datastore> --vmid <VMID> [vma_file]
 
 Arguments:
-  [vma_file]
+  [vma_file | dump_directory]
 
 Options:
       --repository <auth_id@host:port:datastore>
           Repository URL
       [--ns <NAMESPACE>]
           Namespace
-      --vmid <VMID>
+      [--vmid <VMID>]
           Backup ID
+          This is required if a single VMA file is provided.
+          If not specified, bulk import all VMA backups in the provided directory.
+          If specified with a dump directory, only import backups of the specified vmid.
       [--backup-time <EPOCH>]
           Backup timestamp
       --fingerprint <FINGERPRINT>
@@ -41,6 +50,8 @@ Options:
           File containing a comment/notes
       [--log-file <LOG_FILE>]
           Log file
+  -y, --yes
+          Automatic yes to prompts
   -h, --help
           Print help
   -V, --version
@@ -52,7 +63,16 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
     args.remove(0); // remove the executable path.
 
     let mut first_later_args_index = 0;
-    let options = ["-h", "--help", "-c", "--compress", "-e", "--encrypt"];
+    let options = [
+        "-h",
+        "--help",
+        "-c",
+        "--compress",
+        "-e",
+        "--encrypt",
+        "-y",
+        "--yes",
+    ];
 
     for (i, arg) in args.iter().enumerate() {
         if let Some(arg) = arg.to_str() {
@@ -87,7 +107,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
 
     let pbs_repository = args.value_from_str("--repository")?;
     let namespace = args.opt_value_from_str("--ns")?;
-    let vmid = args.value_from_str("--vmid")?;
+    let vmid = args.opt_value_from_str("--vmid")?;
     let backup_time: Option<i64> = args.opt_value_from_str("--backup-time")?;
     let backup_time = backup_time.unwrap_or_else(epoch_i64);
     let fingerprint = args.opt_value_from_str("--fingerprint")?;
@@ -99,6 +119,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
     let key_password_file: Option<OsString> = args.opt_value_from_str("--key-password-file")?;
     let notes_file: Option<OsString> = args.opt_value_from_str("--notes-file")?;
     let log_file_path: Option<OsString> = args.opt_value_from_str("--log-file")?;
+    let yes = args.contains(["-y", "--yes"]);
 
     match (encrypt, keyfile.is_some()) {
         (true, false) => bail!("--encrypt requires a --keyfile!"),
@@ -196,15 +217,131 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
         encrypt,
     };
 
-    let vma_args = VmaBackupArgs {
-        vma_file_path: vma_file_path.cloned(),
-        backup_id: vmid,
-        backup_time,
-        notes,
-        log_file_path,
+    let bulk =
+        vma_file_path
+            .map(PathBuf::from)
+            .and_then(|path| if path.is_dir() { Some(path) } else { None });
+
+    let grouped_vmas = if let Some(dump_dir_path) = bulk {
+        let re = Regex::new(
+            r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$",
+        )?;
+
+        let mut vmas = Vec::new();
+
+        for entry in read_dir(dump_dir_path)? {
+            let entry = entry?;
+            let path = entry.path();
+
+            if !path.is_file() {
+                continue;
+            }
+
+            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
+                let Some((_, [backup_id, timestr, ext])) =
+                    re.captures(file_name).map(|c| c.extract())
+                else {
+                    // Skip the file, since it is not a VMA backup
+                    continue;
+                };
+
+                if let Some(ref vmid) = vmid {
+                    if backup_id != vmid {
+                        // Skip the backup, since it does not match the specified vmid
+                        continue;
+                    }
+                }
+
+                let compression = match ext {
+                    "" => None,
+                    ".zst" => Some(Compression::Zstd),
+                    ".lzo" => Some(Compression::Lzo),
+                    ".gz" => Some(Compression::GZip),
+                    _ => bail!("Unexpected file extension: {ext}"),
+                };
+
+                let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")?
+                    .and_utc()
+                    .timestamp();
+
+                let notes_path = path.with_file_name(format!("{}.notes", file_name));
+                let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?;
+
+                let log_path = path.with_file_name(format!("{}.log", file_name));
+                let log_file_path = if log_path.exists() {
+                    Some(log_path.to_path_buf().into_os_string())
+                } else {
+                    None
+                };
+
+                let backup_args = VmaBackupArgs {
+                    vma_file_path: Some(path.clone().into()),
+                    compression,
+                    backup_id: backup_id.to_string(),
+                    backup_time,
+                    notes,
+                    log_file_path,
+                };
+                vmas.push(backup_args);
+            }
+        }
+
+        vmas.sort_by_key(|d| d.backup_time);
+        let total_vma_count = vmas.len();
+        let mut grouped_vmas: Vec<_> = vmas
+            .into_iter()
+            .into_group_map_by(|d| d.backup_id.clone())
+            .into_values()
+            .collect();
+        grouped_vmas.sort_by_key(|d| d[0].backup_id.clone());
+
+        println!(
+            "Found {} backup archive(s) of {} different VMID(s):",
+            total_vma_count,
+            grouped_vmas.len()
+        );
+
+        for vma_group in &grouped_vmas {
+            println!(
+                "- VMID {}: {} backups",
+                vma_group[0].backup_id,
+                vma_group.len()
+            );
+        }
+
+        if !yes {
+            loop {
+                eprint!("Proceed with the bulk import? (y/n): ");
+                let mut line = String::new();
+
+                BufReader::new(std::io::stdin()).read_line(&mut line)?;
+                let trimmed = line.trim();
+                if trimmed == "y" || trimmed == "Y" {
+                    break;
+                } else if trimmed == "n" || trimmed == "N" {
+                    bail!("Bulk import was not confirmed.");
+                }
+            }
+        }
+
+        grouped_vmas
+    } else if let Some(vmid) = vmid {
+        vec![vec![VmaBackupArgs {
+            vma_file_path: vma_file_path.cloned(),
+            compression: None,
+            backup_id: vmid,
+            backup_time,
+            notes,
+            log_file_path,
+        }]]
+    } else {
+        bail!("No vmid specified for single backup file");
     };
 
-    let options = BackupVmaToPbsArgs { pbs_args, vma_args };
+    let options = BackupVmaToPbsArgs {
+        pbs_args,
+        grouped_vmas,
+    };
 
     Ok(options)
 }
diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs
index 37ea308..a3f1eee 100644
--- a/src/vma2pbs.rs
+++ b/src/vma2pbs.rs
@@ -4,6 +4,7 @@ use std::collections::HashMap;
 use std::ffi::{c_char, CStr, CString, OsString};
 use std::fs::File;
 use std::io::{stdin, BufRead, BufReader, Read};
+use std::process::{Command, Stdio};
 use std::ptr;
 use std::time::SystemTime;
 
@@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536;
 
 pub struct BackupVmaToPbsArgs {
     pub pbs_args: PbsArgs,
-    pub vma_args: VmaBackupArgs,
+    pub grouped_vmas: Vec<Vec<VmaBackupArgs>>,
 }
 
 pub struct PbsArgs {
@@ -45,8 +46,15 @@ pub struct PbsArgs {
     pub encrypt: bool,
 }
 
+pub enum Compression {
+    Zstd,
+    Lzo,
+    GZip,
+}
+
 pub struct VmaBackupArgs {
     pub vma_file_path: Option<OsString>,
+    pub compression: Option<Compression>,
     pub backup_id: String,
     pub backup_time: i64,
     pub notes: Option<String>,
@@ -468,7 +476,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 
     let start_transfer_time = SystemTime::now();
 
-    upload_vma_file(pbs_args, &args.vma_args)?;
+    for vma_group in args.grouped_vmas {
+        for backup_args in vma_group {
+            if let Err(e) = upload_vma_file(pbs_args, &backup_args) {
+                eprintln!(
+                    "Failed to upload vma file at {:?} - {}",
+                    backup_args.vma_file_path.unwrap_or("(stdin)".into()),
+                    e
+                );
+                println!("Skipping VMID {}", backup_args.backup_id);
+                break;
+            }
+        }
+    }
 
     let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?;
     let total_seconds = transfer_duration.as_secs();
@@ -481,13 +501,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 }
 
 fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> {
-    let vma_file: Box<dyn BufRead> = match &backup_args.vma_file_path {
-        Some(vma_file_path) => match File::open(vma_file_path) {
-            Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
-            Ok(file) => Box::new(BufReader::new(file)),
+    match &backup_args.vma_file_path {
+        Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path),
+        None => println!("Uploading VMA backup from (stdin)"),
+    };
+
+    let vma_file: Box<dyn BufRead> = match &backup_args.compression {
+        Some(compression) => {
+            let vma_file_path = backup_args
+                .vma_file_path
+                .as_ref()
+                .expect("No VMA file path provided");
+            let mut cmd = match compression {
+                Compression::Zstd => {
+                    let mut cmd = Command::new("zstd");
+                    cmd.args(["-q", "-d", "-c"]);
+                    cmd
+                }
+                Compression::Lzo => {
+                    let mut cmd = Command::new("lzop");
+                    cmd.args(["-d", "-c"]);
+                    cmd
+                }
+                Compression::GZip => Command::new("zcat"),
+            };
+            let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?;
+            let stdout = process.stdout.expect("Failed to capture stdout");
+            Box::new(BufReader::new(stdout))
+        }
+        None => match &backup_args.vma_file_path {
+            Some(vma_file_path) => match File::open(vma_file_path) {
+                Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
+                Ok(file) => Box::new(BufReader::new(file)),
+            },
+            None => Box::new(BufReader::new(stdin())),
         },
-        None => Box::new(BufReader::new(stdin())),
     };
+
     let vma_reader = VmaReader::new(vma_file)?;
 
     let pbs = create_pbs_backup_task(pbs_args, backup_args)?;
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


  parent reply	other threads:[~2024-10-30 13:55 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-30 13:55 [pbs-devel] [PATCH vma-to-pbs v4 0/6] " Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 1/6] split BackupVmaToPbsArgs into PbsArgs and VmaBackupArgs Filip Schauer
2024-10-30 13:55 ` Filip Schauer [this message]
2024-11-04 13:06   ` [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory Fabian Grünbichler
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 3/6] add option to skip vmids whose backups failed to upload Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 4/6] remove hard coded values Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 5/6] use level-based logging instead of println Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 6/6] log device upload progress as a percentage Filip Schauer
2024-11-04 13:09 ` [pbs-devel] partially-applied: [PATCH vma-to-pbs v4 0/6] add support for bulk import of a dump directory Fabian Grünbichler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241030135537.92595-3-f.schauer@proxmox.com \
    --to=f.schauer@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal