all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Filip Schauer <f.schauer@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory
Date: Wed, 30 Oct 2024 14:55:33 +0100	[thread overview]
Message-ID: <20241030135537.92595-3-f.schauer@proxmox.com> (raw)
In-Reply-To: <20241030135537.92595-1-f.schauer@proxmox.com>

When a path to a directory is provided in the vma_file argument, try to
upload all VMA backups in the directory. This also handles compressed
VMA files, notes and logs. If a vmid is specified with --vmid, only the
backups of that particular vmid are uploaded.

This is intended for use on a dump directory:

PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \
        --repository 'user@realm!token@server:port:datastore' \
        /var/lib/vz/dump

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 Cargo.toml     |   3 +
 src/main.rs    | 161 +++++++++++++++++++++++++++++++++++++++++++++----
 src/vma2pbs.rs |  64 +++++++++++++++++---
 3 files changed, 209 insertions(+), 19 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cd13426..5c6a175 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,9 +7,12 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 bincode = "1.3"
+chrono = "0.4"
 hyper = "0.14.5"
+itertools = "0.13"
 pico-args = "0.5"
 md5 = "0.7.0"
+regex = "1.7"
 scopeguard = "1.1.0"
 serde = "1.0"
 serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
index 3e25591..4c5135b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,26 +1,35 @@
 use std::ffi::OsString;
+use std::fs::read_dir;
+use std::io::{BufRead, BufReader};
+use std::path::PathBuf;
 
 use anyhow::{bail, Context, Error};
+use chrono::NaiveDateTime;
+use itertools::Itertools;
 use proxmox_sys::linux::tty;
 use proxmox_time::epoch_i64;
+use regex::Regex;
 
 mod vma;
 mod vma2pbs;
-use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs};
+use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs};
 
 const CMD_HELP: &str = "\
 Usage: vma-to-pbs [OPTIONS] --repository <auth_id@host:port:datastore> --vmid <VMID> [vma_file]
 
 Arguments:
-  [vma_file]
+  [vma_file | dump_directory]
 
 Options:
       --repository <auth_id@host:port:datastore>
           Repository URL
       [--ns <NAMESPACE>]
           Namespace
-      --vmid <VMID>
+      [--vmid <VMID>]
           Backup ID
+          This is required if a single VMA file is provided.
+          If not specified, bulk import all VMA backups in the provided directory.
+          If specified with a dump directory, only import backups of the specified vmid.
       [--backup-time <EPOCH>]
           Backup timestamp
       --fingerprint <FINGERPRINT>
@@ -41,6 +50,8 @@ Options:
           File containing a comment/notes
       [--log-file <LOG_FILE>]
           Log file
+  -y, --yes
+          Automatic yes to prompts
   -h, --help
           Print help
   -V, --version
@@ -52,7 +63,16 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
     args.remove(0); // remove the executable path.
 
     let mut first_later_args_index = 0;
-    let options = ["-h", "--help", "-c", "--compress", "-e", "--encrypt"];
+    let options = [
+        "-h",
+        "--help",
+        "-c",
+        "--compress",
+        "-e",
+        "--encrypt",
+        "-y",
+        "--yes",
+    ];
 
     for (i, arg) in args.iter().enumerate() {
         if let Some(arg) = arg.to_str() {
@@ -87,7 +107,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
 
     let pbs_repository = args.value_from_str("--repository")?;
     let namespace = args.opt_value_from_str("--ns")?;
-    let vmid = args.value_from_str("--vmid")?;
+    let vmid = args.opt_value_from_str("--vmid")?;
     let backup_time: Option<i64> = args.opt_value_from_str("--backup-time")?;
     let backup_time = backup_time.unwrap_or_else(epoch_i64);
     let fingerprint = args.opt_value_from_str("--fingerprint")?;
@@ -99,6 +119,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
     let key_password_file: Option<OsString> = args.opt_value_from_str("--key-password-file")?;
     let notes_file: Option<OsString> = args.opt_value_from_str("--notes-file")?;
     let log_file_path: Option<OsString> = args.opt_value_from_str("--log-file")?;
+    let yes = args.contains(["-y", "--yes"]);
 
     match (encrypt, keyfile.is_some()) {
         (true, false) => bail!("--encrypt requires a --keyfile!"),
@@ -196,15 +217,131 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
         encrypt,
     };
 
-    let vma_args = VmaBackupArgs {
-        vma_file_path: vma_file_path.cloned(),
-        backup_id: vmid,
-        backup_time,
-        notes,
-        log_file_path,
+    let bulk =
+        vma_file_path
+            .map(PathBuf::from)
+            .and_then(|path| if path.is_dir() { Some(path) } else { None });
+
+    let grouped_vmas = if let Some(dump_dir_path) = bulk {
+        let re = Regex::new(
+            r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$",
+        )?;
+
+        let mut vmas = Vec::new();
+
+        for entry in read_dir(dump_dir_path)? {
+            let entry = entry?;
+            let path = entry.path();
+
+            if !path.is_file() {
+                continue;
+            }
+
+            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
+                let Some((_, [backup_id, timestr, ext])) =
+                    re.captures(file_name).map(|c| c.extract())
+                else {
+                    // Skip the file, since it is not a VMA backup
+                    continue;
+                };
+
+                if let Some(ref vmid) = vmid {
+                    if backup_id != vmid {
+                        // Skip the backup, since it does not match the specified vmid
+                        continue;
+                    }
+                }
+
+                let compression = match ext {
+                    "" => None,
+                    ".zst" => Some(Compression::Zstd),
+                    ".lzo" => Some(Compression::Lzo),
+                    ".gz" => Some(Compression::GZip),
+                    _ => bail!("Unexpected file extension: {ext}"),
+                };
+
+                let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")?
+                    .and_utc()
+                    .timestamp();
+
+                let notes_path = path.with_file_name(format!("{}.notes", file_name));
+                let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?;
+
+                let log_path = path.with_file_name(format!("{}.log", file_name));
+                let log_file_path = if log_path.exists() {
+                    Some(log_path.to_path_buf().into_os_string())
+                } else {
+                    None
+                };
+
+                let backup_args = VmaBackupArgs {
+                    vma_file_path: Some(path.clone().into()),
+                    compression,
+                    backup_id: backup_id.to_string(),
+                    backup_time,
+                    notes,
+                    log_file_path,
+                };
+                vmas.push(backup_args);
+            }
+        }
+
+        vmas.sort_by_key(|d| d.backup_time);
+        let total_vma_count = vmas.len();
+        let mut grouped_vmas: Vec<_> = vmas
+            .into_iter()
+            .into_group_map_by(|d| d.backup_id.clone())
+            .into_values()
+            .collect();
+        grouped_vmas.sort_by_key(|d| d[0].backup_id.clone());
+
+        println!(
+            "Found {} backup archive(s) of {} different VMID(s):",
+            total_vma_count,
+            grouped_vmas.len()
+        );
+
+        for vma_group in &grouped_vmas {
+            println!(
+                "- VMID {}: {} backups",
+                vma_group[0].backup_id,
+                vma_group.len()
+            );
+        }
+
+        if !yes {
+            loop {
+                eprint!("Proceed with the bulk import? (y/n): ");
+                let mut line = String::new();
+
+                BufReader::new(std::io::stdin()).read_line(&mut line)?;
+                let trimmed = line.trim();
+                if trimmed == "y" || trimmed == "Y" {
+                    break;
+                } else if trimmed == "n" || trimmed == "N" {
+                    bail!("Bulk import was not confirmed.");
+                }
+            }
+        }
+
+        grouped_vmas
+    } else if let Some(vmid) = vmid {
+        vec![vec![VmaBackupArgs {
+            vma_file_path: vma_file_path.cloned(),
+            compression: None,
+            backup_id: vmid,
+            backup_time,
+            notes,
+            log_file_path,
+        }]]
+    } else {
+        bail!("No vmid specified for single backup file");
     };
 
-    let options = BackupVmaToPbsArgs { pbs_args, vma_args };
+    let options = BackupVmaToPbsArgs {
+        pbs_args,
+        grouped_vmas,
+    };
 
     Ok(options)
 }
diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs
index 37ea308..a3f1eee 100644
--- a/src/vma2pbs.rs
+++ b/src/vma2pbs.rs
@@ -4,6 +4,7 @@ use std::collections::HashMap;
 use std::ffi::{c_char, CStr, CString, OsString};
 use std::fs::File;
 use std::io::{stdin, BufRead, BufReader, Read};
+use std::process::{Command, Stdio};
 use std::ptr;
 use std::time::SystemTime;
 
@@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536;
 
 pub struct BackupVmaToPbsArgs {
     pub pbs_args: PbsArgs,
-    pub vma_args: VmaBackupArgs,
+    pub grouped_vmas: Vec<Vec<VmaBackupArgs>>,
 }
 
 pub struct PbsArgs {
@@ -45,8 +46,15 @@ pub struct PbsArgs {
     pub encrypt: bool,
 }
 
+pub enum Compression {
+    Zstd,
+    Lzo,
+    GZip,
+}
+
 pub struct VmaBackupArgs {
     pub vma_file_path: Option<OsString>,
+    pub compression: Option<Compression>,
     pub backup_id: String,
     pub backup_time: i64,
     pub notes: Option<String>,
@@ -468,7 +476,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 
     let start_transfer_time = SystemTime::now();
 
-    upload_vma_file(pbs_args, &args.vma_args)?;
+    for vma_group in args.grouped_vmas {
+        for backup_args in vma_group {
+            if let Err(e) = upload_vma_file(pbs_args, &backup_args) {
+                eprintln!(
+                    "Failed to upload vma file at {:?} - {}",
+                    backup_args.vma_file_path.unwrap_or("(stdin)".into()),
+                    e
+                );
+                println!("Skipping VMID {}", backup_args.backup_id);
+                break;
+            }
+        }
+    }
 
     let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?;
     let total_seconds = transfer_duration.as_secs();
@@ -481,13 +501,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
 }
 
 fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> {
-    let vma_file: Box<dyn BufRead> = match &backup_args.vma_file_path {
-        Some(vma_file_path) => match File::open(vma_file_path) {
-            Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
-            Ok(file) => Box::new(BufReader::new(file)),
+    match &backup_args.vma_file_path {
+        Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path),
+        None => println!("Uploading VMA backup from (stdin)"),
+    };
+
+    let vma_file: Box<dyn BufRead> = match &backup_args.compression {
+        Some(compression) => {
+            let vma_file_path = backup_args
+                .vma_file_path
+                .as_ref()
+                .expect("No VMA file path provided");
+            let mut cmd = match compression {
+                Compression::Zstd => {
+                    let mut cmd = Command::new("zstd");
+                    cmd.args(["-q", "-d", "-c"]);
+                    cmd
+                }
+                Compression::Lzo => {
+                    let mut cmd = Command::new("lzop");
+                    cmd.args(["-d", "-c"]);
+                    cmd
+                }
+                Compression::GZip => Command::new("zcat"),
+            };
+            let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?;
+            let stdout = process.stdout.expect("Failed to capture stdout");
+            Box::new(BufReader::new(stdout))
+        }
+        None => match &backup_args.vma_file_path {
+            Some(vma_file_path) => match File::open(vma_file_path) {
+                Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
+                Ok(file) => Box::new(BufReader::new(file)),
+            },
+            None => Box::new(BufReader::new(stdin())),
         },
-        None => Box::new(BufReader::new(stdin())),
     };
+
     let vma_reader = VmaReader::new(vma_file)?;
 
     let pbs = create_pbs_backup_task(pbs_args, backup_args)?;
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


  parent reply	other threads:[~2024-10-30 13:55 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-30 13:55 [pbs-devel] [PATCH vma-to-pbs v4 0/6] " Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 1/6] split BackupVmaToPbsArgs into PbsArgs and VmaBackupArgs Filip Schauer
2024-10-30 13:55 ` Filip Schauer [this message]
2024-11-04 13:06   ` [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory Fabian Grünbichler
2024-11-11 13:13     ` Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 3/6] add option to skip vmids whose backups failed to upload Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 4/6] remove hard coded values Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 5/6] use level-based logging instead of println Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 6/6] log device upload progress as a percentage Filip Schauer
2024-11-04 13:09 ` [pbs-devel] partially-applied: [PATCH vma-to-pbs v4 0/6] add support for bulk import of a dump directory Fabian Grünbichler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241030135537.92595-3-f.schauer@proxmox.com \
    --to=f.schauer@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal