public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: "Shannon Sterz" <s.sterz@proxmox.com>
To: "Proxmox Backup Server development discussion"
	<pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH vma-to-pbs v5 1/4] add support for bulk import of a dump directory
Date: Wed, 13 Nov 2024 12:41:12 +0100	[thread overview]
Message-ID: <D5L0U96TMHFA.2DRXOPBDV5GIT@proxmox.com> (raw)
In-Reply-To: <20241111130822.124584-2-f.schauer@proxmox.com>

comments in-line:

On Mon Nov 11, 2024 at 2:08 PM CET, Filip Schauer wrote:
> When a path to a directory is provided in the vma_file argument, try to
> upload all VMA backups in the directory. This also handles compressed
> VMA files, notes and logs. If a vmid is specified with --vmid, only the
> backups of that particular vmid are uploaded.
>
> This is intended for use on a dump directory:
>
> PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \
>         --repository 'user@realm!token@server:port:datastore' \
>         /var/lib/vz/dump
>
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
>  Cargo.toml     |   2 +
>  src/main.rs    | 167 +++++++++++++++++++++++++++++++++++++++++++++----
>  src/vma2pbs.rs |  64 ++++++++++++++++---
>  3 files changed, 214 insertions(+), 19 deletions(-)
>
> diff --git a/Cargo.toml b/Cargo.toml
> index cd13426..ad80304 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -7,9 +7,11 @@ edition = "2021"
>  [dependencies]
>  anyhow = "1.0"
>  bincode = "1.3"
> +chrono = "0.4"
>  hyper = "0.14.5"
>  pico-args = "0.5"
>  md5 = "0.7.0"
> +regex = "1.7"
>  scopeguard = "1.1.0"
>  serde = "1.0"
>  serde_json = "1.0"
> diff --git a/src/main.rs b/src/main.rs
> index 3e25591..a394078 100644
> --- a/src/main.rs
> +++ b/src/main.rs
> @@ -1,26 +1,35 @@
> +use std::collections::HashMap;
>  use std::ffi::OsString;
> +use std::fs::read_dir;
> +use std::io::{BufRead, BufReader, Write};
> +use std::path::PathBuf;
>
>  use anyhow::{bail, Context, Error};
> +use chrono::NaiveDateTime;
>  use proxmox_sys::linux::tty;
>  use proxmox_time::epoch_i64;
> +use regex::Regex;
>
>  mod vma;
>  mod vma2pbs;
> -use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs};
> +use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs};
>
>  const CMD_HELP: &str = "\
>  Usage: vma-to-pbs [OPTIONS] --repository <auth_id@host:port:datastore> --vmid <VMID> [vma_file]
>
>  Arguments:
> -  [vma_file]
> +  [vma_file | dump_directory]
>
>  Options:
>        --repository <auth_id@host:port:datastore>
>            Repository URL
>        [--ns <NAMESPACE>]
>            Namespace
> -      --vmid <VMID>
> +      [--vmid <VMID>]

nit: this is marked as optional here (and in the code), but the usage
line above still make it look like it's required.

>            Backup ID
> +          This is required if a single VMA file is provided.
> +          If not specified, bulk import all VMA backups in the provided directory.
> +          If specified with a dump directory, only import backups of the specified vmid.
>        [--backup-time <EPOCH>]
>            Backup timestamp
>        --fingerprint <FINGERPRINT>
> @@ -41,6 +50,8 @@ Options:
>            File containing a comment/notes
>        [--log-file <LOG_FILE>]
>            Log file
> +  -y, --yes
> +          Automatic yes to prompts
>    -h, --help
>            Print help
>    -V, --version
> @@ -52,7 +63,16 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
>      args.remove(0); // remove the executable path.
>
>      let mut first_later_args_index = 0;
> -    let options = ["-h", "--help", "-c", "--compress", "-e", "--encrypt"];
> +    let options = [
> +        "-h",
> +        "--help",
> +        "-c",
> +        "--compress",
> +        "-e",
> +        "--encrypt",
> +        "-y",
> +        "--yes",
> +    ];
>
>      for (i, arg) in args.iter().enumerate() {
>          if let Some(arg) = arg.to_str() {
> @@ -87,7 +107,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
>
>      let pbs_repository = args.value_from_str("--repository")?;
>      let namespace = args.opt_value_from_str("--ns")?;
> -    let vmid = args.value_from_str("--vmid")?;
> +    let vmid: Option<String> = args.opt_value_from_str("--vmid")?;
>      let backup_time: Option<i64> = args.opt_value_from_str("--backup-time")?;
>      let backup_time = backup_time.unwrap_or_else(epoch_i64);
>      let fingerprint = args.opt_value_from_str("--fingerprint")?;
> @@ -99,6 +119,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
>      let key_password_file: Option<OsString> = args.opt_value_from_str("--key-password-file")?;
>      let notes_file: Option<OsString> = args.opt_value_from_str("--notes-file")?;
>      let log_file_path: Option<OsString> = args.opt_value_from_str("--log-file")?;
> +    let yes = args.contains(["-y", "--yes"]);
>
>      match (encrypt, keyfile.is_some()) {
>          (true, false) => bail!("--encrypt requires a --keyfile!"),
> @@ -196,15 +217,137 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
>          encrypt,
>      };
>
> -    let vma_args = VmaBackupArgs {
> -        vma_file_path: vma_file_path.cloned(),
> -        backup_id: vmid,
> -        backup_time,
> -        notes,
> -        log_file_path,
> +    let bulk =
> +        vma_file_path
> +            .map(PathBuf::from)
> +            .and_then(|path| if path.is_dir() { Some(path) } else { None });
> +
> +    let grouped_vmas = if let Some(dump_dir_path) = bulk {
> +        let re = Regex::new(
> +            r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$",
> +        )?;
> +
> +        let mut vmas = Vec::new();
> +
> +        for entry in read_dir(dump_dir_path)? {
> +            let entry = entry?;
> +            let path = entry.path();
> +
> +            if !path.is_file() {
> +                continue;
> +            }
> +
> +            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
> +                let Some((_, [backup_id, timestr, ext])) =
> +                    re.captures(file_name).map(|c| c.extract())
> +                else {
> +                    // Skip the file, since it is not a VMA backup
> +                    continue;
> +                };
> +
> +                if let Some(ref vmid) = vmid {
> +                    if backup_id != vmid {
> +                        // Skip the backup, since it does not match the specified vmid
> +                        continue;
> +                    }
> +                }
> +
> +                let compression = match ext {
> +                    "" => None,
> +                    ".zst" => Some(Compression::Zstd),
> +                    ".lzo" => Some(Compression::Lzo),
> +                    ".gz" => Some(Compression::GZip),
> +                    _ => bail!("Unexpected file extension: {ext}"),
> +                };
> +
> +                let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")?
> +                    .and_utc()
> +                    .timestamp();
> +
> +                let notes_path = path.with_file_name(format!("{}.notes", file_name));
> +                let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?;
> +
> +                let log_path = path.with_file_name(format!("{}.log", file_name));
> +                let log_file_path = if log_path.exists() {
> +                    Some(log_path.to_path_buf().into_os_string())
> +                } else {
> +                    None
> +                };
> +
> +                let backup_args = VmaBackupArgs {
> +                    vma_file_path: Some(path.clone().into()),
> +                    compression,
> +                    backup_id: backup_id.to_string(),
> +                    backup_time,
> +                    notes,
> +                    log_file_path,
> +                };
> +                vmas.push(backup_args);
> +            }
> +        }
> +
> +        vmas.sort_by_key(|d| d.backup_time);
> +        let total_vma_count = vmas.len();
> +        let grouped_vmas = vmas.into_iter().fold(
> +            HashMap::new(),
> +            |mut grouped: HashMap<String, Vec<VmaBackupArgs>>, vma_args| {
> +                grouped
> +                    .entry(vma_args.backup_id.clone())
> +                    .or_default()
> +                    .push(vma_args);
> +                grouped
> +            },
> +        );
> +
> +        if grouped_vmas.is_empty() {
> +            bail!("Did not find any backup archives");
> +        }
> +
> +        println!(
> +            "Found {} backup archive(s) of {} different VMID(s):",
> +            total_vma_count,
> +            grouped_vmas.len()
> +        );
> +
> +        for (backup_id, vma_group) in &grouped_vmas {
> +            println!("- VMID {}: {} backups", backup_id, vma_group.len());

nit: this should be:

```rs
println!("- VMID {backup_id}: {} backups", vma_group.len());
```

> +        }
> +
> +        if !yes {
> +            eprint!("Proceed with the bulk import? (Y/n): ");
> +            std::io::stdout().flush()?;
> +            let mut line = String::new();
> +
> +            BufReader::new(std::io::stdin()).read_line(&mut line)?;
> +            let trimmed = line.trim();
> +            match trimmed {
> +                "y" | "Y" | "" => {}
> +                "n" | "N" => bail!("Bulk import was not confirmed."),
> +                _ => bail!("Unexpected choice '{trimmed}'!"),
> +            }
> +        }
> +
> +        grouped_vmas
> +    } else if let Some(vmid) = vmid {
> +        HashMap::from([(
> +            vmid.clone(),
> +            vec![VmaBackupArgs {
> +                vma_file_path: vma_file_path.cloned(),
> +                compression: None,
> +                backup_id: vmid,
> +                backup_time,
> +                notes,
> +                log_file_path,
> +            }],
> +        )])
> +    } else {
> +        bail!("No vmid specified for single backup file");
>      };
>
> -    let options = BackupVmaToPbsArgs { pbs_args, vma_args };
> +    let options = BackupVmaToPbsArgs {
> +        pbs_args,
> +        grouped_vmas,
> +    };
>
>      Ok(options)
>  }
> diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs
> index a888a7b..95ede9b 100644
> --- a/src/vma2pbs.rs
> +++ b/src/vma2pbs.rs
> @@ -4,6 +4,7 @@ use std::collections::HashMap;
>  use std::ffi::{c_char, CStr, CString, OsString};
>  use std::fs::File;
>  use std::io::{stdin, BufRead, BufReader, Read};
> +use std::process::{Command, Stdio};
>  use std::ptr;
>  use std::time::SystemTime;
>
> @@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536;
>
>  pub struct BackupVmaToPbsArgs {
>      pub pbs_args: PbsArgs,
> -    pub vma_args: VmaBackupArgs,
> +    pub grouped_vmas: HashMap<String, Vec<VmaBackupArgs>>,
>  }
>
>  pub struct PbsArgs {
> @@ -45,8 +46,15 @@ pub struct PbsArgs {
>      pub encrypt: bool,
>  }
>
> +pub enum Compression {
> +    Zstd,
> +    Lzo,
> +    GZip,
> +}
> +
>  pub struct VmaBackupArgs {
>      pub vma_file_path: Option<OsString>,
> +    pub compression: Option<Compression>,
>      pub backup_id: String,
>      pub backup_time: i64,
>      pub notes: Option<String>,
> @@ -467,7 +475,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
>
>      let start_transfer_time = SystemTime::now();
>
> -    upload_vma_file(pbs_args, &args.vma_args)?;
> +    for (_, vma_group) in args.grouped_vmas {
> +        for backup_args in vma_group {
> +            if let Err(e) = upload_vma_file(pbs_args, &backup_args) {
> +                eprintln!(
> +                    "Failed to upload vma file at {:?} - {}",
> +                    backup_args.vma_file_path.unwrap_or("(stdin)".into()),
> +                    e

nit: same as above, move `e` into the format string

> +                );
> +                println!("Skipping VMID {}", backup_args.backup_id);
> +                break;
> +            }
> +        }
> +    }
>
>      let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?;
>      let total_seconds = transfer_duration.as_secs();
> @@ -480,13 +500,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> {
>  }
>
>  fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> {
> -    let vma_file: Box<dyn BufRead> = match &backup_args.vma_file_path {
> -        Some(vma_file_path) => match File::open(vma_file_path) {
> -            Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),
> -            Ok(file) => Box::new(BufReader::new(file)),
> +    match &backup_args.vma_file_path {
> +        Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path),

nit: this could be

```rs
           Some(vma_file_path) => println!("Uploading VMA backup from {vma_file_path:?}"),
```

> +        None => println!("Uploading VMA backup from (stdin)"),
> +    };
> +
> +    let vma_file: Box<dyn BufRead> = match &backup_args.compression {
> +        Some(compression) => {
> +            let vma_file_path = backup_args
> +                .vma_file_path
> +                .as_ref()
> +                .expect("No VMA file path provided");
> +            let mut cmd = match compression {
> +                Compression::Zstd => {
> +                    let mut cmd = Command::new("zstd");
> +                    cmd.args(["-q", "-d", "-c"]);
> +                    cmd

i think the following would be more elegant here:


```rs
                   Compression::Zstd => Command::new("zstd")
                       .args(["-q", "-d", "-c"]),
```

it's a bit more concise imo

> +                }
> +                Compression::Lzo => {
> +                    let mut cmd = Command::new("lzop");
> +                    cmd.args(["-d", "-c"]);
> +                    cmd

same as above

> +                }
> +                Compression::GZip => Command::new("zcat"),
> +            };
> +            let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?;
> +            let stdout = process.stdout.expect("Failed to capture stdout");
> +            Box::new(BufReader::new(stdout))
> +        }
> +        None => match &backup_args.vma_file_path {
> +            Some(vma_file_path) => match File::open(vma_file_path) {
> +                Err(why) => return Err(anyhow!("Couldn't open file: {}", why)),

nit: `why` can be moved into the format string here

> +                Ok(file) => Box::new(BufReader::new(file)),
> +            },
> +            None => Box::new(BufReader::new(stdin())),
>          },
> -        None => Box::new(BufReader::new(stdin())),
>      };
> +
>      let vma_reader = VmaReader::new(vma_file)?;
>
>      let pbs = create_pbs_backup_task(pbs_args, backup_args)?;



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


  reply	other threads:[~2024-11-13 11:41 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-11 13:08 [pbs-devel] [PATCH vma-to-pbs v5 0/4] " Filip Schauer
2024-11-11 13:08 ` [pbs-devel] [PATCH vma-to-pbs v5 1/4] " Filip Schauer
2024-11-13 11:41   ` Shannon Sterz [this message]
2024-11-13 16:02     ` Filip Schauer
2024-11-11 13:08 ` [pbs-devel] [PATCH vma-to-pbs v5 2/4] add option to skip vmids whose backups failed to upload Filip Schauer
2024-11-13 11:41   ` Shannon Sterz
2024-11-11 13:08 ` [pbs-devel] [PATCH vma-to-pbs v5 3/4] use level-based logging instead of println Filip Schauer
2024-11-13 11:41   ` Shannon Sterz
2024-11-13 16:02     ` Filip Schauer
2024-11-11 13:08 ` [pbs-devel] [PATCH vma-to-pbs v5 4/4] log device upload progress as a percentage Filip Schauer
2024-11-13 11:41   ` Shannon Sterz
2024-11-13 16:07     ` Filip Schauer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=D5L0U96TMHFA.2DRXOPBDV5GIT@proxmox.com \
    --to=s.sterz@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal