From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: Proxmox Backup Server development discussion
<pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory
Date: Mon, 04 Nov 2024 14:06:13 +0100 [thread overview]
Message-ID: <1730724842.agk2is6zq8.astroid@yuna.none> (raw)
In-Reply-To: <20241030135537.92595-3-f.schauer@proxmox.com>
On October 30, 2024 2:55 pm, Filip Schauer wrote:
> When a path to a directory is provided in the vma_file argument, try to
> upload all VMA backups in the directory. This also handles compressed
> VMA files, notes and logs. If a vmid is specified with --vmid, only the
> backups of that particular vmid are uploaded.
>
> This is intended for use on a dump directory:
>
> PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \
> --repository 'user@realm!token@server:port:datastore' \
> /var/lib/vz/dump
>
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
> Cargo.toml | 3 +
> src/main.rs | 161 +++++++++++++++++++++++++++++++++++++++++++++----
> src/vma2pbs.rs | 64 +++++++++++++++++---
> 3 files changed, 209 insertions(+), 19 deletions(-)
>
> diff --git a/Cargo.toml b/Cargo.toml
> index cd13426..5c6a175 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -7,9 +7,12 @@ edition = "2021"
> [dependencies]
> anyhow = "1.0"
> bincode = "1.3"
> +chrono = "0.4"
> hyper = "0.14.5"
> +itertools = "0.13"
itertools is not needed, see below
> pico-args = "0.5"
> md5 = "0.7.0"
> +regex = "1.7"
> scopeguard = "1.1.0"
> serde = "1.0"
> serde_json = "1.0"
> diff --git a/src/main.rs b/src/main.rs
> index 3e25591..4c5135b 100644
> --- a/src/main.rs
> +++ b/src/main.rs
> @@ -1,26 +1,35 @@
> use std::ffi::OsString;
> +use std::fs::read_dir;
> +use std::io::{BufRead, BufReader};
> +use std::path::PathBuf;
>
> use anyhow::{bail, Context, Error};
> +use chrono::NaiveDateTime;
> +use itertools::Itertools;
> use proxmox_sys::linux::tty;
> use proxmox_time::epoch_i64;
> +use regex::Regex;
>
> mod vma;
> mod vma2pbs;
> -use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs};
> +use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs};
>
> const CMD_HELP: &str = "\
> Usage: vma-to-pbs [OPTIONS] --repository <auth_id@host:port:datastore> --vmid <VMID> [vma_file]
>
> Arguments:
> - [vma_file]
> + [vma_file | dump_directory]
>
> Options:
> --repository <auth_id@host:port:datastore>
> Repository URL
> [--ns <NAMESPACE>]
> Namespace
> - --vmid <VMID>
> + [--vmid <VMID>]
> Backup ID
> + This is required if a single VMA file is provided.
> + If not specified, bulk import all VMA backups in the provided directory.
> + If specified with a dump directory, only import backups of the specified vmid.
> [--backup-time <EPOCH>]
> Backup timestamp
> --fingerprint <FINGERPRINT>
> @@ -41,6 +50,8 @@ Options:
> File containing a comment/notes
> [--log-file <LOG_FILE>]
> Log file
> + -y, --yes
> + Automatic yes to prompts
> -h, --help
> Print help
> -V, --version
> @@ -52,7 +63,16 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
> args.remove(0); // remove the executable path.
>
> let mut first_later_args_index = 0;
> - let options = ["-h", "--help", "-c", "--compress", "-e", "--encrypt"];
> + let options = [
> + "-h",
> + "--help",
> + "-c",
> + "--compress",
> + "-e",
> + "--encrypt",
> + "-y",
> + "--yes",
> + ];
>
> for (i, arg) in args.iter().enumerate() {
> if let Some(arg) = arg.to_str() {
> @@ -87,7 +107,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
>
> let pbs_repository = args.value_from_str("--repository")?;
> let namespace = args.opt_value_from_str("--ns")?;
> - let vmid = args.value_from_str("--vmid")?;
> + let vmid = args.opt_value_from_str("--vmid")?;
> let backup_time: Option<i64> = args.opt_value_from_str("--backup-time")?;
> let backup_time = backup_time.unwrap_or_else(epoch_i64);
> let fingerprint = args.opt_value_from_str("--fingerprint")?;
> @@ -99,6 +119,7 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
> let key_password_file: Option<OsString> = args.opt_value_from_str("--key-password-file")?;
> let notes_file: Option<OsString> = args.opt_value_from_str("--notes-file")?;
> let log_file_path: Option<OsString> = args.opt_value_from_str("--log-file")?;
> + let yes = args.contains(["-y", "--yes"]);
>
> match (encrypt, keyfile.is_some()) {
> (true, false) => bail!("--encrypt requires a --keyfile!"),
> @@ -196,15 +217,131 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
> encrypt,
> };
>
> - let vma_args = VmaBackupArgs {
> - vma_file_path: vma_file_path.cloned(),
> - backup_id: vmid,
> - backup_time,
> - notes,
> - log_file_path,
> + let bulk =
> + vma_file_path
> + .map(PathBuf::from)
> + .and_then(|path| if path.is_dir() { Some(path) } else { None });
> +
> + let grouped_vmas = if let Some(dump_dir_path) = bulk {
grouped_vmas should still be a map, not a vec of vecs.
E.g., something like this (requires some more adaptation - while this
could use itertools, I don't think it's worth pulling that in if the
same can be had with a single fold invocation):
@@ -298,12 +298,16 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
vmas.sort_by_key(|d| d.backup_time);
let total_vma_count = vmas.len();
- let mut grouped_vmas: Vec<_> = vmas
- .into_iter()
- .into_group_map_by(|d| d.backup_id.clone())
- .into_values()
- .collect();
- grouped_vmas.sort_by_key(|d| d[0].backup_id.clone());
+ let grouped_vmas = vmas.into_iter().fold(
+ HashMap::new(),
+ |mut grouped: HashMap<String, Vec<VmaBackupArgs>>, vma_args| {
+ grouped
+ .entry(vma_args.backup_id.clone())
+ .or_default()
+ .push(vma_args);
+ grouped
+ },
+ );
log::info!(
"Found {} backup archive(s) of {} different VMID(s):",
@@ -311,12 +315,8 @@ fn parse_args() -> Result<BackupVmaToPbsArgs, Error> {
grouped_vmas.len()
);
- for vma_group in &grouped_vmas {
- log::info!(
- "- VMID {}: {} backups",
- vma_group[0].backup_id,
- vma_group.len()
- );
+ for (vma_group, vma_args) in &grouped_vmas {
+ log::info!("- VMID {}: {} backups", vma_group, vma_args.len());
}
if !yes {
> + let re = Regex::new(
> + r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$",
> + )?;
> +
> + let mut vmas = Vec::new();
> +
> + for entry in read_dir(dump_dir_path)? {
> + let entry = entry?;
> + let path = entry.path();
> +
> + if !path.is_file() {
> + continue;
> + }
> +
> + if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
> + let Some((_, [backup_id, timestr, ext])) =
> + re.captures(file_name).map(|c| c.extract())
> + else {
> + // Skip the file, since it is not a VMA backup
> + continue;
> + };
> +
> + if let Some(ref vmid) = vmid {
> + if backup_id != vmid {
> + // Skip the backup, since it does not match the specified vmid
> + continue;
> + }
> + }
> +
> + let compression = match ext {
> + "" => None,
> + ".zst" => Some(Compression::Zstd),
> + ".lzo" => Some(Compression::Lzo),
> + ".gz" => Some(Compression::GZip),
> + _ => bail!("Unexpected file extension: {ext}"),
> + };
> +
> + let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")?
> + .and_utc()
> + .timestamp();
> +
> + let notes_path = path.with_file_name(format!("{}.notes", file_name));
> + let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?;
> +
> + let log_path = path.with_file_name(format!("{}.log", file_name));
> + let log_file_path = if log_path.exists() {
> + Some(log_path.to_path_buf().into_os_string())
> + } else {
> + None
> + };
> +
> + let backup_args = VmaBackupArgs {
> + vma_file_path: Some(path.clone().into()),
> + compression,
> + backup_id: backup_id.to_string(),
> + backup_time,
> + notes,
> + log_file_path,
> + };
> + vmas.push(backup_args);
> + }
> + }
> +
> + vmas.sort_by_key(|d| d.backup_time);
> + let total_vma_count = vmas.len();
> + let mut grouped_vmas: Vec<_> = vmas
> + .into_iter()
> + .into_group_map_by(|d| d.backup_id.clone())
> + .into_values()
> + .collect();
> + grouped_vmas.sort_by_key(|d| d[0].backup_id.clone());
> +
> + println!(
> + "Found {} backup archive(s) of {} different VMID(s):",
> + total_vma_count,
> + grouped_vmas.len()
> + );
If we don't find any backup archives, shouldn't we print something else here and exit?
> +
> + for vma_group in &grouped_vmas {
> + println!(
> + "- VMID {}: {} backups",
> + vma_group[0].backup_id,
> + vma_group.len()
> + );
> + }
> +
> + if !yes {
> + loop {
> + eprint!("Proceed with the bulk import? (y/n): ");
> + let mut line = String::new();
> +
> + BufReader::new(std::io::stdin()).read_line(&mut line)?;
> + let trimmed = line.trim();
> + if trimmed == "y" || trimmed == "Y" {
> + break;
> + } else if trimmed == "n" || trimmed == "N" {
> + bail!("Bulk import was not confirmed.");
> + }
Maybe this should mimic what we do in proxmox_router when prompting for
confirmation, e.g., flush stdout, have a default value, ...?
Should we also abort after a few loops?
> + }
> + }
> +
> + grouped_vmas
_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
next prev parent reply other threads:[~2024-11-04 13:06 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-30 13:55 [pbs-devel] [PATCH vma-to-pbs v4 0/6] " Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 1/6] split BackupVmaToPbsArgs into PbsArgs and VmaBackupArgs Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 2/6] add support for bulk import of a dump directory Filip Schauer
2024-11-04 13:06 ` Fabian Grünbichler [this message]
2024-11-11 13:13 ` Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 3/6] add option to skip vmids whose backups failed to upload Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 4/6] remove hard coded values Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 5/6] use level-based logging instead of println Filip Schauer
2024-10-30 13:55 ` [pbs-devel] [PATCH vma-to-pbs v4 6/6] log device upload progress as a percentage Filip Schauer
2024-11-04 13:09 ` [pbs-devel] partially-applied: [PATCH vma-to-pbs v4 0/6] add support for bulk import of a dump directory Fabian Grünbichler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1730724842.agk2is6zq8.astroid@yuna.none \
--to=f.gruenbichler@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.