From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 4C0601FF15C for ; Wed, 13 Nov 2024 12:41:14 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C4398145C8; Wed, 13 Nov 2024 12:41:15 +0100 (CET) Mime-Version: 1.0 Date: Wed, 13 Nov 2024 12:41:12 +0100 Message-Id: From: "Shannon Sterz" To: "Proxmox Backup Server development discussion" X-Mailer: aerc 0.17.0-69-g65571b67d7d3-dirty References: <20241111130822.124584-1-f.schauer@proxmox.com> <20241111130822.124584-2-f.schauer@proxmox.com> In-Reply-To: <20241111130822.124584-2-f.schauer@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL -0.043 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH vma-to-pbs v5 1/4] add support for bulk import of a dump directory X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" comments in-line: On Mon Nov 11, 2024 at 2:08 PM CET, Filip Schauer wrote: > When a path to a directory is provided in the vma_file argument, try to > upload all VMA backups in the directory. This also handles compressed > VMA files, notes and logs. If a vmid is specified with --vmid, only the > backups of that particular vmid are uploaded. > > This is intended for use on a dump directory: > > PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \ > --repository 'user@realm!token@server:port:datastore' \ > /var/lib/vz/dump > > Signed-off-by: Filip Schauer > --- > Cargo.toml | 2 + > src/main.rs | 167 +++++++++++++++++++++++++++++++++++++++++++++---- > src/vma2pbs.rs | 64 ++++++++++++++++--- > 3 files changed, 214 insertions(+), 19 deletions(-) > > diff --git a/Cargo.toml b/Cargo.toml > index cd13426..ad80304 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -7,9 +7,11 @@ edition = "2021" > [dependencies] > anyhow = "1.0" > bincode = "1.3" > +chrono = "0.4" > hyper = "0.14.5" > pico-args = "0.5" > md5 = "0.7.0" > +regex = "1.7" > scopeguard = "1.1.0" > serde = "1.0" > serde_json = "1.0" > diff --git a/src/main.rs b/src/main.rs > index 3e25591..a394078 100644 > --- a/src/main.rs > +++ b/src/main.rs > @@ -1,26 +1,35 @@ > +use std::collections::HashMap; > use std::ffi::OsString; > +use std::fs::read_dir; > +use std::io::{BufRead, BufReader, Write}; > +use std::path::PathBuf; > > use anyhow::{bail, Context, Error}; > +use chrono::NaiveDateTime; > use proxmox_sys::linux::tty; > use proxmox_time::epoch_i64; > +use regex::Regex; > > mod vma; > mod vma2pbs; > -use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs}; > +use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs}; > > const CMD_HELP: &str = "\ > Usage: vma-to-pbs [OPTIONS] --repository --vmid [vma_file] > > Arguments: > - [vma_file] > + [vma_file | dump_directory] > > Options: > --repository > Repository URL > [--ns ] > Namespace > - --vmid > + [--vmid ] nit: this is marked as optional here (and in the code), but the usage line above still make it look like it's required. > Backup ID > + This is required if a single VMA file is provided. > + If not specified, bulk import all VMA backups in the provided directory. > + If specified with a dump directory, only import backups of the specified vmid. > [--backup-time ] > Backup timestamp > --fingerprint > @@ -41,6 +50,8 @@ Options: > File containing a comment/notes > [--log-file ] > Log file > + -y, --yes > + Automatic yes to prompts > -h, --help > Print help > -V, --version > @@ -52,7 +63,16 @@ fn parse_args() -> Result { > args.remove(0); // remove the executable path. > > let mut first_later_args_index = 0; > - let options = ["-h", "--help", "-c", "--compress", "-e", "--encrypt"]; > + let options = [ > + "-h", > + "--help", > + "-c", > + "--compress", > + "-e", > + "--encrypt", > + "-y", > + "--yes", > + ]; > > for (i, arg) in args.iter().enumerate() { > if let Some(arg) = arg.to_str() { > @@ -87,7 +107,7 @@ fn parse_args() -> Result { > > let pbs_repository = args.value_from_str("--repository")?; > let namespace = args.opt_value_from_str("--ns")?; > - let vmid = args.value_from_str("--vmid")?; > + let vmid: Option = args.opt_value_from_str("--vmid")?; > let backup_time: Option = args.opt_value_from_str("--backup-time")?; > let backup_time = backup_time.unwrap_or_else(epoch_i64); > let fingerprint = args.opt_value_from_str("--fingerprint")?; > @@ -99,6 +119,7 @@ fn parse_args() -> Result { > let key_password_file: Option = args.opt_value_from_str("--key-password-file")?; > let notes_file: Option = args.opt_value_from_str("--notes-file")?; > let log_file_path: Option = args.opt_value_from_str("--log-file")?; > + let yes = args.contains(["-y", "--yes"]); > > match (encrypt, keyfile.is_some()) { > (true, false) => bail!("--encrypt requires a --keyfile!"), > @@ -196,15 +217,137 @@ fn parse_args() -> Result { > encrypt, > }; > > - let vma_args = VmaBackupArgs { > - vma_file_path: vma_file_path.cloned(), > - backup_id: vmid, > - backup_time, > - notes, > - log_file_path, > + let bulk = > + vma_file_path > + .map(PathBuf::from) > + .and_then(|path| if path.is_dir() { Some(path) } else { None }); > + > + let grouped_vmas = if let Some(dump_dir_path) = bulk { > + let re = Regex::new( > + r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$", > + )?; > + > + let mut vmas = Vec::new(); > + > + for entry in read_dir(dump_dir_path)? { > + let entry = entry?; > + let path = entry.path(); > + > + if !path.is_file() { > + continue; > + } > + > + if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) { > + let Some((_, [backup_id, timestr, ext])) = > + re.captures(file_name).map(|c| c.extract()) > + else { > + // Skip the file, since it is not a VMA backup > + continue; > + }; > + > + if let Some(ref vmid) = vmid { > + if backup_id != vmid { > + // Skip the backup, since it does not match the specified vmid > + continue; > + } > + } > + > + let compression = match ext { > + "" => None, > + ".zst" => Some(Compression::Zstd), > + ".lzo" => Some(Compression::Lzo), > + ".gz" => Some(Compression::GZip), > + _ => bail!("Unexpected file extension: {ext}"), > + }; > + > + let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")? > + .and_utc() > + .timestamp(); > + > + let notes_path = path.with_file_name(format!("{}.notes", file_name)); > + let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?; > + > + let log_path = path.with_file_name(format!("{}.log", file_name)); > + let log_file_path = if log_path.exists() { > + Some(log_path.to_path_buf().into_os_string()) > + } else { > + None > + }; > + > + let backup_args = VmaBackupArgs { > + vma_file_path: Some(path.clone().into()), > + compression, > + backup_id: backup_id.to_string(), > + backup_time, > + notes, > + log_file_path, > + }; > + vmas.push(backup_args); > + } > + } > + > + vmas.sort_by_key(|d| d.backup_time); > + let total_vma_count = vmas.len(); > + let grouped_vmas = vmas.into_iter().fold( > + HashMap::new(), > + |mut grouped: HashMap>, vma_args| { > + grouped > + .entry(vma_args.backup_id.clone()) > + .or_default() > + .push(vma_args); > + grouped > + }, > + ); > + > + if grouped_vmas.is_empty() { > + bail!("Did not find any backup archives"); > + } > + > + println!( > + "Found {} backup archive(s) of {} different VMID(s):", > + total_vma_count, > + grouped_vmas.len() > + ); > + > + for (backup_id, vma_group) in &grouped_vmas { > + println!("- VMID {}: {} backups", backup_id, vma_group.len()); nit: this should be: ```rs println!("- VMID {backup_id}: {} backups", vma_group.len()); ``` > + } > + > + if !yes { > + eprint!("Proceed with the bulk import? (Y/n): "); > + std::io::stdout().flush()?; > + let mut line = String::new(); > + > + BufReader::new(std::io::stdin()).read_line(&mut line)?; > + let trimmed = line.trim(); > + match trimmed { > + "y" | "Y" | "" => {} > + "n" | "N" => bail!("Bulk import was not confirmed."), > + _ => bail!("Unexpected choice '{trimmed}'!"), > + } > + } > + > + grouped_vmas > + } else if let Some(vmid) = vmid { > + HashMap::from([( > + vmid.clone(), > + vec![VmaBackupArgs { > + vma_file_path: vma_file_path.cloned(), > + compression: None, > + backup_id: vmid, > + backup_time, > + notes, > + log_file_path, > + }], > + )]) > + } else { > + bail!("No vmid specified for single backup file"); > }; > > - let options = BackupVmaToPbsArgs { pbs_args, vma_args }; > + let options = BackupVmaToPbsArgs { > + pbs_args, > + grouped_vmas, > + }; > > Ok(options) > } > diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs > index a888a7b..95ede9b 100644 > --- a/src/vma2pbs.rs > +++ b/src/vma2pbs.rs > @@ -4,6 +4,7 @@ use std::collections::HashMap; > use std::ffi::{c_char, CStr, CString, OsString}; > use std::fs::File; > use std::io::{stdin, BufRead, BufReader, Read}; > +use std::process::{Command, Stdio}; > use std::ptr; > use std::time::SystemTime; > > @@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536; > > pub struct BackupVmaToPbsArgs { > pub pbs_args: PbsArgs, > - pub vma_args: VmaBackupArgs, > + pub grouped_vmas: HashMap>, > } > > pub struct PbsArgs { > @@ -45,8 +46,15 @@ pub struct PbsArgs { > pub encrypt: bool, > } > > +pub enum Compression { > + Zstd, > + Lzo, > + GZip, > +} > + > pub struct VmaBackupArgs { > pub vma_file_path: Option, > + pub compression: Option, > pub backup_id: String, > pub backup_time: i64, > pub notes: Option, > @@ -467,7 +475,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> { > > let start_transfer_time = SystemTime::now(); > > - upload_vma_file(pbs_args, &args.vma_args)?; > + for (_, vma_group) in args.grouped_vmas { > + for backup_args in vma_group { > + if let Err(e) = upload_vma_file(pbs_args, &backup_args) { > + eprintln!( > + "Failed to upload vma file at {:?} - {}", > + backup_args.vma_file_path.unwrap_or("(stdin)".into()), > + e nit: same as above, move `e` into the format string > + ); > + println!("Skipping VMID {}", backup_args.backup_id); > + break; > + } > + } > + } > > let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?; > let total_seconds = transfer_duration.as_secs(); > @@ -480,13 +500,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> { > } > > fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> { > - let vma_file: Box = match &backup_args.vma_file_path { > - Some(vma_file_path) => match File::open(vma_file_path) { > - Err(why) => return Err(anyhow!("Couldn't open file: {}", why)), > - Ok(file) => Box::new(BufReader::new(file)), > + match &backup_args.vma_file_path { > + Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path), nit: this could be ```rs Some(vma_file_path) => println!("Uploading VMA backup from {vma_file_path:?}"), ``` > + None => println!("Uploading VMA backup from (stdin)"), > + }; > + > + let vma_file: Box = match &backup_args.compression { > + Some(compression) => { > + let vma_file_path = backup_args > + .vma_file_path > + .as_ref() > + .expect("No VMA file path provided"); > + let mut cmd = match compression { > + Compression::Zstd => { > + let mut cmd = Command::new("zstd"); > + cmd.args(["-q", "-d", "-c"]); > + cmd i think the following would be more elegant here: ```rs Compression::Zstd => Command::new("zstd") .args(["-q", "-d", "-c"]), ``` it's a bit more concise imo > + } > + Compression::Lzo => { > + let mut cmd = Command::new("lzop"); > + cmd.args(["-d", "-c"]); > + cmd same as above > + } > + Compression::GZip => Command::new("zcat"), > + }; > + let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?; > + let stdout = process.stdout.expect("Failed to capture stdout"); > + Box::new(BufReader::new(stdout)) > + } > + None => match &backup_args.vma_file_path { > + Some(vma_file_path) => match File::open(vma_file_path) { > + Err(why) => return Err(anyhow!("Couldn't open file: {}", why)), nit: `why` can be moved into the format string here > + Ok(file) => Box::new(BufReader::new(file)), > + }, > + None => Box::new(BufReader::new(stdin())), > }, > - None => Box::new(BufReader::new(stdin())), > }; > + > let vma_reader = VmaReader::new(vma_file)?; > > let pbs = create_pbs_backup_task(pbs_args, backup_args)?; _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel