From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 2174B1FF16E for ; Tue, 29 Oct 2024 11:41:54 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 097D334758; Tue, 29 Oct 2024 11:41:58 +0100 (CET) MIME-Version: 1.0 In-Reply-To: <20241022142843.142191-3-f.schauer@proxmox.com> References: <20241022142843.142191-1-f.schauer@proxmox.com> <20241022142843.142191-3-f.schauer@proxmox.com> From: Fabian =?utf-8?q?Gr=C3=BCnbichler?= To: Filip Schauer , pbs-devel@lists.proxmox.com Date: Tue, 29 Oct 2024 11:41:16 +0100 Message-ID: <173019847665.51916.11937343340126733147@yuna.proxmox.com> User-Agent: alot/0.10 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.049 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH vma-to-pbs v3 2/6] add support for bulk import of a dump directory X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Quoting Filip Schauer (2024-10-22 16:28:39) > When a path to a directory is provided in the vma_file argument, try to > upload all VMA backups in the directory. This also handles compressed > VMA files, notes and logs. If a vmid is specified with --vmid, only the > backups of that particular vmid are uploaded. > > This is intended for use on a dump directory: > > PBS_FINGERPRINT='PBS_FINGERPRINT' vma-to-pbs \ > --repository 'user@realm!token@server:port:datastore' \ > /var/lib/vz/dump > > Signed-off-by: Filip Schauer > --- > Cargo.toml | 3 ++ > src/main.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++----- > src/vma2pbs.rs | 64 +++++++++++++++++++++++--- > 3 files changed, 169 insertions(+), 18 deletions(-) > > diff --git a/Cargo.toml b/Cargo.toml > index cd13426..5c6a175 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -7,9 +7,12 @@ edition = "2021" > [dependencies] > anyhow = "1.0" > bincode = "1.3" > +chrono = "0.4" > hyper = "0.14.5" > +itertools = "0.13" > pico-args = "0.5" > md5 = "0.7.0" > +regex = "1.7" > scopeguard = "1.1.0" > serde = "1.0" > serde_json = "1.0" > diff --git a/src/main.rs b/src/main.rs > index 3e25591..83c59f6 100644 > --- a/src/main.rs > +++ b/src/main.rs > @@ -1,26 +1,33 @@ > use std::ffi::OsString; > +use std::fs::read_dir; > +use std::path::PathBuf; > > use anyhow::{bail, Context, Error}; > +use chrono::NaiveDateTime; > +use itertools::Itertools; > use proxmox_sys::linux::tty; > use proxmox_time::epoch_i64; > +use regex::Regex; > > mod vma; > mod vma2pbs; > -use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, PbsArgs, VmaBackupArgs}; > +use vma2pbs::{vma2pbs, BackupVmaToPbsArgs, Compression, PbsArgs, VmaBackupArgs}; > > const CMD_HELP: &str = "\ > Usage: vma-to-pbs [OPTIONS] --repository --vmid [vma_file] > > Arguments: > - [vma_file] > + [vma_file | dump_directory] > > Options: > --repository > Repository URL > [--ns ] > Namespace > - --vmid > + [--vmid ] > Backup ID > + If not specified, bulk import all VMA backups in the provided directory. > + If specified with a dump directory, only import backups of the specified vmid. this is missing the previous case that is still there - if a vma_file is given, then vmid is required. > [--backup-time ] > Backup timestamp > --fingerprint > @@ -87,7 +94,7 @@ fn parse_args() -> Result { > > let pbs_repository = args.value_from_str("--repository")?; > let namespace = args.opt_value_from_str("--ns")?; > - let vmid = args.value_from_str("--vmid")?; > + let vmid = args.opt_value_from_str("--vmid")?; > let backup_time: Option = args.opt_value_from_str("--backup-time")?; > let backup_time = backup_time.unwrap_or_else(epoch_i64); > let fingerprint = args.opt_value_from_str("--fingerprint")?; > @@ -196,15 +203,106 @@ fn parse_args() -> Result { > encrypt, > }; > > - let vma_args = VmaBackupArgs { > - vma_file_path: vma_file_path.cloned(), > - backup_id: vmid, > - backup_time, > - notes, > - log_file_path, > + let mut grouped_vmas = Vec::new(); I think this would be better off as a map? that would also allow constructing it in place below.. > + > + let bulk = if let Some(vma_file_path) = vma_file_path { > + PathBuf::from(vma_file_path).is_dir() > + } else { > + false > }; this is vma_file_path.is_some_and(|path| PathBuf::from(path).is_dir()) , but see below > > - let options = BackupVmaToPbsArgs { pbs_args, vma_args }; > + if bulk { since bulk is only used here, > + let dump_dir_path = > + PathBuf::from(vma_file_path.expect("no directory specified for bulk import")); and this does another conversion to a PathBuf, this could just be let bulk = vma_file_path .map(PathBuf::from) .and_then(|path| if path.is_dir() { Some(path) } else { None }); let grouped_vmas = if let Some(dump_dir_path) = bulk { let re = Regex::new( although I am not sure whether the possible cases here - optional vmid, vma_file_path is set and a dir (the new bulk import) - vmid is set, optional vma_file_path (the old single file or stdin import) - no vmid set, vma_file_path not set or not a dir (invalid combination of options) are not an argument for splitting single file and bulk import into separate commands, or adding an explicit --bulk bool argument that would force a user to opt into bulk importing? > + > + let re = Regex::new( > + r"vzdump-qemu-(\d+)-(\d{4}_\d{2}_\d{2}-\d{2}_\d{2}_\d{2}).vma(|.zst|.lzo|.gz)$", > + )?; > + > + let mut vmas = Vec::new(); > + > + for entry in read_dir(dump_dir_path)? { > + let entry = entry?; > + let path = entry.path(); > + > + if !path.is_file() { > + continue; > + } > + > + if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) { if we find a file that is not valid UTF-8, should we warn about it? > + let Some((_, [backup_id, timestr, ext])) = > + re.captures(file_name).map(|c| c.extract()) > + else { > + // Skip the file, since it is not a VMA backup > + continue; > + }; > + > + if let Some(ref vmid) = vmid { > + if backup_id != vmid { > + // Skip the backup, since it does not match the specified vmid > + continue; should we log these as well? > + } > + } > + > + let compression = match ext { > + "" => None, > + ".zst" => Some(Compression::Zstd), > + ".lzo" => Some(Compression::Lzo), > + ".gz" => Some(Compression::GZip), > + _ => continue, and these? > + }; > + > + let backup_time = NaiveDateTime::parse_from_str(timestr, "%Y_%m_%d-%H_%M_%S")? > + .and_utc() > + .timestamp(); > + > + let notes_path = path.with_file_name(format!("{}.notes", file_name)); > + let notes = proxmox_sys::fs::file_read_optional_string(notes_path)?; > + > + let log_path = path.with_file_name(format!("{}.log", file_name)); > + let log_file_path = if log_path.exists() { > + Some(log_path.to_path_buf().into_os_string()) > + } else { > + None > + }; and finally here all that are found and will be processed? ;) > + > + let backup_args = VmaBackupArgs { > + vma_file_path: Some(path.clone().into()), > + compression, > + backup_id: backup_id.to_string(), > + backup_time, > + notes, > + log_file_path, > + }; > + vmas.push(backup_args); > + } > + } > + > + vmas.sort_by_key(|d| d.backup_time); > + grouped_vmas = vmas > + .into_iter() > + .into_group_map_by(|d| d.backup_id.clone()) > + .into_values() > + .collect(); > + grouped_vmas.sort_by_key(|d| d[0].backup_id.clone()); or here, if we just want to print a summary for the bulk imports like: Found backup archives: {total_count} Summary: - VMID XX: {count} - VMID XY: {count} - .. could even ask for confirmation before starting the bulk import? > + } else if let Some(vmid) = vmid { > + let vmas = vec![VmaBackupArgs { > + vma_file_path: vma_file_path.cloned(), > + compression: None, > + backup_id: vmid, > + backup_time, > + notes, > + log_file_path, > + }]; > + grouped_vmas.push(vmas); > + } else { > + bail!("No vmid specified for single backup file"); > + } > + > + let options = BackupVmaToPbsArgs { > + pbs_args, > + grouped_vmas, > + }; > > Ok(options) > } > diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs > index 37ea308..05294f3 100644 > --- a/src/vma2pbs.rs > +++ b/src/vma2pbs.rs > @@ -4,6 +4,7 @@ use std::collections::HashMap; > use std::ffi::{c_char, CStr, CString, OsString}; > use std::fs::File; > use std::io::{stdin, BufRead, BufReader, Read}; > +use std::process::{Command, Stdio}; > use std::ptr; > use std::time::SystemTime; > > @@ -30,7 +31,7 @@ const VMA_CLUSTER_SIZE: usize = 65536; > > pub struct BackupVmaToPbsArgs { > pub pbs_args: PbsArgs, > - pub vma_args: VmaBackupArgs, > + pub grouped_vmas: Vec>, > } > > pub struct PbsArgs { > @@ -45,8 +46,15 @@ pub struct PbsArgs { > pub encrypt: bool, > } > > +pub enum Compression { > + Zstd, > + Lzo, > + GZip, > +} > + > pub struct VmaBackupArgs { > pub vma_file_path: Option, > + pub compression: Option, > pub backup_id: String, > pub backup_time: i64, > pub notes: Option, > @@ -468,7 +476,19 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> { > > let start_transfer_time = SystemTime::now(); > > - upload_vma_file(pbs_args, &args.vma_args)?; > + for vma_group in args.grouped_vmas { > + for backup_args in vma_group { > + if let Err(e) = upload_vma_file(pbs_args, &backup_args) { > + eprintln!( > + "Failed to upload vma file at {:?} - {}", > + backup_args.vma_file_path.expect("missing VMA file path"), if this is None it's a backup from stdin, and shouldn't print this confusing error message? (ah, that happens in the next patch, but should be folded in here ;)) > + e > + ); > + println!("Skipping VMID {}", backup_args.backup_id); > + break; > + } > + } > + } > > let transfer_duration = SystemTime::now().duration_since(start_transfer_time)?; > let total_seconds = transfer_duration.as_secs(); > @@ -481,13 +501,43 @@ pub fn vma2pbs(args: BackupVmaToPbsArgs) -> Result<(), Error> { > } > > fn upload_vma_file(pbs_args: &PbsArgs, backup_args: &VmaBackupArgs) -> Result<(), Error> { > - let vma_file: Box = match &backup_args.vma_file_path { > - Some(vma_file_path) => match File::open(vma_file_path) { > - Err(why) => return Err(anyhow!("Couldn't open file: {}", why)), > - Ok(file) => Box::new(BufReader::new(file)), > + match &backup_args.vma_file_path { > + Some(vma_file_path) => println!("Uploading VMA backup from {:?}", vma_file_path), > + None => println!("Uploading VMA backup from (stdin)"), > + }; > + > + let vma_file: Box = match &backup_args.compression { > + Some(compression) => { > + let vma_file_path = backup_args > + .vma_file_path > + .as_ref() > + .expect("No VMA file path provided"); > + let mut cmd = match compression { > + Compression::Zstd => { > + let mut cmd = Command::new("zstd"); > + cmd.args(["-q", "-d", "-c"]); > + cmd > + } > + Compression::Lzo => { > + let mut cmd = Command::new("lzop"); > + cmd.args(["-d", "-c"]); > + cmd > + } > + Compression::GZip => Command::new("zcat"), > + }; > + let process = cmd.arg(vma_file_path).stdout(Stdio::piped()).spawn()?; > + let stdout = process.stdout.expect("Failed to capture stdout"); > + Box::new(BufReader::new(stdout)) did you test whether with_capacity and a higher buffer size than 8K improves performance? > + } > + None => match &backup_args.vma_file_path { > + Some(vma_file_path) => match File::open(vma_file_path) { > + Err(why) => return Err(anyhow!("Couldn't open file: {}", why)), > + Ok(file) => Box::new(BufReader::new(file)), same here > + }, > + None => Box::new(BufReader::new(stdin())), > }, > - None => Box::new(BufReader::new(stdin())), > }; > + > let vma_reader = VmaReader::new(vma_file)?; > > let pbs = create_pbs_backup_task(pbs_args, backup_args)?; > -- > 2.39.5 > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel