From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id D259A609AA for ; Tue, 15 Dec 2020 11:14:27 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C190CFC96 for ; Tue, 15 Dec 2020 11:14:27 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 5FE47FC8B for ; Tue, 15 Dec 2020 11:14:26 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 2D9A74508E for ; Tue, 15 Dec 2020 11:14:26 +0100 (CET) To: Proxmox Backup Server development discussion , Hannes Laimer References: <20201215081626.73888-1-h.laimer@proxmox.com> <20201215081626.73888-2-h.laimer@proxmox.com> From: Thomas Lamprecht Message-ID: <0dc9688f-880f-f507-89a7-5f286b27ab77@proxmox.com> Date: Tue, 15 Dec 2020 11:14:24 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:84.0) Gecko/20100101 Thunderbird/84.0 MIME-Version: 1.0 In-Reply-To: <20201215081626.73888-2-h.laimer@proxmox.com> Content-Type: text/plain; charset=UTF-8 Content-Language: en-US Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL -0.067 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -0.001 Looks like a legit reply (A) RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [inspect.rs, proxmox-backup-recovery.rs, proxmox.com, mod.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup 1/3] add inspection of chunks X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 15 Dec 2020 10:14:27 -0000 Still no patch version, that must be used! Use the `-vX` option to your g= it format-patch or git send-email command, please really read, IIRC I'm link= ing that the third time now! https://pve.proxmox.com/wiki/Developer_Documentation#Versioned_Patches On 15.12.20 09:16, Hannes Laimer wrote: > Signed-off-by: Hannes Laimer > --- below this triple dash is for meta information, it is not the commit mess= age but should describe changes since the last version of the patch, it won't= get included in git. > Adds possibility to inspect chunks and find indexes that reference the > chunk. Options: > - chunk: path to the chunk file > - [opt] decode: path to a file or to stdout(-), if specified, the chun= k will > be decoded into the specified location > - [opt] keyfile: path to a keyfile, needed if decode is specified and = the > data was encrypted > - [opt] reference-filter: path in which indexes that reference the chu= nk > should be searched, can be a group, snapshot or the whole datastore,= > if not specified no references will be searched > Makefile | 3 +- > src/bin/proxmox-backup-recovery.rs | 32 ++++ what are they now, recovery or inspect tools? Maybe a more generic name l= ike: # proxmox-backup-debug=20 inspect chunk ... inspect file ... recover index ... I mean what is the plan for the whole CLI tool, what additional commands could be added, I'd like to not just tape this together somehow, but have= somewhat of a design plan.. Documentation patches would be also good to have, e.g., what is the refer= ence-filter, full path to datastore, index, ...? > src/bin/proxmox_backup_recovery/inspect.rs | 195 +++++++++++++++++++++= > src/bin/proxmox_backup_recovery/mod.rs | 2 + > 4 files changed, 231 insertions(+), 1 deletion(-) > create mode 100644 src/bin/proxmox-backup-recovery.rs > create mode 100644 src/bin/proxmox_backup_recovery/inspect.rs > create mode 100644 src/bin/proxmox_backup_recovery/mod.rs >=20 > diff --git a/Makefile b/Makefile > index a1af9d51..a9c2a711 100644 > --- a/Makefile > +++ b/Makefile > @@ -13,7 +13,8 @@ USR_BIN :=3D \ > =20 > # Binaries usable by admins > USR_SBIN :=3D \ > - proxmox-backup-manager > + proxmox-backup-manager \ > + proxmox-backup-recovery \ > =20 > # Binaries for services: > SERVICE_BIN :=3D \ > diff --git a/src/bin/proxmox-backup-recovery.rs b/src/bin/proxmox-backu= p-recovery.rs > new file mode 100644 > index 00000000..ae8c18d5 > --- /dev/null > +++ b/src/bin/proxmox-backup-recovery.rs > @@ -0,0 +1,32 @@ > +use anyhow::Error; > +use proxmox::api::schema::{Schema, StringSchema}; > +use proxmox::api::cli::*; > + > +use proxmox::sys::linux::tty; > +use proxmox_backup_recovery::*; > +mod proxmox_backup_recovery; > + > +pub fn get_encryption_key_password() -> Result, Error> { > + tty::read_password("Encryption Key Password: ") > +} > + > +pub const PATH_SCHEMA: Schema =3D StringSchema::new("Path to a file or= a directory").schema(); > + > +pub const KEYFILE_SCHEMA: Schema =3D StringSchema::new( > + "Path to encryption key. If the data was encrypted, this key will = be used for decryption.", > +) > +.schema(); > + > +fn main() { > + proxmox_backup::tools::setup_safe_path_env(); > + > + let cmd_def =3D CliCommandMap::new() > + .insert("inspect", inspect_commands()); > + > + let rpcenv =3D CliEnvironment::new(); > + run_cli_command( > + cmd_def, > + rpcenv, > + Some(|future| proxmox_backup::tools::runtime::main(future)), > + ); > +} > diff --git a/src/bin/proxmox_backup_recovery/inspect.rs b/src/bin/proxm= ox_backup_recovery/inspect.rs > new file mode 100644 > index 00000000..6bf4a0a3 > --- /dev/null > +++ b/src/bin/proxmox_backup_recovery/inspect.rs > @@ -0,0 +1,195 @@ > +use std::collections::HashSet; > +use std::fs::File; > +use std::io::Write; > +use std::path::Path; > + > +use anyhow::Error; > +use proxmox::api::cli::{ > + format_and_print_result, get_output_format, CliCommand, CliCommand= Map, CommandLineInterface, > +}; > +use proxmox::api::{api, cli::*, RpcEnvironment}; > +use serde_json::{json, Value}; > +use walkdir::WalkDir; > + > +use proxmox_backup::api2::types::SHA256_HEX_REGEX; > +use proxmox_backup::backup::{ > + load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, F= ixedIndexReader, IndexFile, > +}; > +use proxmox_backup::tools; > + > +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA};= > + > +/// Decodes a blob and writes its content either to stdout or into a f= ile > +fn decode_blob( > + output_path: Option<&Path>, > + key_file: Option<&Path>, > + digest: Option<&[u8; 32]>, > + blob: &DataBlob, > +) -> Result<(), Error> { > + let mut crypt_conf_opt =3D None; > + let crypt_conf; > + > + if blob.is_encrypted() && key_file.is_some() { > + let (key, _created, _fingerprint) =3D > + load_and_decrypt_key(&key_file.unwrap(), &get_encryption_k= ey_password)?; > + crypt_conf =3D CryptConfig::new(key)?; > + crypt_conf_opt =3D Some(&crypt_conf); > + } > + > + if output_path.is_some() { > + let mut file =3D File::create(output_path.unwrap())?; > + Ok(file.write_all(blob.decode(crypt_conf_opt, digest)?.as_slic= e())?) > + } else { > + Ok(println!( > + "{}", > + String::from_utf8_lossy(blob.decode(crypt_conf_opt, digest= )?.as_slice()) > + )) > + } > +} > + > +#[api( > + input: { > + properties: { > + chunk: { > + schema: PATH_SCHEMA, > + }, > + "reference-filter": { > + schema: PATH_SCHEMA, > + optional: true, > + }, > + "decode": { > + schema: PATH_SCHEMA, > + optional: true, > + }, > + "keyfile": { > + schema: KEYFILE_SCHEMA, > + optional: true, > + }, > + "output-format": { > + schema: OUTPUT_FORMAT, > + optional: true, > + }, > + } > + } > +)] > +/// Inspect a chunk > +fn inspect_chunk(param: Value, _rpcenv: &mut dyn RpcEnvironment) -> Re= sult { > + let chunk_path =3D Path::new(tools::required_string_param(¶m, = "chunk")?); > + let output_format =3D get_output_format(¶m); > + let digest_str =3D chunk_path.file_name().unwrap().to_str().unwrap= (); > + > + if !SHA256_HEX_REGEX.is_match(digest_str) { > + println!("chunk filename is not valid"); at least to stderr, using eprintln! maybe also refer why is it not valid.= > + return Ok(Value::Null); maybe even use bail!(...), I mean, feels weird that we exit with 0 in suc= h a case > + } > + > + let digest_raw =3D proxmox::tools::hex_to_digest(digest_str)?; or, you may even drop above REGEX check + if and just do: let digest_raw =3D proxmox::tools::hex_to_digest(digest_str) .map_err(|e| format_err!("could not parse chunk - {}", e))?; > + > + let reference_filter_param =3D param["reference-filter"].as_str();= > + let decode_output_param =3D param["decode"].as_str(); > + let key_file_param =3D param["keyfile"].as_str(); > + > + let mut search_path =3D None; > + let mut decode_output_path =3D None; > + let mut key_file_path =3D None; > + let mut to_stdout =3D false; > + > + if let Some(path) =3D reference_filter_param { > + search_path =3D Some(Path::new(path)) > + }; Rather avoid mutable plus three extra lines (noise) and do let search_path =3D reference_filter_param.map(|path| Path::new(path)): > + > + if let Some(path) =3D decode_output_param { > + to_stdout =3D path.eq("-"); > + decode_output_path =3D Some(Path::new(path)) maybe a match could fit above better and do away with mutables let (decode_output_path, to_stdout) =3D match decode_output_param { Some(path) if path.eq("-") =3D> (None, true), Some(path) =3D> (Some(path), false), None =3D> (None, false), }; just a line less total, but more explicit, no hard feelings here though > + }; > + > + if let Some(path) =3D key_file_param { > + key_file_path =3D Some(Path::new(path)) > + }; use map here let key_file_path =3D key_file_param.map(|path| Path::new(path)); > + > + let mut file =3D std::fs::File::open(&chunk_path)?; may map_err + format! can make this a better UX if the open fails > + let blob =3D DataBlob::load_from_reader(&mut file)?; > + > + let mut referenced_by =3D None; > + if let Some(search_path) =3D search_path { > + let mut references =3D Vec::new(); > + for entry in WalkDir::new(search_path) > + .follow_links(false) > + .into_iter() > + .filter_map(|e| e.ok()) > + { > + let file_name =3D entry.file_name().to_string_lossy(); > + let mut in_index =3D HashSet::new(); why is this defined here, but only used below, actually it should not be = required anyway. > + let mut index: Option> =3D None; > + > + if file_name.ends_with(".fidx") { > + index =3D match FixedIndexReader::open(entry.path()) {= > + Ok(index) =3D> Some(Box::new(index)), > + Err(_) =3D> None, > + }; > + } > + > + if file_name.ends_with(".didx") { > + index =3D match DynamicIndexReader::open(entry.path())= { > + Ok(index) =3D> Some(Box::new(index)), > + Err(_) =3D> None, > + }; > + } > + > + if let Some(index) =3D index { > + for pos in 0..index.index_count() { > + if let Some(index_chunk_digest) =3D index.index_di= gest(pos) { > + in_index.insert(proxmox::tools::digest_to_hex(= index_chunk_digest)); > + } > + } > + } > + you could move below if hunk in the for loop's if above, avoiding the nee= d for any hashset > + if in_index.contains(digest_str) { > + references.push(entry.path().to_string_lossy().into_ow= ned()); > + } > + } > + referenced_by =3D Some(references); > + } > + > + if let Some(decode_output_path) =3D decode_output_path { > + if to_stdout { > + decode_blob(None, key_file_path, Some(&digest_raw), &blob)= ?; > + } else { > + decode_blob( > + Some(decode_output_path), > + key_file_path, > + Some(&digest_raw), > + &blob, > + )?; > + } > + } Above 12 lines could be reduced with something like (untested): if decode_output_path.is_some() { let decode_output_path =3D if to_stdout { None } else { decode_outpu= t_path }; decode_blob(decode_output_path, key_file_path, Some(&digest_raw), &bl= ob)?; } > + > + let crc_status =3D format!( > + "{}({})", > + blob.compute_crc(), > + blob.verify_crc().map_or("NOK", |_x| "OK") rather use just _ if unused param > + ); > + > + let val =3D match referenced_by { > + Some(references) =3D> json!({ > + "digest": digest_str, > + "crc": crc_status, > + "encryption": blob.crypt_mode()?, > + "referenced-by": references > + }), > + None =3D> json!({ > + "digest": digest_str, > + "crc": crc_status, > + "encryption": blob.crypt_mode()?, > + }), > + }; > + > + format_and_print_result(&val, &output_format); > + Ok(Value::Null) > +} > + > +pub fn inspect_commands() -> CommandLineInterface { > + let cmd_def =3D CliCommandMap::new().insert("chunk", CliCommand::n= ew(&API_METHOD_INSPECT_CHUNK)); > + > + cmd_def.into() > +} > diff --git a/src/bin/proxmox_backup_recovery/mod.rs b/src/bin/proxmox_b= ackup_recovery/mod.rs > new file mode 100644 > index 00000000..644583db > --- /dev/null > +++ b/src/bin/proxmox_backup_recovery/mod.rs > @@ -0,0 +1,2 @@ > +mod inspect; > +pub use inspect::*; >=20