From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id C718F6312C for ; Tue, 9 Feb 2021 11:36:24 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id BF2602BC1E for ; Tue, 9 Feb 2021 11:36:24 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id F11C22BC14 for ; Tue, 9 Feb 2021 11:36:23 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id A5C1345F8C for ; Tue, 9 Feb 2021 11:36:23 +0100 (CET) Date: Tue, 9 Feb 2021 11:36:22 +0100 From: Wolfgang Bumiller To: Hannes Laimer Cc: pbs-devel@lists.proxmox.com Message-ID: <20210209103622.hh642wejc7djz56y@olga.proxmox.com> References: <20210205075806.888558-1-h.laimer@proxmox.com> <20210205075806.888558-4-h.laimer@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20210205075806.888558-4-h.laimer@proxmox.com> User-Agent: NeoMutt/20180716 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.045 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [recover.rs, proxmox-backup-debug.rs, mod.rs] Subject: Re: [pbs-devel] [PATCH v3 proxmox-backup 3/3] add index recovery to pb-debug X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 09 Feb 2021 10:36:24 -0000 On Fri, Feb 05, 2021 at 08:58:06AM +0100, Hannes Laimer wrote: > Adds possibility to recover data from an index file. Options: > - chunks: path to the directory where the chunks are saved > - file: the index file that should be recovered(must be either .fidx or > didx) > - [opt] keyfile: path to a keyfile, if the data was encrypted, a keyfile is > needed > - [opt] skip-crc: boolean, if true, read chunks wont be verified with their > crc-sum, increases the restore speed by a lot > > Signed-off-by: Hannes Laimer > --- > > src/bin/proxmox-backup-debug.rs | 6 +- > src/bin/proxmox_backup_debug/mod.rs | 2 + > src/bin/proxmox_backup_debug/recover.rs | 109 ++++++++++++++++++++++++ > 3 files changed, 115 insertions(+), 2 deletions(-) > create mode 100644 src/bin/proxmox_backup_debug/recover.rs > > diff --git a/src/bin/proxmox-backup-debug.rs b/src/bin/proxmox-backup-debug.rs > index 079b2bc8..8bd01a59 100644 > --- a/src/bin/proxmox-backup-debug.rs > +++ b/src/bin/proxmox-backup-debug.rs > @@ -22,7 +22,9 @@ pub const KEYFILE_SCHEMA: Schema = StringSchema::new( > fn main() { > proxmox_backup::tools::setup_safe_path_env(); > > - let cmd_def = CliCommandMap::new().insert("inspect", inspect_commands()); > + let cmd_def = CliCommandMap::new() > + .insert("inspect", inspect_commands()) > + .insert("recover", recover_commands()); > > let rpcenv = CliEnvironment::new(); > run_cli_command( > @@ -30,4 +32,4 @@ fn main() { > rpcenv, > Some(|future| proxmox_backup::tools::runtime::main(future)), > ); > -} > +} > \ No newline at end of file > diff --git a/src/bin/proxmox_backup_debug/mod.rs b/src/bin/proxmox_backup_debug/mod.rs > index 644583db..62df7754 100644 > --- a/src/bin/proxmox_backup_debug/mod.rs > +++ b/src/bin/proxmox_backup_debug/mod.rs > @@ -1,2 +1,4 @@ > mod inspect; > pub use inspect::*; > +mod recover; > +pub use recover::*; > diff --git a/src/bin/proxmox_backup_debug/recover.rs b/src/bin/proxmox_backup_debug/recover.rs > new file mode 100644 > index 00000000..61025adf > --- /dev/null > +++ b/src/bin/proxmox_backup_debug/recover.rs > @@ -0,0 +1,109 @@ > +use std::fs::File; > +use std::io::{Read, Write}; > +use std::path::Path; > + > +use anyhow::{bail, format_err, Error}; > + > +use proxmox::api::api; > +use proxmox::api::cli::{CliCommand, CliCommandMap, CommandLineInterface}; > +use serde_json::Value; > + > +use proxmox_backup::backup::{ > + load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, FixedIndexReader, IndexFile, > +}; > +use proxmox_backup::tools; > + > +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA}; > + > +use proxmox::tools::digest_to_hex; > + > +use std::time::Instant; > + > +#[api( > + input: { > + properties: { > + file: { > + schema: PATH_SCHEMA, > + }, > + chunks: { > + schema: PATH_SCHEMA, > + }, > + "keyfile": { > + schema: KEYFILE_SCHEMA, > + optional: true, > + }, > + "skip-crc": { > + type: Boolean, > + optional: true, > + default: false, > + description: "Skip the crc verification, increases the restore speed immensely", > + } > + } > + } > +)] > +/// Recover a index file 'an' Perhapse add some more information of what this actually does and particularly what its limitations are. > +fn recover_index(skip_crc: bool, param: Value) -> Result { > + let start = Instant::now(); > + let file_path = Path::new(tools::required_string_param(¶m, "file")?); > + let chunks_path = Path::new(tools::required_string_param(¶m, "chunks")?); > + > + let key_file_param = param["keyfile"].as_str(); > + let key_file_path = key_file_param.map(|path| Path::new(path)); > + > + let index: Box = match file_path.extension() { > + Some(ext) if ext.eq("fidx") => Box::new( > + FixedIndexReader::open(file_path) > + .map_err(|e| format_err!("could not read index - {}", e))?, > + ), > + Some(ext) if ext.eq("didx") => Box::new( > + DynamicIndexReader::open(file_path) > + .map_err(|e| format_err!("could not read index - {}", e))?, > + ), > + _ => bail!("index file must either be a .fidx or a .didx file"), > + }; Come to think of it, we do have magic bytes at the top of all of these file types, so shouldn't a *debug* binary use *that* instead of the file's *name*? ;-) *And* perhaps warn if the name doesn't match its type... So for this we could just have a helper `fn detect_file_type() -> FileType`, which we could use in all the other functions of this binary as well. > + > + let mut crypt_conf_opt = None; > + let mut crypt_conf; > + > + let output_filename = file_path.file_stem().unwrap().to_str().unwrap(); > + let output_path = Path::new(output_filename); > + let mut output_file = File::create(output_path) > + .map_err(|e| format_err!("could not create output file - {}", e))?; > + I feel like the code below should already exist somewhere in some half-way reusable form. It might be worth trying to instantiate a `LocalDataStore` and use its `read_chunk` method, but I'm not sure if it helps much. Plus it wouldn't reuse the buffer... > + for pos in 0..index.index_count() { > + let chunk_digest = index.index_digest(pos).unwrap(); > + let digest_str = digest_to_hex(chunk_digest); > + let digest_prefix = &digest_str[0..4]; > + let chunk_path = chunks_path.join(digest_prefix).join(digest_str); > + let mut chunk_file = std::fs::File::open(&chunk_path) > + .map_err(|e| format_err!("could not open chunk file - {}", e))?; > + > + let mut data = Vec::with_capacity(1024 * 1024); ...but neither do you right now ;-) Please move this above the loop and just use `.clear()` here (which does not deallocate). > + chunk_file.read_to_end(&mut data)?; > + let chunk_blob = DataBlob::from_raw(data)?; > + > + if !skip_crc { > + chunk_blob.verify_crc()?; > + } > + > + if key_file_path.is_some() && chunk_blob.is_encrypted() && crypt_conf_opt.is_none() { > + let (key, _created, _fingerprint) = > + load_and_decrypt_key(&key_file_path.unwrap(), &get_encryption_key_password)?; > + crypt_conf = CryptConfig::new(key)?; > + crypt_conf_opt = Some(&crypt_conf); > + } > + > + output_file.write_all( > + chunk_blob > + .decode(crypt_conf_opt, Some(chunk_digest))? > + .as_slice(), > + )?; > + } > + println!("{} sec.", start.elapsed().as_secs_f32()); As a frequent user of the `time` shell command I'm wondering if this is really something I want to always see? ;-) > + Ok(Value::Null) > +} > + > +pub fn recover_commands() -> CommandLineInterface { > + let cmd_def = CliCommandMap::new().insert("index", CliCommand::new(&API_METHOD_RECOVER_INDEX)); > + cmd_def.into() > +} > -- > 2.20.1