From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 4313BB80B2 for ; Wed, 6 Mar 2024 16:50:17 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 29D291B9E9 for ; Wed, 6 Mar 2024 16:49:47 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 6 Mar 2024 16:49:44 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id CF4AC438B0 for ; Wed, 6 Mar 2024 16:49:43 +0100 (CET) Message-ID: <9a4d9e4c-3175-44a4-aa02-61e68b2c4b2a@proxmox.com> Date: Wed, 6 Mar 2024 16:49:38 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Content-Language: en-US To: Proxmox Backup Server development discussion , Filip Schauer References: <20240305135645.96347-1-f.schauer@proxmox.com> <20240305135645.96347-6-f.schauer@proxmox.com> From: Max Carrara In-Reply-To: <20240305135645.96347-6-f.schauer@proxmox.com> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.003 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: Re: [pbs-devel] [PATCH v4 vma-to-pbs 5/6] Add support for streaming the VMA file via stdin X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 06 Mar 2024 15:50:17 -0000 On 3/5/24 14:56, Filip Schauer wrote: > This allows the user to stream a compressed VMA file directly to a PBS > without the need to extract it to an intermediate .vma file. > > Example usage: > > zstd -d --stdout vzdump.vma.zstd | vma-to-pbs \ > --repository \ > --vmid 123 \ > --password_file pbs_password > > Signed-off-by: Filip Schauer > --- > src/main.rs | 232 +-------------------------------- > src/vma.rs | 270 ++++++++++++++------------------------ > src/vma2pbs.rs | 348 +++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 455 insertions(+), 395 deletions(-) > create mode 100644 src/vma2pbs.rs > > diff --git a/src/main.rs b/src/main.rs > index b58387b..dc8171e 100644 > --- a/src/main.rs > +++ b/src/main.rs > @@ -1,235 +1,14 @@ > extern crate anyhow; > extern crate clap; > -extern crate proxmox_backup_qemu; > -extern crate proxmox_io; > extern crate proxmox_sys; > -extern crate scopeguard; As mentioned in my previous comments, the remaining `extern crate` statements here are no longer necessary since edition 2018 [0]. > > -use std::env; > -use std::ffi::{c_char, CStr, CString}; > -use std::ptr; > -use std::time::{SystemTime, UNIX_EPOCH}; > - > -use anyhow::{anyhow, Context, Result}; > +use anyhow::Result; > use clap::{command, Arg, ArgAction}; > -use proxmox_backup_qemu::*; > use proxmox_sys::linux::tty; > -use scopeguard::defer; > > mod vma; > -use vma::*; > - > -fn backup_vma_to_pbs( > - vma_file_path: String, > - pbs_repository: String, > - backup_id: String, > - pbs_password: String, > - keyfile: Option, > - key_password: Option, > - master_keyfile: Option, > - fingerprint: String, > - compress: bool, > - encrypt: bool, > -) -> Result<()> { > - println!("VMA input file: {}", vma_file_path); > - println!("PBS repository: {}", pbs_repository); > - println!("PBS fingerprint: {}", fingerprint); > - println!("compress: {}", compress); > - println!("encrypt: {}", encrypt); > - > - let backup_time = SystemTime::now() > - .duration_since(UNIX_EPOCH) > - .unwrap() > - .as_secs(); > - println!("backup time: {}", backup_time); > - > - let mut pbs_err: *mut c_char = ptr::null_mut(); > - > - let pbs_repository_cstr = CString::new(pbs_repository).unwrap(); > - let backup_id_cstr = CString::new(backup_id).unwrap(); > - let pbs_password_cstr = CString::new(pbs_password).unwrap(); > - let fingerprint_cstr = CString::new(fingerprint).unwrap(); > - let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap()); > - let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null()); > - let key_password_cstr = key_password.map(|v| CString::new(v).unwrap()); > - let key_password_ptr = key_password_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null()); > - let master_keyfile_cstr = master_keyfile.map(|v| CString::new(v).unwrap()); > - let master_keyfile_ptr = master_keyfile_cstr > - .map(|v| v.as_ptr()) > - .unwrap_or(ptr::null()); > - > - let pbs = proxmox_backup_new_ns( > - pbs_repository_cstr.as_ptr(), > - ptr::null(), > - backup_id_cstr.as_ptr(), > - backup_time, > - PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, > - pbs_password_cstr.as_ptr(), > - keyfile_ptr, > - key_password_ptr, > - master_keyfile_ptr, > - true, > - false, > - fingerprint_cstr.as_ptr(), > - &mut pbs_err, > - ); > - > - defer! { > - proxmox_backup_disconnect(pbs); > - } > - > - if pbs == ptr::null_mut() { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_new_ns failed: {pbs_err_cstr:?}")); > - } > - } > - > - let connect_result = proxmox_backup_connect(pbs, &mut pbs_err); > - > - if connect_result < 0 { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_connect failed: {pbs_err_cstr:?}")); > - } > - } > - > - let mut vma_reader = VmaReader::new(&vma_file_path)?; > - > - // Handle configs > - let configs = vma_reader.get_configs(); > - for (config_name, config_data) in configs { > - println!("CFG: size: {} name: {}", config_data.len(), config_name); > - > - let config_name_cstr = CString::new(config_name).unwrap(); > - > - if proxmox_backup_add_config( > - pbs, > - config_name_cstr.as_ptr(), > - config_data.as_ptr(), > - config_data.len() as u64, > - &mut pbs_err, > - ) < 0 > - { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_add_config failed: {pbs_err_cstr:?}" > - )); > - } > - } > - } > - > - // Handle block devices > - for device_id in 0..255 { > - let device_name = match vma_reader.get_device_name(device_id) { > - Some(x) => x, > - None => { > - continue; > - } > - }; > - > - let device_size = match vma_reader.get_device_size(device_id) { > - Some(x) => x, > - None => { > - continue; > - } > - }; > - > - println!( > - "DEV: dev_id={} size: {} devname: {}", > - device_id, device_size, device_name > - ); > - > - let device_name_cstr = CString::new(device_name).unwrap(); > - let pbs_device_id = proxmox_backup_register_image( > - pbs, > - device_name_cstr.as_ptr(), > - device_size, > - false, > - &mut pbs_err, > - ); > - > - if pbs_device_id < 0 { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_register_image failed: {pbs_err_cstr:?}" > - )); > - } > - } > - > - let mut image_chunk_buffer = > - proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize); > - let mut bytes_transferred = 0; > - > - while bytes_transferred < device_size { > - let bytes_left = device_size - bytes_transferred; > - let chunk_size = bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE); > - println!( > - "Uploading dev_id: {} offset: {:#0X} - {:#0X}", > - device_id, > - bytes_transferred, > - bytes_transferred + chunk_size > - ); > - > - let is_zero_chunk = vma_reader > - .read_device_contents( > - device_id, > - &mut image_chunk_buffer[0..chunk_size as usize], > - bytes_transferred, > - ) > - .with_context(|| { > - format!( > - "read {} bytes at offset {} from disk {} from VMA file", > - chunk_size, bytes_transferred, device_id > - ) > - })?; > - > - let write_data_result = proxmox_backup_write_data( > - pbs, > - pbs_device_id as u8, > - if is_zero_chunk { > - ptr::null() > - } else { > - image_chunk_buffer.as_ptr() > - }, > - bytes_transferred, > - chunk_size, > - &mut pbs_err, > - ); > - > - if write_data_result < 0 { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_write_data failed: {pbs_err_cstr:?}" > - )); > - } > - } > - > - bytes_transferred += chunk_size; > - } > - > - if proxmox_backup_close_image(pbs, pbs_device_id as u8, &mut pbs_err) < 0 { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_close_image failed: {pbs_err_cstr:?}" > - )); > - } > - } > - } > - > - if proxmox_backup_finish(pbs, &mut pbs_err) < 0 { > - unsafe { > - let pbs_err_cstr = CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_finish failed: {pbs_err_cstr:?}")); > - } > - } > - > - Ok(()) > -} > +mod vma2pbs; > +use vma2pbs::backup_vma_to_pbs; > > fn main() -> Result<()> { > let matches = command!() > @@ -307,7 +86,8 @@ fn main() -> Result<()> { > let compress = matches.get_flag("compress"); > let encrypt = matches.get_flag("encrypt"); > > - let vma_file_path = matches.get_one::("vma_file").unwrap().to_string(); > + let vma_file_path = matches.get_one::("vma_file"); You got rid of the `unwrap()`, good! > + > let password_file = matches.get_one::("password_file"); > > let pbs_password = match password_file { > @@ -331,7 +111,7 @@ fn main() -> Result<()> { > }; > > backup_vma_to_pbs( > - vma_file_path, > + vma_file_path.cloned(), > pbs_repository, > vmid, > pbs_password, > diff --git a/src/vma.rs b/src/vma.rs > index 5ec3822..de3045d 100644 > --- a/src/vma.rs > +++ b/src/vma.rs > @@ -2,10 +2,9 @@ extern crate anyhow; > extern crate md5; > > use std::collections::HashMap; > -use std::fs::File; > -use std::io::{Read, Seek, SeekFrom}; > +use std::io::Read; > use std::mem::size_of; > -use std::{cmp, str}; > +use std::str; > > use anyhow::{anyhow, Result}; > use bincode::Options; > @@ -45,6 +44,8 @@ struct VmaHeader { > reserved1: [u8; 4], > #[serde(with = "BigArray")] > pub dev_info: [VmaDeviceInfoHeader; VMA_MAX_DEVICES], > + #[serde(skip_deserializing, skip_serializing)] > + blob_buffer: Vec, > } > > #[repr(C)] > @@ -68,52 +69,35 @@ struct VmaExtentHeader { > pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT], > } > > -#[derive(Clone)] > -struct VmaBlockIndexEntry { > - pub cluster_file_offset: u64, > - pub mask: u16, > -} > - > pub struct VmaReader { > - vma_file: File, > + vma_file: Box, > vma_header: VmaHeader, > configs: HashMap, > - block_index: Vec>, > - blocks_are_indexed: bool, > } > > impl VmaReader { > - pub fn new(vma_file_path: &str) -> Result { > - let mut vma_file = match File::open(vma_file_path) { > - Err(why) => return Err(anyhow!("couldn't open {}: {}", vma_file_path, why)), > - Ok(file) => file, > - }; > - > - let vma_header = Self::read_header(&mut vma_file).unwrap(); > - let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap(); > - let block_index: Vec> = (0..256).map(|_| Vec::new()).collect(); > + pub fn new(mut vma_file: impl Read + 'static) -> Result { Why use `impl Read + 'static`? This makes the function implicitly generic. That can sometimes be totally fine, but is there any benefit you get from monomorphization in this case? Furthermore, `VmaReader::vma_file` is `Box` - why have that one dynamically dispatched, and the other one not? > + let vma_header = Self::read_header(&mut vma_file)?; > + let configs = Self::read_configs(&vma_header)?; > > let instance = Self { > - vma_file, > + vma_file: Box::new(vma_file), ... especially if you `Box` it here anyway. > vma_header, > configs, > - block_index, > - blocks_are_indexed: false, > }; > > Ok(instance) > } > > - fn read_header(vma_file: &mut File) -> Result { > - let mut buffer = Vec::with_capacity(size_of::()); > - buffer.resize(size_of::(), 0); > + fn read_header(vma_file: &mut impl Read) -> Result { Same as above here - is there any benefit from being generic over `Read` here? > + let mut buffer = vec![0; 12288]; What is this size? Should be a constant. > vma_file.read_exact(&mut buffer)?; > > let bincode_options = bincode::DefaultOptions::new() > .with_fixint_encoding() > .with_big_endian(); > > - let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?; > + let mut vma_header: VmaHeader = bincode_options.deserialize(&buffer)?; > > if vma_header.magic != [b'V', b'M', b'A', 0] { As I commented on a previous patch, this array should also be a constant. > return Err(anyhow!("Invalid magic number")); Use `anyhow::bail`. [4] > @@ -124,7 +108,7 @@ impl VmaReader { > } > > buffer.resize(vma_header.header_size as usize, 0); > - vma_file.read_exact(&mut buffer[size_of::()..])?; > + vma_file.read_exact(&mut buffer[12288..])?; Constant? ;) > > // Fill the MD5 sum field with zeros to compute the MD5 sum > buffer[32..48].fill(0); > @@ -134,89 +118,77 @@ impl VmaReader { > return Err(anyhow!("Wrong VMA header checksum")); > }> > - return Ok(vma_header); > - } > - > - fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> Result { > - let mut size_bytes = [0u8; 2]; > - vma_file.seek(SeekFrom::Start(file_offset))?; > - vma_file.read_exact(&mut size_bytes)?; > - let size = u16::from_le_bytes(size_bytes) as usize; > - let mut string_bytes = Vec::with_capacity(size - 1); > - string_bytes.resize(size - 1, 0); > - vma_file.read_exact(&mut string_bytes)?; > - let string = str::from_utf8(&string_bytes)?; > + let blob_buffer = &buffer[12288..vma_header.header_size as usize]; Use a constant here too, please. > + vma_header.blob_buffer = blob_buffer.to_vec(); > > - return Ok(string.to_string()); > + Ok(vma_header) > } > > - fn read_blob_buffer( > - vma_file: &mut File, > - vma_header: &VmaHeader, > - ) -> Result> { > + fn read_configs(vma_header: &VmaHeader) -> Result> { I assume config values are key-value pairs. Would it make sense to encapsulate this map in a separate struct to make the code more expressive? > let mut configs = HashMap::new(); > > for i in 0..VMA_MAX_CONFIGS { > - let config_name_offset = vma_header.config_names[i]; > - let config_data_offset = vma_header.config_data[i]; > + let config_name_offset = vma_header.config_names[i] as usize; > + let config_data_offset = vma_header.config_data[i] as usize; > > if config_name_offset == 0 || config_data_offset == 0 { > continue; > } > > - let config_name_file_offset = > - (vma_header.blob_buffer_offset + config_name_offset) as u64; > - let config_data_file_offset = > - (vma_header.blob_buffer_offset + config_data_offset) as u64; > - let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?; > - let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?; > - > - configs.insert(String::from(config_name), String::from(config_data)); The code starting here ... > + let config_name_size_bytes = > + vma_header.blob_buffer[config_name_offset..config_name_offset + 2].try_into()?; > + let config_name_size = u16::from_le_bytes(config_name_size_bytes) as usize; > + let config_name_bytes = &vma_header.blob_buffer > + [config_name_offset + 2..config_name_offset + 1 + config_name_size]; > + let config_name = str::from_utf8(config_name_bytes); ... and ending here looks suspiciously similar .. .. to the code starting here ... > + let config_data_size_bytes = > + vma_header.blob_buffer[config_data_offset..config_data_offset + 2].try_into()?; > + let config_data_size = u16::from_le_bytes(config_data_size_bytes) as usize; > + let config_data_bytes = &vma_header.blob_buffer > + [config_data_offset + 2..config_data_offset + 1 + config_data_size]; > + let config_data = str::from_utf8(config_data_bytes); ... and ending here. Should be a helper function with a docstring, IMO. Especially since there's a lot of stuff going on - the indexing of the buffers in particular could use a description. > + > + configs.insert(String::from(config_name?), String::from(config_data?)); Why `?` here on the variable names themselves? Why not earlier? Are these names always UTF-8 encoded? > } > > - return Ok(configs); > + Ok(configs) > } > > pub fn get_configs(&self) -> HashMap { > - return self.configs.clone(); > + self.configs.clone() > } > > - pub fn get_device_name(&mut self, device_id: usize) -> Option { > - if device_id >= VMA_MAX_DEVICES { > - return None; > - } > - > - let device_name_offset = self.vma_header.dev_info[device_id].device_name_offset; > + pub fn get_device_name(&self, device_id: u8) -> Option { Could use a `type VmaDeviceId = u8;` declared at the top of the file here. > + let device_name_offset = > + self.vma_header.dev_info[device_id as usize].device_name_offset as usize; > > if device_name_offset == 0 { > return None; > } > > - let device_name_file_offset = > - (self.vma_header.blob_buffer_offset + device_name_offset) as u64; > - let device_name = > - Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap(); > + let size_bytes = self.vma_header.blob_buffer[device_name_offset..device_name_offset + 2] > + .try_into() > + .unwrap(); Is it safe to `unwrap()` here? If yes, add a comment why, if not, add `anyhow::Context` and abort early with `?` otherwise. > + let size = u16::from_le_bytes(size_bytes) as usize; > + let device_name_bytes = > + &self.vma_header.blob_buffer[device_name_offset + 2..device_name_offset + 1 + size]; > + let device_name = str::from_utf8(device_name_bytes); > > - return Some(device_name.to_string()); > + Some(String::from(device_name.unwrap())) Is it safe to `unwrap()` here too? Same as above. Are device names always encoded in UTF-8? Also, this code looks suspiciously similar to the two blocks I marked above, so could probably use the same function here too. > } > > - pub fn get_device_size(&self, device_id: usize) -> Option { > - if device_id >= VMA_MAX_DEVICES { > - return None; > - } > - > - let dev_info = &self.vma_header.dev_info[device_id]; > + pub fn get_device_size(&self, device_id: u8) -> Option { `type VmaDeviceId = u8;` - same as above. Using `type VmaDeviceSize = u64;` for the return value would also be nice. > + let dev_info = &self.vma_header.dev_info[device_id as usize]; > > if dev_info.device_name_offset == 0 { > return None; > } > > - return Some(dev_info.device_size); > + Some(dev_info.device_size) > } > > - fn read_extent_header(vma_file: &mut File) -> Result { > - let mut buffer = Vec::with_capacity(size_of::()); > - buffer.resize(size_of::(), 0); > + fn read_extent_header(mut vma_file: impl Read) -> Result { Same as I mentioned before - is there any benefit to being generic here? > + let mut buffer = vec![0; size_of::()]; > vma_file.read_exact(&mut buffer)?; > > let bincode_options = bincode::DefaultOptions::new() > @@ -237,113 +209,73 @@ impl VmaReader { > return Err(anyhow!("Wrong VMA extent header checksum")); > } > > - return Ok(vma_extent_header); > + Ok(vma_extent_header) > } > > - fn index_device_clusters(&mut self) -> Result<()> { > - for device_id in 0..255 { > - let device_size = match self.get_device_size(device_id) { > - Some(x) => x, > - None => { > - continue; > - } > - }; > + fn restore_extent(&mut self, func: F) -> Result<()> > + where > + F: Fn(u8, u64, Option>) -> Result<()>, > + { A couple of things: What is `func` what what does it do? Should be described in a docstring. Regarding `F: Fn(u8, u64, Option>) -> Result<()>`: * Why `Fn` and not `FnOnce`? You call this with a closure later on. * What does the `u8` stand for? * What does the `u64` stand for? * What does the `Option>` stand for? You might benefit from type aliases here, so the signature reveals a little more about what`func` should actually be. > + let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?; > > - let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16); > + for i in 0..VMA_BLOCKS_PER_EXTENT { > + let blockinfo = &vma_extent_header.blockinfo[i]; > > - let block_index_entry_placeholder = VmaBlockIndexEntry { > - cluster_file_offset: 0, > - mask: 0, > - }; > - > - self.block_index[device_id] > - .resize(device_cluster_count as usize, block_index_entry_placeholder); > - } > + if blockinfo.dev_id == 0 { > + continue; > + } > > - let mut file_offset = self.vma_header.header_size as u64; > - let vma_file_size = self.vma_file.metadata()?.len(); > + let image_offset = 4096 * 16 * blockinfo.cluster_num as u64; Should use constants here. > + let is_zero = blockinfo.mask == 0; I'm usually in favour of assigning the result of conditional checks to variables first, but is that really necessary here? > > - while file_offset < vma_file_size { > - self.vma_file.seek(SeekFrom::Start(file_offset))?; > - let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?; > - file_offset += size_of::() as u64; > + let image_chunk_buffer = if is_zero { It's only used here after all, and inlining it wouldn't make the code more complex at all. > + None > + } else { > + let mut image_chunk_buffer = vec![0; 4096 * 16]; Again, should use constants here. > > - for i in 0..VMA_BLOCKS_PER_EXTENT { > - let blockinfo = &vma_extent_header.blockinfo[i]; > + for j in 0..16 { What does "j" stand for? > + let block_mask = ((blockinfo.mask >> j) & 1) == 1; What does this `block_mask` mean exactly? It's a `bool`, so why not name it `does_match_mask`? > > - if blockinfo.dev_id == 0 { > - continue; > + if block_mask { > + self.vma_file.read_exact( > + &mut image_chunk_buffer[4096 * j as usize..4096 * (j + 1) as usize], Why does `image_chunk_buffer` need to be indexed the way it does? Could this perhaps be expressed better via an explicit `Range` that you define as a variable first? Or maybe define the bounds of the range instead first? E.g.: let start = 4096 * j as usize; let end = 4096 * (j + 1) as usize; > + )?; > + } else { > + image_chunk_buffer[4096 * j as usize..4096 * (j + 1) as usize].fill(0); You use the same range here as well. > + } > } > > - let block_index_entry = VmaBlockIndexEntry { > - cluster_file_offset: file_offset, > - mask: blockinfo.mask, > - }; > + Some(image_chunk_buffer) > + }; > > - self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = > - block_index_entry; > - file_offset += blockinfo.mask.count_ones() as u64 * 4096; > - } > + func(blockinfo.dev_id, image_offset, image_chunk_buffer)?; So, `func` gets called here at the end, it gets a device ID, an offset, and a buffer. My first guess is that it's used to actually perform the restoring / conversion of the chunks - so why not make that clear from the beginning? Also, coming back to `F: Fn(u8, u64, Option>) -> Result<()>`: Why does the buffer have to be wrapped in an `Option`? Why not just use an empty buffer to represent that nothing was passed on? Also, `Vec` could probably be `&[u8]`, couldn't it? > } > > - self.blocks_are_indexed = true; > - > - return Ok(()); > + Ok(()) > } > > - pub fn read_device_contents( > - &mut self, > - device_id: usize, > - buffer: &mut [u8], > - offset: u64, > - ) -> Result { > - if device_id >= VMA_MAX_DEVICES { > - return Err(anyhow!("invalid device id {}", device_id)); > - } > - > - if offset % (4096 * 16) != 0 { > - return Err(anyhow!("offset is not aligned to 65536")); > - } > - > - // Make sure that the device clusters are already indexed > - if !self.blocks_are_indexed { > - self.index_device_clusters()?; > - } > - > - let this_device_block_index = &self.block_index[device_id]; > - let length = cmp::min( > - buffer.len(), > - this_device_block_index.len() * 4096 * 16 - offset as usize, > - ); > - let mut buffer_offset = 0; > - let mut buffer_is_zero = true; > - > - while buffer_offset < length { > - let block_index_entry = > - &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)]; > - self.vma_file > - .seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?; > - > - for i in 0..16 { > - if buffer_offset >= length { > - break; > - } > - > - let block_buffer_end = buffer_offset + cmp::min(length - buffer_offset, 4096); > - let block_mask = ((block_index_entry.mask >> i) & 1) == 1; > - > - if block_mask { > - self.vma_file > - .read_exact(&mut buffer[buffer_offset..block_buffer_end])?; > - buffer_is_zero = false; > - } else { > - buffer[buffer_offset..block_buffer_end].fill(0); > - } > - > - buffer_offset += 4096; > + pub fn restore(&mut self, func: F) -> Result<()> > + where > + F: Fn(u8, u64, Option>) -> Result<()>, > + { `func` returns here yet again. There are some more notes below. > + loop { > + match self.restore_extent(&func) { > + Ok(()) => {} > + Err(e) => match e.downcast_ref::() { > + Some(ioerr) => { > + if ioerr.kind() == std::io::ErrorKind::UnexpectedEof { > + break; Why is it fine to `break` on that error explicitly? Could use a comment, perhaps. > + } else { > + return Err(anyhow!("Failed to read VMA file: {}", ioerr)); Use `anyhow::format_err` instead of `anyhow::anyhow`, as we use the former by convention. Futhermore, to really levearge `anyhow` here, wrap the original error and provide some `anyhow::Context`: return Err(format_err!(ioerr)).context("failed to read VMA file"); > + } > + } > + _ => { > + return Err(anyhow!("{}", e)); Same as above. > + } > + }, > } > } > > - return Ok(buffer_is_zero); > + Ok(()) > } > } Some more thoughts `func` and `F: Fn(u8, u64, Option>) -> Result<()>`: Unless it makes sense to be generic in this context, it's better to define a type alias for the function itself (and use some more aliases for the other types), like so: type DeviceId = u8; type ImageOffset = u64; type RestoreHandler = Box Result<()>>; That is much more expressive and also allows you to get rid of the generic. Putting it in a `Box` is less overhead than you think, because you usually need to to that once anyway. You can still use freestanding functions instead of closures, too: fn do_the_restoration(id: DeviceId, offset: ImageOffset, buf: &[u8]) -> Result<()> { todo!() } // ... let handler: RestoreHandler = Box::new(do_the_restoration); Since `RestoreHandler` is constrained to `Fn`, you can then pass it by reference: pub fn restore(&mut self, handler: &RestoreHandler) -> Result<()> { // ... } > diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs > new file mode 100644 > index 0000000..2451bff > --- /dev/null > +++ b/src/vma2pbs.rs > @@ -0,0 +1,348 @@ > +extern crate anyhow; > +extern crate proxmox_backup_qemu; > +extern crate scopeguard; > + > +use std::cell::RefCell; > +use std::collections::HashMap; > +use std::ffi::{c_char, CStr, CString}; > +use std::fs::File; > +use std::io::{stdin, BufRead, BufReader}; > +use std::ptr; > +use std::time::{SystemTime, UNIX_EPOCH}; > + > +use anyhow::{anyhow, Result}; > +use proxmox_backup_qemu::*; Please refrain from using glob imports; while they're convenient, they can sometimes lead to name collisions. I know that's not the case here, but it's still something to keep in mind. One good use of glob imports would be e.g. importing a so-called "prelude". I'm sure you've heard of it before, but I'd still like to explain it here. A prelude makes it easier to import many common traits (and types) all at once, because in one way or another, you'll end up using them anyway. A good example of this is `std::io::prelude`. [1] There's also an implicit prelude that's automatically used everywhere. [2] > +use scopeguard::defer; > + > +use crate::vma::*; Same here. > + > +const VMA_CLUSTER_SIZE: usize = 65536; > + As mentioned in my comments regarding patch 1, there are several things about the function below that I'd like to note: 1. The function has way too many arguments. `cargo clippy` would emit a warning here, but unfortunately I wasn't able to actually build the project (see my reply to your cover letter). Instead, (as I mentioned before) this function should take a struct that represents its arguments, as that makes it vastly more readable and also makes it much easier to add additional parameters later on if necessary. It's even better if you implement some builder pattern around it - e.g. *required* arguments need to be put into `::new()`, additional arguments / flags can be initialized with `::default()` and specified via setters if necessary. Note that you don't have to go all-in on this - a simple struct usually is enough. Even if it seems boiler-platey, it's going to save you and others a lot of time refactoring, should this function's signature ever change (from what I've experienced, these things *do* eventually change sooner or later in our codebase, in general). I also realize we have such functions floating around in other places, but IMHO there's no need to continue that pattern. 2. The body of this function really needs to be broken up into seveal smaller pieces. There are some things that will look jarring either way (see inline comments) but to make reading, debugging, error handling and many other things easier, something like this should always be broken up into smaller parts IMHO. I'd like to note that we have giant functions like this in other places, but I personally think that we shouldn't continue going down that route any further (but that's a discussion for another time). See comments inline for things that could maybe be put into separate functions. 3. As the function is now `pub`, it implicitly defines an API (boundary). You should therefore take several things into special consideration: a. "Will others use that function outside of this crate?" If the answer is "no", then you should mark it as `pub(crate)`. This also makes the following points less important, but you should still consider them regardless. b. "How can I guarantee that this function's signature will remain stable?" This is why I suggested to use an arg-struct earlier - should a new `Option`al parameter be added, call sites won't have to change. Otherwise, you'd need to explicitly pass on `None` everywhere the function is used. If the function's signature is less likely to change, it's also more friendly for semver [3]. c. "Is it necessary to use owned types like `String`, `Vec`, etc.?" Unless owned types are required, it's more flexible to use `&str` instead of `String`, `&[T]` instead of `Vec`, etc. That means you can use both the owned variant and the borrowed one at call sites, making the function overall friendlier yet again. d. "Can this function fail? If yes, would it benefit from concrete error types?" Since this is something that can fail in *many* numerous and different ways, it's not necessary here - but it's something to keep in mind for library code e. "Are my defaults semver-safe [3]?" This is something that's often overlooked, but changing default parameters actually does break semver. E.g. it makes a huge difference if `compress` was set to `false`, but now it's `true` by default. This can affect callers in many unexpected ways, so make sure to choose your defaults very carefully. (It's also one of the reasons why Rust doesn't allow default values in functions, AFAIK.) Relevant if you go down the arg-struct route. There are probably many more things to think of, but these things immediately came to my mind, as they're IMO the most important things to consider. > +pub fn backup_vma_to_pbs( > + vma_file_path: Option, Should be a `std::path::Path` or `PathBuf`. > + pbs_repository: String, > + backup_id: String, > + pbs_password: String, > + keyfile: Option, > + key_password: Option, > + master_keyfile: Option, > + fingerprint: String, > + compress: bool, > + encrypt: bool, > +) -> Result<()> { > + println!("PBS repository: {}", pbs_repository); > + println!("PBS fingerprint: {}", fingerprint); > + println!("compress: {}", compress); > + println!("encrypt: {}", encrypt); > + The code from here ... > + let backup_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); > + println!("backup time: {}", backup_time); > + > + let mut pbs_err: *mut c_char = ptr::null_mut(); > + > + let pbs_repository_cstr = CString::new(pbs_repository)?; > + let backup_id_cstr = CString::new(backup_id)?; > + let pbs_password_cstr = CString::new(pbs_password)?; > + let fingerprint_cstr = CString::new(fingerprint)?; > + let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap()); > + let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null()); > + let key_password_cstr = key_password.map(|v| CString::new(v).unwrap()); > + let key_password_ptr = key_password_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null()); > + let master_keyfile_cstr = master_keyfile.map(|v| CString::new(v).unwrap()); > + let master_keyfile_ptr = master_keyfile_cstr > + .map(|v| v.as_ptr()) > + .unwrap_or(ptr::null()); > + > + let pbs = proxmox_backup_new_ns( > + pbs_repository_cstr.as_ptr(), > + ptr::null(), > + backup_id_cstr.as_ptr(), > + backup_time, > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, > + pbs_password_cstr.as_ptr(), > + keyfile_ptr, > + key_password_ptr, > + master_keyfile_ptr, > + true, > + false, > + fingerprint_cstr.as_ptr(), > + &mut pbs_err, > + ); ... to here could e.g. be made its own function - if you had an arg-struct here, it would be easier to pass around all the original arguments between such helpers. Furthermore, if you make that function return a `Result<..., anyhow::Error>` (just use `anyhow::Result`) you can then add `Context` more conveniently, e.g.: let pbs = pbs_new_backup_namespaced(&cli_args, backup_time, ps_err) .context("failed to create new backups"); .. or something similar. You could also avoid the `unwrap()`s that way and just `?` on failure when converting to a `CString`. > + > + defer! { > + proxmox_backup_disconnect(pbs); > + } > + > + if pbs.is_null() { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!("proxmox_backup_new_ns failed: {pbs_err_cstr:?}")); Use `anyhow::bail` [4] instead here. > + } The `unsafe` block above should only contain the part that's actually `unsafe`, and since you're doing the same kind of error handling in multiple locations, you should put it into a separate *inner* helper function with a comment regarding safety --> Why is it safe to make a `CStr` from the `pbs_err` pointer? Is the pointer guaranteed to *not* be `NULL` when an error happens? You're using `ptr::null_mut()` above after all. Otherwise the helper function should maybe also contain a null check. > + } > + > + let connect_result = proxmox_backup_connect(pbs, &mut pbs_err); > + > + if connect_result < 0 { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!("proxmox_backup_connect failed: {pbs_err_cstr:?}")); > + } > + } See above. > + > + println!("Connected to Proxmox Backup Server"); > + > + let vma_file: Box = match vma_file_path { > + Some(vma_file_path) => match File::open(vma_file_path) { > + Err(why) => return Err(anyhow!("Couldn't open file: {}", why)), Use `anyhow::bail`. [4] > + Ok(file) => Box::new(BufReader::new(file)), > + }, > + None => Box::new(BufReader::new(stdin())), > + }; Maybe `if let Some(...)` would be better here? Also, you could try moving the `Box::new(BufReader::new())` outside of the expression, if it makes the code more concise. > + let mut vma_reader = VmaReader::new(vma_file)?; Would it make sense to add `Context` here? > + > + let start_transfer_time = SystemTime::now(); > + > + // Handle configs What does "handling configs" entail? Should probably be a separate function plus docstring. > + let configs = vma_reader.get_configs(); > + for (config_name, config_data) in configs { > + println!("CFG: size: {} name: {}", config_data.len(), config_name); > + > + let config_name_cstr = CString::new(config_name)?; > + > + if proxmox_backup_add_config( > + pbs, > + config_name_cstr.as_ptr(), > + config_data.as_ptr(), > + config_data.len() as u64, > + &mut pbs_err, > + ) < 0 As you did above, you could assign the result of `proxmox_backup_add_config` to a variable first to make the if-clause a little less thick. > + { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!( > + "proxmox_backup_add_config failed: {pbs_err_cstr:?}" > + )); > + } Use `anyhow::bail` [4] + helper to get the error string, as mentioned above. > + } > + } > + > + let mut pbs_device_id_map = [-1i32; 256]; > + let mut device_sizes = [0u64; 256]; Type aliases + constants for the sizes of these arrays here would be nice, e.g.: type DeviceSize = u64; const DEVICE_COUNT: usize = 256; > + > + // Handle block devices > + for device_id in 0..255 { Another place where a constant could be used - is it one you maybe have defined previously already somewhere? > + let device_name = match vma_reader.get_device_name(device_id) { > + Some(x) => x, > + None => continue, > + }; > + > + let device_size = match vma_reader.get_device_size(device_id) { > + Some(x) => x, > + None => continue, > + }; > + > + device_sizes[device_id as usize] = device_size; > + > + println!( > + "DEV: dev_id={} size: {} devname: {}", > + device_id, device_size, device_name > + ); > + > + let device_name_cstr = CString::new(device_name)?; > + let pbs_device_id = proxmox_backup_register_image( > + pbs, > + device_name_cstr.as_ptr(), > + device_size, > + false, > + &mut pbs_err, > + ); > + > + if pbs_device_id < 0 { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!( > + "proxmox_backup_register_image failed: {pbs_err_cstr:?}" > + )); > + } Use `anyhow::bail` [4] + helper function, as mentioned above. > + } > + > + pbs_device_id_map[device_id as usize] = pbs_device_id; > + } Does it make sense to put this into a separate function as well? > + > + struct ImageChunk { > + sub_chunks: HashMap>>, > + mask: u64, > + non_zero_mask: u64, > + } > + > + let images_chunks: RefCell>> = > + RefCell::new(HashMap::new()); Would it be simpler to factor this nested `HashMap` into its own type that describes what it's actually representing? That would allow you to express what's going on later in this function much better. > + > + vma_reader.restore(|dev_id, offset, buffer| { > + let pbs_device_id = pbs_device_id_map[dev_id as usize]; > + > + if pbs_device_id < 0 { > + return Err(anyhow!( > + "Reference to unknown device id {} in VMA file", > + dev_id > + )); Use `anyhow::bail` [4]. > + } > + > + let mut images_chunks = images_chunks.borrow_mut(); > + let image_chunks = images_chunks.get_mut(&dev_id); > + let pbs_chunk_offset = > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE * (offset / PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE); > + let sub_chunk_index = > + ((offset % PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE) / VMA_CLUSTER_SIZE as u64) as u8; > + > + let pbs_chunk_size = > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE.min(device_sizes[dev_id as usize] - pbs_chunk_offset); While I trust that you calculations are fine, it would be nice if the above was more descriptive --> Separate function with docstring, perhaps? > + > + let pbs_upload_chunk = |pbs_chunk_buffer: Option<&[u8]>| { Can this closure be a function? > + println!( > + "Uploading dev_id: {} offset: {:#0X} - {:#0X}", > + dev_id, > + pbs_chunk_offset, > + pbs_chunk_offset + pbs_chunk_size, > + ); > + > + let mut pbs_err: *mut c_char = ptr::null_mut(); > + > + let write_data_result = proxmox_backup_write_data( > + pbs, > + pbs_device_id as u8, > + match pbs_chunk_buffer { > + Some(pbs_chunk_buffer) => pbs_chunk_buffer.as_ptr(), > + None => ptr::null(), > + }, > + pbs_chunk_offset, > + pbs_chunk_size, > + &mut pbs_err, > + ); > + > + if write_data_result < 0 { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!( > + "proxmox_backup_write_data failed: {pbs_err_cstr:?}" > + )); > + } Use `anyhow::bail` [4] + helper for error message, as above. > + } > + > + Ok(()) > + }; > + > + let insert_image_chunk = |image_chunks: &mut HashMap, > + buffer: Option>| { Should probably also be a function instead of a closure? > + let mut sub_chunks: HashMap>> = HashMap::new(); > + let mask = 1 << sub_chunk_index; > + let non_zero_mask = buffer.is_some() as u64; > + sub_chunks.insert(sub_chunk_index, buffer); > + > + let image_chunk = ImageChunk { > + sub_chunks, > + mask, > + non_zero_mask, > + }; > + > + image_chunks.insert(pbs_chunk_offset, image_chunk); > + }; > + > + match image_chunks { > + Some(image_chunks) => { > + let image_chunk = image_chunks.get_mut(&pbs_chunk_offset); > + > + match image_chunk { > + Some(image_chunk) => { > + image_chunk.mask |= 1 << sub_chunk_index; > + image_chunk.non_zero_mask |= (buffer.is_some() as u64) << sub_chunk_index; > + image_chunk.sub_chunks.insert(sub_chunk_index, buffer); > + > + let sub_chunk_count = > + ((pbs_chunk_size + 65535) / VMA_CLUSTER_SIZE as u64) as u8; Didn't you declare a constant for that value above? > + let pbs_chunk_mask = 1_u64 > + .checked_shl(sub_chunk_count.into()) > + .unwrap_or(0) > + .wrapping_sub(1); > + > + if image_chunk.mask == pbs_chunk_mask { > + if image_chunk.non_zero_mask == 0 { > + pbs_upload_chunk(None)?; > + } else { > + let mut pbs_chunk_buffer = > + proxmox_io::boxed::zeroed(pbs_chunk_size as usize); > + > + for i in 0..sub_chunk_count { > + let sub_chunk = image_chunk.sub_chunks.get(&i).unwrap(); > + let a = i as usize * VMA_CLUSTER_SIZE; What is "a"? > + let b = (a + VMA_CLUSTER_SIZE).min(pbs_chunk_size as usize); What is "b"? > + > + match sub_chunk { > + Some(sub_chunk) => { > + pbs_chunk_buffer[a..b] > + .copy_from_slice(&sub_chunk[0..b - a]); > + } > + None => pbs_chunk_buffer[a..b].fill(0), > + } > + } > + > + pbs_upload_chunk(Some(&*pbs_chunk_buffer))?; > + } > + > + image_chunks.remove(&pbs_chunk_offset); > + } > + } > + None => { > + if pbs_chunk_size <= VMA_CLUSTER_SIZE as u64 { > + pbs_upload_chunk(buffer.as_deref())?; `anyhow::Context` here would be nice. Why can this fail? > + } else { > + insert_image_chunk(image_chunks, buffer); > + } > + } > + } > + } > + None => { > + if pbs_chunk_size <= VMA_CLUSTER_SIZE as u64 { > + pbs_upload_chunk(buffer.as_deref())?; `anyhow::Context` here, too. > + } else { > + let mut image_chunks: HashMap = HashMap::new(); > + insert_image_chunk(&mut image_chunks, buffer); > + images_chunks.insert(dev_id, image_chunks); > + } > + } > + } > + > + Ok(()) > + })?; This *entire* closure deserves to be broken up into multiple smaller parts. It's really hard to follow the code at all, IMHO. You might even get rid of the `RefCell`s that way, perhaps. > + > + for pbs_device_id in pbs_device_id_map.iter().take(255) { Magic constant again here too. > + if *pbs_device_id < 0 { > + continue; > + } > + > + if proxmox_backup_close_image(pbs, *pbs_device_id as u8, &mut pbs_err) < 0 { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!( > + "proxmox_backup_close_image failed: {pbs_err_cstr:?}" > + )); > + } Use `anyhow::bail` [4] + helper for error message, as above. > + } > + } > + > + if proxmox_backup_finish(pbs, &mut pbs_err) < 0 { > + unsafe { > + let pbs_err_cstr = CStr::from_ptr(pbs_err); > + return Err(anyhow!("proxmox_backup_finish failed: {pbs_err_cstr:?}")); > + } Use `anyhow::bail` [4] + helper for error message, as above. > + } > + > + println!( > + "Backup finished within {}ms", > + SystemTime::now() > + .duration_since(start_transfer_time)? > + .as_millis() > + ); Assign the result of `SystemTime::now()...` to a variable, so you can just: println!("Backup finished in {ms} ms."); Also, why milliseconds and not minutes + seconds? > + > + Ok(()) > +} [0]: https://doc.rust-lang.org/edition-guide/rust-2018/path-changes.html [1]: https://doc.rust-lang.org/std/io/prelude/index.html [2]: https://doc.rust-lang.org/std/prelude/index.html [3]: https://semver.org/ [4]: https://docs.rs/anyhow/1.0.80/anyhow/macro.bail.html