From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 2E981BB74A for ; Mon, 25 Mar 2024 13:35:05 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 1579D3CBD for ; Mon, 25 Mar 2024 13:35:05 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 25 Mar 2024 13:35:02 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 1C39341F64 for ; Mon, 25 Mar 2024 13:35:02 +0100 (CET) Content-Type: text/plain; charset=UTF-8 Date: Mon, 25 Mar 2024 13:34:57 +0100 Message-Id: From: "Max Carrara" To: "Proxmox Backup Server development discussion" Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Mailer: aerc 0.17.0-72-g6a84f1331f1c References: <20240320141516.213930-1-f.schauer@proxmox.com> <20240320141516.213930-8-f.schauer@proxmox.com> In-Reply-To: <20240320141516.213930-8-f.schauer@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.021 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH vma-to-pbs 07/10] add support for streaming the VMA file via stdin X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 25 Mar 2024 12:35:05 -0000 On Wed Mar 20, 2024 at 3:15 PM CET, Filip Schauer wrote: > This allows the user to stream a compressed VMA file directly to a PBS > without the need to extract it to an intermediate .vma file. > > Example usage: > > zstd -d --stdout vzdump.vma.zst | vma-to-pbs \ > --repository \ > --vmid 123 \ > --password_file pbs_password > > Signed-off-by: Filip Schauer > --- > This requires > https://lists.proxmox.com/pipermail/pve-devel/2024-March/062182.html > to be applied first. > > src/main.rs | 241 ++------------------------------ > src/vma.rs | 326 ++++++++++++++++++++----------------------- > src/vma2pbs.rs | 368 +++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 530 insertions(+), 405 deletions(-) > create mode 100644 src/vma2pbs.rs > > diff --git a/src/main.rs b/src/main.rs > index f619b3e..e0e1076 100644 > --- a/src/main.rs > +++ b/src/main.rs > @@ -1,228 +1,10 @@ > -use std::env; > -use std::ffi::{c_char, CStr, CString}; > -use std::ptr; > -use std::time::{SystemTime, UNIX_EPOCH}; > - > -use anyhow::{anyhow, Context, Result}; > +use anyhow::Result; > use clap::{command, Arg, ArgAction}; > -use proxmox_backup_qemu::*; > use proxmox_sys::linux::tty; > -use scopeguard::defer; > =20 > mod vma; > -use vma::*; > - > -fn backup_vma_to_pbs( > - vma_file_path: String, > - pbs_repository: String, > - backup_id: String, > - pbs_password: String, > - keyfile: Option, > - key_password: Option, > - master_keyfile: Option, > - fingerprint: String, > - compress: bool, > - encrypt: bool, > -) -> Result<()> { > - println!("VMA input file: {}", vma_file_path); > - println!("PBS repository: {}", pbs_repository); > - println!("PBS fingerprint: {}", fingerprint); > - println!("compress: {}", compress); > - println!("encrypt: {}", encrypt); > - > - let backup_time =3D SystemTime::now() > - .duration_since(UNIX_EPOCH) > - .unwrap() > - .as_secs(); > - println!("backup time: {}", backup_time); > - > - let mut pbs_err: *mut c_char =3D ptr::null_mut(); > - > - let pbs_repository_cstr =3D CString::new(pbs_repository).unwrap(); > - let backup_id_cstr =3D CString::new(backup_id).unwrap(); > - let pbs_password_cstr =3D CString::new(pbs_password).unwrap(); > - let fingerprint_cstr =3D CString::new(fingerprint).unwrap(); > - let keyfile_cstr =3D keyfile.map(|v| CString::new(v).unwrap()); > - let keyfile_ptr =3D keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::= null()); > - let key_password_cstr =3D key_password.map(|v| CString::new(v).unwra= p()); > - let key_password_ptr =3D key_password_cstr.map(|v| v.as_ptr()).unwra= p_or(ptr::null()); > - let master_keyfile_cstr =3D master_keyfile.map(|v| CString::new(v).u= nwrap()); > - let master_keyfile_ptr =3D master_keyfile_cstr > - .map(|v| v.as_ptr()) > - .unwrap_or(ptr::null()); > - > - let pbs =3D proxmox_backup_new_ns( > - pbs_repository_cstr.as_ptr(), > - ptr::null(), > - backup_id_cstr.as_ptr(), > - backup_time, > - PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, > - pbs_password_cstr.as_ptr(), > - keyfile_ptr, > - key_password_ptr, > - master_keyfile_ptr, > - true, > - false, > - fingerprint_cstr.as_ptr(), > - &mut pbs_err, > - ); > - > - defer! { > - proxmox_backup_disconnect(pbs); > - } > - > - if pbs =3D=3D ptr::null_mut() { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_new_ns failed: {pbs_err_c= str:?}")); > - } > - } > - > - let connect_result =3D proxmox_backup_connect(pbs, &mut pbs_err); > - > - if connect_result < 0 { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_connect failed: {pbs_err_= cstr:?}")); > - } > - } > - > - let mut vma_reader =3D VmaReader::new(&vma_file_path)?; > - > - // Handle configs > - let configs =3D vma_reader.get_configs(); > - for (config_name, config_data) in configs { > - println!("CFG: size: {} name: {}", config_data.len(), config_nam= e); > - > - let config_name_cstr =3D CString::new(config_name).unwrap(); > - > - if proxmox_backup_add_config( > - pbs, > - config_name_cstr.as_ptr(), > - config_data.as_ptr(), > - config_data.len() as u64, > - &mut pbs_err, > - ) < 0 > - { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_add_config failed: {pbs_err_cstr:?}" > - )); > - } > - } > - } > - > - // Handle block devices > - for device_id in 0..255 { > - let device_name =3D match vma_reader.get_device_name(device_id) = { > - Some(x) =3D> x, > - None =3D> { > - continue; > - } > - }; > - > - let device_size =3D match vma_reader.get_device_size(device_id) = { > - Some(x) =3D> x, > - None =3D> { > - continue; > - } > - }; > - > - println!( > - "DEV: dev_id=3D{} size: {} devname: {}", > - device_id, device_size, device_name > - ); > - > - let device_name_cstr =3D CString::new(device_name).unwrap(); > - let pbs_device_id =3D proxmox_backup_register_image( > - pbs, > - device_name_cstr.as_ptr(), > - device_size, > - false, > - &mut pbs_err, > - ); > - > - if pbs_device_id < 0 { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_register_image failed: {pbs_err_cstr= :?}" > - )); > - } > - } > - > - let mut image_chunk_buffer =3D > - proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE = as usize); > - let mut bytes_transferred =3D 0; > - > - while bytes_transferred < device_size { > - let bytes_left =3D device_size - bytes_transferred; > - let chunk_size =3D bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHU= NK_SIZE); > - println!( > - "Uploading dev_id: {} offset: {:#0X} - {:#0X}", > - device_id, > - bytes_transferred, > - bytes_transferred + chunk_size > - ); > - > - let is_zero_chunk =3D vma_reader > - .read_device_contents( > - device_id, > - &mut image_chunk_buffer[0..chunk_size as usize], > - bytes_transferred, > - ) > - .with_context(|| { > - format!( > - "read {} bytes at offset {} from disk {} from VM= A file", > - chunk_size, bytes_transferred, device_id > - ) > - })?; > - > - let write_data_result =3D proxmox_backup_write_data( > - pbs, > - pbs_device_id as u8, > - if is_zero_chunk { > - ptr::null() > - } else { > - image_chunk_buffer.as_ptr() > - }, > - bytes_transferred, > - chunk_size, > - &mut pbs_err, > - ); > - > - if write_data_result < 0 { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_write_data failed: {pbs_err_cstr= :?}" > - )); > - } > - } > - > - bytes_transferred +=3D chunk_size; > - } > - > - if proxmox_backup_close_image(pbs, pbs_device_id as u8, &mut pbs= _err) < 0 { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!( > - "proxmox_backup_close_image failed: {pbs_err_cstr:?}= " > - )); > - } > - } > - } > - > - if proxmox_backup_finish(pbs, &mut pbs_err) < 0 { > - unsafe { > - let pbs_err_cstr =3D CStr::from_ptr(pbs_err); > - return Err(anyhow!("proxmox_backup_finish failed: {pbs_err_c= str:?}")); > - } > - } > - > - Ok(()) > -} > +mod vma2pbs; > +use vma2pbs::{backup_vma_to_pbs, BackupVmaToPbsArgs}; > =20 > fn main() -> Result<()> { > let matches =3D command!() > @@ -300,7 +82,8 @@ fn main() -> Result<()> { > let compress =3D matches.get_flag("compress"); > let encrypt =3D matches.get_flag("encrypt"); > =20 > - let vma_file_path =3D matches.get_one::("vma_file").unwrap()= .to_string(); > + let vma_file_path =3D matches.get_one::("vma_file"); > + > let password_file =3D matches.get_one::("password_file"); > =20 > let pbs_password =3D match password_file { > @@ -323,18 +106,20 @@ fn main() -> Result<()> { > None =3D> None, > }; > =20 > - backup_vma_to_pbs( > - vma_file_path, > + let args =3D BackupVmaToPbsArgs { > + vma_file_path: vma_file_path.cloned(), > pbs_repository, > - vmid, > + backup_id: vmid, > pbs_password, > - keyfile.cloned(), > + keyfile: keyfile.cloned(), > key_password, > - master_keyfile.cloned(), > + master_keyfile: master_keyfile.cloned(), > fingerprint, > compress, > encrypt, > - )?; > + }; > + > + backup_vma_to_pbs(args)?; Very glad to see an arg struct being used here now! :) > =20 > Ok(()) > } > diff --git a/src/vma.rs b/src/vma.rs > index d30cb09..d369b36 100644 > --- a/src/vma.rs > +++ b/src/vma.rs > @@ -1,24 +1,55 @@ > -use std::collections::HashMap; > -use std::fs::File; > -use std::io::{Read, Seek, SeekFrom}; > +use std::collections::HashSet; > +use std::io::Read; > use std::mem::size_of; > -use std::{cmp, str}; > +use std::str; > =20 > use anyhow::{anyhow, Result}; > use bincode::Options; > use serde::{Deserialize, Serialize}; > use serde_big_array::BigArray; > =20 > -const VMA_BLOCKS_PER_EXTENT: usize =3D 59; > +/// Maximum number of clusters in an extent > +/// See Image Data Streams in pve-qemu.git/vma_spec.txt > +const VMA_CLUSTERS_PER_EXTENT: usize =3D 59; > + > +/// Number of 4k blocks per cluster > +/// See pve-qemu.git/vma_spec.txt > +const VMA_BLOCKS_PER_CLUSTER: usize =3D 16; > + > +/// Maximum number of config files > +/// See VMA Header in pve-qemu.git/vma_spec.txt > const VMA_MAX_CONFIGS: usize =3D 256; > + > +/// Maximum number of block devices > +/// See VMA Header in pve-qemu.git/vma_spec.txt > const VMA_MAX_DEVICES: usize =3D 256; > =20 > +/// VMA magic string > +/// See VMA Header in pve-qemu.git/vma_spec.txt > +const VMA_HEADER_MAGIC: [u8; 4] =3D [b'V', b'M', b'A', 0]; > + > +/// VMA extent magic string > +/// See VMA Extent Header in pve-qemu.git/vma_spec.txt > +const VMA_EXTENT_HEADER_MAGIC: [u8; 4] =3D [b'V', b'M', b'A', b'E']; > + > +/// Size of a block > +/// See pve-qemu.git/vma_spec.txt > +const BLOCK_SIZE: usize =3D 4096; > + > +/// Size of the VMA header without the blob buffer appended at the end > +/// See VMA Header in pve-qemu.git/vma_spec.txt > +const VMA_HEADER_SIZE_NO_BLOB_BUFFER: usize =3D 12288; > + > +type VmaDeviceId =3D u8; > +type VmaDeviceOffset =3D u64; > +type VmaDeviceSize =3D u64; > + > #[repr(C)] > #[derive(Serialize, Deserialize)] > struct VmaDeviceInfoHeader { > pub device_name_offset: u32, > reserved: [u8; 4], > - pub device_size: u64, > + pub device_size: VmaDeviceSize, > reserved1: [u8; 16], > } > =20 > @@ -42,6 +73,8 @@ struct VmaHeader { > reserved1: [u8; 4], > #[serde(with =3D "BigArray")] > pub dev_info: [VmaDeviceInfoHeader; VMA_MAX_DEVICES], > + #[serde(skip_deserializing, skip_serializing)] > + blob_buffer: Vec, > } > =20 > #[repr(C)] > @@ -62,57 +95,46 @@ struct VmaExtentHeader { > pub uuid: [u8; 16], > pub md5sum: [u8; 16], > #[serde(with =3D "BigArray")] > - pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT], > + pub blockinfo: [VmaBlockInfo; VMA_CLUSTERS_PER_EXTENT], > } > =20 > -#[derive(Clone)] > -struct VmaBlockIndexEntry { > - pub cluster_file_offset: u64, > - pub mask: u16, > +#[derive(Clone, Eq, Hash, PartialEq)] > +pub struct VmaConfig { > + pub name: String, > + pub content: String, > } > =20 > pub struct VmaReader { > - vma_file: File, > + vma_file: Box, > vma_header: VmaHeader, > - configs: HashMap, > - block_index: Vec>, > - blocks_are_indexed: bool, > + configs: HashSet, > } > =20 > impl VmaReader { > - pub fn new(vma_file_path: &str) -> Result { > - let mut vma_file =3D match File::open(vma_file_path) { > - Err(why) =3D> return Err(anyhow!("couldn't open {}: {}", vma= _file_path, why)), > - Ok(file) =3D> file, > - }; > - > - let vma_header =3D Self::read_header(&mut vma_file).unwrap(); > - let configs =3D Self::read_blob_buffer(&mut vma_file, &vma_heade= r).unwrap(); > - let block_index: Vec> =3D (0..256).map(|= _| Vec::new()).collect(); > + pub fn new(mut vma_file: impl Read + 'static) -> Result { Still not too sure about this here - I guess it's *fine* to stay with `impl Read` here and `Box` in the struct above, but IMO if you're already generic here, the `VmaReader` might as well become a `VmaReader`. The relevant `impl` blocks where `Read` is necessary could then just be constrained. Otherwise, the stuff given to `new()` here is being monomorphized and _then_ made into a trait object and boxed. Sure, it's a minor thing, but still some unnecessary work. > + let vma_header =3D Self::read_header(&mut vma_file)?; > + let configs =3D Self::read_configs(&vma_header)?; > =20 > let instance =3D Self { > - vma_file, > + vma_file: Box::new(vma_file), > vma_header, > configs, > - block_index, > - blocks_are_indexed: false, > }; > =20 > Ok(instance) > } > =20 > - fn read_header(vma_file: &mut File) -> Result { > - let mut buffer =3D Vec::with_capacity(size_of::()); > - buffer.resize(size_of::(), 0); > + fn read_header(vma_file: &mut impl Read) -> Result { > + let mut buffer =3D vec![0; VMA_HEADER_SIZE_NO_BLOB_BUFFER]; > vma_file.read_exact(&mut buffer)?; > =20 > let bincode_options =3D bincode::DefaultOptions::new() > .with_fixint_encoding() > .with_big_endian(); > =20 > - let vma_header: VmaHeader =3D bincode_options.deserialize(&buffe= r)?; > + let mut vma_header: VmaHeader =3D bincode_options.deserialize(&b= uffer)?; > =20 > - if vma_header.magic !=3D [b'V', b'M', b'A', 0] { > + if vma_header.magic !=3D VMA_HEADER_MAGIC { > return Err(anyhow!("Invalid magic number")); > } > =20 > @@ -121,7 +143,7 @@ impl VmaReader { > } > =20 > buffer.resize(vma_header.header_size as usize, 0); > - vma_file.read_exact(&mut buffer[size_of::()..])?; > + vma_file.read_exact(&mut buffer[VMA_HEADER_SIZE_NO_BLOB_BUFFER..= ])?; > =20 > // Fill the MD5 sum field with zeros to compute the MD5 sum > buffer[32..48].fill(0); > @@ -131,89 +153,77 @@ impl VmaReader { > return Err(anyhow!("Wrong VMA header checksum")); > } > =20 > - return Ok(vma_header); > + let blob_buffer =3D &buffer[VMA_HEADER_SIZE_NO_BLOB_BUFFER..vma_= header.header_size as usize]; > + vma_header.blob_buffer =3D blob_buffer.to_vec(); > + > + Ok(vma_header) > } > =20 > - fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> R= esult { > - let mut size_bytes =3D [0u8; 2]; > - vma_file.seek(SeekFrom::Start(file_offset))?; > - vma_file.read_exact(&mut size_bytes)?; > + fn read_string(buffer: &[u8]) -> Result { > + let size_bytes: [u8; 2] =3D buffer[0..2].try_into()?; > let size =3D u16::from_le_bytes(size_bytes) as usize; > - let mut string_bytes =3D Vec::with_capacity(size - 1); > - string_bytes.resize(size - 1, 0); > - vma_file.read_exact(&mut string_bytes)?; > - let string =3D str::from_utf8(&string_bytes)?; > + let string_bytes: &[u8] =3D &buffer[2..1 + size]; > + let string =3D str::from_utf8(string_bytes)?; > =20 > - return Ok(string.to_string()); > + Ok(String::from(string)) > } > =20 > - fn read_blob_buffer( > - vma_file: &mut File, > - vma_header: &VmaHeader, > - ) -> Result> { > - let mut configs =3D HashMap::new(); > + fn read_configs(vma_header: &VmaHeader) -> Result= > { > + let mut configs =3D HashSet::new(); > =20 > for i in 0..VMA_MAX_CONFIGS { > - let config_name_offset =3D vma_header.config_names[i]; > - let config_data_offset =3D vma_header.config_data[i]; > + let config_name_offset =3D vma_header.config_names[i] as usi= ze; > + let config_data_offset =3D vma_header.config_data[i] as usiz= e; > =20 > if config_name_offset =3D=3D 0 || config_data_offset =3D=3D = 0 { > continue; > } > =20 > - let config_name_file_offset =3D > - (vma_header.blob_buffer_offset + config_name_offset) as = u64; > - let config_data_file_offset =3D > - (vma_header.blob_buffer_offset + config_data_offset) as = u64; > - let config_name =3D Self::read_string_from_file(vma_file, co= nfig_name_file_offset)?; > - let config_data =3D Self::read_string_from_file(vma_file, co= nfig_data_file_offset)?; > - > - configs.insert(String::from(config_name), String::from(confi= g_data)); > + let config_name =3D Self::read_string(&vma_header.blob_buffe= r[config_name_offset..])?; > + let config_data =3D Self::read_string(&vma_header.blob_buffe= r[config_data_offset..])?; > + let vma_config =3D VmaConfig { > + name: config_name, > + content: config_data, > + }; > + configs.insert(vma_config); > } > =20 > - return Ok(configs); > + Ok(configs) > } > =20 > - pub fn get_configs(&self) -> HashMap { > - return self.configs.clone(); > + pub fn get_configs(&self) -> HashSet { > + self.configs.clone() > } > =20 > - pub fn get_device_name(&mut self, device_id: usize) -> Option { > - if device_id >=3D VMA_MAX_DEVICES { > - return None; > - } > + pub fn contains_device(&self, device_id: VmaDeviceId) -> bool { > + self.vma_header.dev_info[device_id as usize].device_name_offset = !=3D 0 > + } > =20 > - let device_name_offset =3D self.vma_header.dev_info[device_id].d= evice_name_offset; > + pub fn get_device_name(&self, device_id: VmaDeviceId) -> Result { > + let device_name_offset =3D > + self.vma_header.dev_info[device_id as usize].device_name_off= set as usize; > =20 > if device_name_offset =3D=3D 0 { > - return None; > + anyhow::bail!("device_name_offset cannot be 0"); > } > =20 > - let device_name_file_offset =3D > - (self.vma_header.blob_buffer_offset + device_name_offset) as= u64; > - let device_name =3D > - Self::read_string_from_file(&mut self.vma_file, device_name_= file_offset).unwrap(); > + let device_name =3D Self::read_string(&self.vma_header.blob_buff= er[device_name_offset..])?; > =20 > - return Some(device_name.to_string()); > + Ok(device_name) > } > =20 > - pub fn get_device_size(&self, device_id: usize) -> Option { > - if device_id >=3D VMA_MAX_DEVICES { > - return None; > - } > - > - let dev_info =3D &self.vma_header.dev_info[device_id]; > + pub fn get_device_size(&self, device_id: VmaDeviceId) -> Result { > + let dev_info =3D &self.vma_header.dev_info[device_id as usize]; > =20 > if dev_info.device_name_offset =3D=3D 0 { > - return None; > + anyhow::bail!("device_name_offset cannot be 0"); It's okay if you `use anyhow::bail` above, no need to fully qualify it ;) > } > =20 > - return Some(dev_info.device_size); > + Ok(dev_info.device_size) > } > =20 > - fn read_extent_header(vma_file: &mut File) -> Result { > - let mut buffer =3D Vec::with_capacity(size_of::= ()); > - buffer.resize(size_of::(), 0); > + fn read_extent_header(mut vma_file: impl Read) -> Result { > + let mut buffer =3D vec![0; size_of::()]; > vma_file.read_exact(&mut buffer)?; > =20 > let bincode_options =3D bincode::DefaultOptions::new() > @@ -222,7 +232,7 @@ impl VmaReader { > =20 > let vma_extent_header: VmaExtentHeader =3D bincode_options.deser= ialize(&buffer)?; > =20 > - if vma_extent_header.magic !=3D [b'V', b'M', b'A', b'E'] { > + if vma_extent_header.magic !=3D VMA_EXTENT_HEADER_MAGIC { > return Err(anyhow!("Invalid magic number")); > } > =20 > @@ -234,113 +244,75 @@ impl VmaReader { > return Err(anyhow!("Wrong VMA extent header checksum")); > } > =20 > - return Ok(vma_extent_header); > + Ok(vma_extent_header) > } > =20 > - fn index_device_clusters(&mut self) -> Result<()> { > - for device_id in 0..255 { > - let device_size =3D match self.get_device_size(device_id) { > - Some(x) =3D> x, > - None =3D> { > - continue; > - } > - }; > - > - let device_cluster_count =3D (device_size + 4096 * 16 - 1) /= (4096 * 16); > + fn restore_extent(&mut self, callback: F) -> Result<()> > + where > + F: Fn(VmaDeviceId, VmaDeviceOffset, Option>) -> Result<(= )>, > + { > + let vma_extent_header =3D Self::read_extent_header(&mut self.vma= _file)?; > =20 > - let block_index_entry_placeholder =3D VmaBlockIndexEntry { > - cluster_file_offset: 0, > - mask: 0, > - }; > - > - self.block_index[device_id] > - .resize(device_cluster_count as usize, block_index_entry= _placeholder); > - } > + for cluster_index in 0..VMA_CLUSTERS_PER_EXTENT { > + let blockinfo =3D &vma_extent_header.blockinfo[cluster_index= ]; > =20 > - let mut file_offset =3D self.vma_header.header_size as u64; > - let vma_file_size =3D self.vma_file.metadata()?.len(); > - > - while file_offset < vma_file_size { > - self.vma_file.seek(SeekFrom::Start(file_offset))?; > - let vma_extent_header =3D Self::read_extent_header(&mut self= .vma_file)?; > - file_offset +=3D size_of::() as u64; > - > - for i in 0..VMA_BLOCKS_PER_EXTENT { > - let blockinfo =3D &vma_extent_header.blockinfo[i]; > + if blockinfo.dev_id =3D=3D 0 { > + continue; > + } > =20 > - if blockinfo.dev_id =3D=3D 0 { > - continue; > + let image_offset =3D > + (BLOCK_SIZE * VMA_BLOCKS_PER_CLUSTER * blockinfo.cluster= _num as usize) as u64; > + let cluster_is_zero =3D blockinfo.mask =3D=3D 0; > + > + let image_chunk_buffer =3D if cluster_is_zero { > + None > + } else { > + let mut image_chunk_buffer =3D vec![0; BLOCK_SIZE * VMA_= BLOCKS_PER_CLUSTER]; > + > + for block_index in 0..VMA_BLOCKS_PER_CLUSTER { > + let block_is_zero =3D ((blockinfo.mask >> block_inde= x) & 1) =3D=3D 0; > + let block_start =3D BLOCK_SIZE * block_index; > + let block_end =3D block_start + BLOCK_SIZE; > + > + if block_is_zero { > + image_chunk_buffer[block_start..block_end].fill(= 0); > + } else { > + self.vma_file > + .read_exact(&mut image_chunk_buffer[block_st= art..block_end])?; > + } > } > =20 > - let block_index_entry =3D VmaBlockIndexEntry { > - cluster_file_offset: file_offset, > - mask: blockinfo.mask, > - }; > + Some(image_chunk_buffer) > + }; > =20 > - self.block_index[blockinfo.dev_id as usize][blockinfo.cl= uster_num as usize] =3D > - block_index_entry; > - file_offset +=3D blockinfo.mask.count_ones() as u64 * 40= 96; > - } > + callback(blockinfo.dev_id, image_offset, image_chunk_buffer)= ?; > } > =20 > - self.blocks_are_indexed =3D true; > - > - return Ok(()); > + Ok(()) > } > =20 > - pub fn read_device_contents( > - &mut self, > - device_id: usize, > - buffer: &mut [u8], > - offset: u64, > - ) -> Result { > - if device_id >=3D VMA_MAX_DEVICES { > - return Err(anyhow!("invalid device id {}", device_id)); > - } > - > - if offset % (4096 * 16) !=3D 0 { > - return Err(anyhow!("offset is not aligned to 65536")); > - } > - > - // Make sure that the device clusters are already indexed > - if !self.blocks_are_indexed { > - self.index_device_clusters()?; > - } > - > - let this_device_block_index =3D &self.block_index[device_id]; > - let length =3D cmp::min( > - buffer.len(), > - this_device_block_index.len() * 4096 * 16 - offset as usize, > - ); > - let mut buffer_offset =3D 0; > - let mut buffer_is_zero =3D true; > - > - while buffer_offset < length { > - let block_index_entry =3D > - &this_device_block_index[(offset as usize + buffer_offse= t) / (4096 * 16)]; > - self.vma_file > - .seek(SeekFrom::Start(block_index_entry.cluster_file_off= set))?; > - > - for i in 0..16 { > - if buffer_offset >=3D length { > - break; > - } > - > - let block_buffer_end =3D buffer_offset + cmp::min(length= - buffer_offset, 4096); > - let block_mask =3D ((block_index_entry.mask >> i) & 1) = =3D=3D 1; > - > - if block_mask { > - self.vma_file > - .read_exact(&mut buffer[buffer_offset..block_buf= fer_end])?; > - buffer_is_zero =3D false; > - } else { > - buffer[buffer_offset..block_buffer_end].fill(0); > - } > - > - buffer_offset +=3D 4096; > + pub fn restore(&mut self, callback: F) -> Result<()> > + where > + F: Fn(VmaDeviceId, VmaDeviceOffset, Option>) -> Result<(= )>, > + { > + loop { > + match self.restore_extent(&callback) { > + Ok(()) =3D> {} > + Err(e) =3D> match e.downcast_ref::() { > + Some(ioerr) =3D> { > + if ioerr.kind() =3D=3D std::io::ErrorKind::Unexp= ectedEof { > + break; // Break out of the loop since the en= d of the file was reached. > + } else { > + return Err(anyhow!("Failed to read VMA file:= {}", ioerr)); > + } > + } > + _ =3D> { > + return Err(anyhow::format_err!(e)); > + } > + }, > } > } > =20 > - return Ok(buffer_is_zero); > + Ok(()) > } > } > diff --git a/src/vma2pbs.rs b/src/vma2pbs.rs > new file mode 100644 > index 0000000..cef7b60 > --- /dev/null > +++ b/src/vma2pbs.rs > @@ -0,0 +1,368 @@ > +use std::cell::RefCell; > +use std::collections::HashMap; > +use std::ffi::{c_char, CStr, CString}; > +use std::fs::File; > +use std::io::{stdin, BufRead, BufReader}; > +use std::ptr; > +use std::time::{SystemTime, UNIX_EPOCH}; > + > +use anyhow::{anyhow, Result}; > +use proxmox_backup_qemu::{ > + capi_types::ProxmoxBackupHandle, proxmox_backup_add_config, proxmox_= backup_close_image, > + proxmox_backup_connect, proxmox_backup_disconnect, proxmox_backup_fi= nish, > + proxmox_backup_new_ns, proxmox_backup_register_image, proxmox_backup= _write_data, > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, > +}; > +use scopeguard::defer; > + > +use crate::vma::VmaReader; > + > +const VMA_CLUSTER_SIZE: usize =3D 65536; > + > +pub struct BackupVmaToPbsArgs { > + pub vma_file_path: Option, > + pub pbs_repository: String, > + pub backup_id: String, > + pub pbs_password: String, > + pub keyfile: Option, > + pub key_password: Option, > + pub master_keyfile: Option, > + pub fingerprint: String, > + pub compress: bool, > + pub encrypt: bool, > +} > + > +#[derive(Copy, Clone)] > +struct BlockDeviceInfo { > + pub pbs_device_id: u8, > + pub device_size: u64, > +} > + > +fn create_pbs_backup_task(args: BackupVmaToPbsArgs) -> Result<*mut Proxm= oxBackupHandle> { Ugh, I know all of the `proxmox-backup-qemu`-related things take a `*mut` - so I guess passing that around here is fine, as it makes things easier to read... (In an ideal world that API would take a `NonNull` <.<) > + println!("PBS repository: {}", args.pbs_repository); > + println!("PBS fingerprint: {}", args.fingerprint); > + println!("compress: {}", args.compress); > + println!("encrypt: {}", args.encrypt); > + > + let backup_time =3D SystemTime::now().duration_since(UNIX_EPOCH)?.as= _secs(); > + println!("backup time: {}", backup_time); Should maybe format the time a little different; I e.g. get: [...] backup time: 1711359868 [...] > + > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + > + let pbs_repository_cstr =3D CString::new(args.pbs_repository)?; > + let backup_id_cstr =3D CString::new(args.backup_id)?; > + let pbs_password_cstr =3D CString::new(args.pbs_password)?; > + let fingerprint_cstr =3D CString::new(args.fingerprint)?; > + let keyfile_cstr =3D args.keyfile.map(|v| CString::new(v).unwrap()); > + let keyfile_ptr =3D keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::= null()); > + let key_password_cstr =3D args.key_password.map(|v| CString::new(v).= unwrap()); > + let key_password_ptr =3D key_password_cstr.map(|v| v.as_ptr()).unwra= p_or(ptr::null()); > + let master_keyfile_cstr =3D args.master_keyfile.map(|v| CString::new= (v).unwrap()); > + let master_keyfile_ptr =3D master_keyfile_cstr > + .map(|v| v.as_ptr()) > + .unwrap_or(ptr::null()); > + > + let pbs =3D proxmox_backup_new_ns( > + pbs_repository_cstr.as_ptr(), > + ptr::null(), > + backup_id_cstr.as_ptr(), > + backup_time, > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, > + pbs_password_cstr.as_ptr(), > + keyfile_ptr, > + key_password_ptr, > + master_keyfile_ptr, > + true, > + false, > + fingerprint_cstr.as_ptr(), > + &mut pbs_err, > + ); > + > + if pbs.is_null() { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_new_ns failed: {pbs_err_cstr:?}"); > + } As mentioned in my previous review, you really should put this kind of error handling / propagation into some kind of helper function. You want to make sure that all necessary invariants for `CStr::from_ptr` [0] are upheld. I know, this is our API and it's very unlikely that we'll do anything funky with those pointers, but we should nevertheless never rely on C FFI stuff to always remain sound IMO, as mistakes / accidents can happen. All it takes is one faulty commit in `proxmox-backup-qemu`. > + > + Ok(pbs) > +} > + > +fn upload_configs(vma_reader: &VmaReader, pbs: *mut ProxmoxBackupHandle)= -> Result<()> { > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + let configs =3D vma_reader.get_configs(); > + for config in configs { > + let config_name =3D config.name; > + let config_data =3D config.content; > + > + println!("CFG: size: {} name: {}", config_data.len(), config_nam= e); > + > + let config_name_cstr =3D CString::new(config_name)?; > + > + if proxmox_backup_add_config( > + pbs, > + config_name_cstr.as_ptr(), > + config_data.as_ptr(), > + config_data.len() as u64, > + &mut pbs_err, > + ) < 0 > + { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_add_config failed: {pbs_err_cs= tr:?}"); > + } Same as above (error handling). > + } > + > + Ok(()) > +} > + > +fn register_block_devices( > + vma_reader: &VmaReader, > + pbs: *mut ProxmoxBackupHandle, > +) -> Result<[Option; 256]> { > + let mut block_device_infos: [Option; 256] =3D [None= ; 256]; > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + > + for device_id in 0..255 { > + if !vma_reader.contains_device(device_id) { > + continue; > + } > + > + let device_name =3D vma_reader.get_device_name(device_id)?; > + let device_size =3D vma_reader.get_device_size(device_id)?; > + > + println!( > + "DEV: dev_id=3D{} size: {} devname: {}", > + device_id, device_size, device_name > + ); > + > + let device_name_cstr =3D CString::new(device_name)?; > + let pbs_device_id =3D proxmox_backup_register_image( > + pbs, > + device_name_cstr.as_ptr(), > + device_size, > + false, > + &mut pbs_err, > + ); > + > + if pbs_device_id < 0 { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_register_image failed: {pbs_er= r_cstr:?}"); > + } Same as above (error handling). > + > + let block_device_info =3D BlockDeviceInfo { > + pbs_device_id: pbs_device_id as u8, > + device_size, > + }; > + > + block_device_infos[device_id as usize] =3D Some(block_device_inf= o); > + } > + > + Ok(block_device_infos) > +} > + > +fn upload_block_devices(mut vma_reader: VmaReader, pbs: *mut ProxmoxBack= upHandle) -> Result<()> { > + let block_device_infos =3D register_block_devices(&vma_reader, pbs)?= ; > + > + struct ImageChunk { > + sub_chunks: HashMap>>, > + mask: u64, > + non_zero_mask: u64, > + } > + > + let images_chunks: RefCell>> = =3D > + RefCell::new(HashMap::new()); > + All of this, starting from here ... > + vma_reader.restore(|dev_id, offset, buffer| { > + let block_device_info =3D match block_device_infos[dev_id as usi= ze] { > + Some(block_device_info) =3D> block_device_info, > + None =3D> anyhow::bail!("Reference to unknown device id {} i= n VMA file", dev_id), > + }; > + > + let pbs_device_id =3D block_device_info.pbs_device_id; > + let device_size =3D block_device_info.device_size; > + > + let mut images_chunks =3D images_chunks.borrow_mut(); > + let image_chunks =3D images_chunks.get_mut(&dev_id); > + let pbs_chunk_offset =3D > + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE * (offset / PROXMOX_BACKUP= _DEFAULT_CHUNK_SIZE); > + let sub_chunk_index =3D > + ((offset % PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE) / VMA_CLUSTER_= SIZE as u64) as u8; > + > + let pbs_chunk_size =3D PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE.min(dev= ice_size - pbs_chunk_offset); > + > + let pbs_upload_chunk =3D |pbs_chunk_buffer: Option<&[u8]>| { > + println!( > + "Uploading dev_id: {} offset: {:#0X} - {:#0X}", > + dev_id, > + pbs_chunk_offset, > + pbs_chunk_offset + pbs_chunk_size, > + ); > + > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + > + let write_data_result =3D proxmox_backup_write_data( > + pbs, > + pbs_device_id, > + match pbs_chunk_buffer { > + Some(pbs_chunk_buffer) =3D> pbs_chunk_buffer.as_ptr(= ), > + None =3D> ptr::null(), > + }, > + pbs_chunk_offset, > + pbs_chunk_size, > + &mut pbs_err, > + ); > + > + if write_data_result < 0 { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_write_data failed: {pbs_er= r_cstr:?}"); > + } > + > + Ok(()) > + }; > + > + let insert_image_chunk =3D |image_chunks: &mut HashMap, > + buffer: Option>| { > + let mut sub_chunks: HashMap>> =3D HashMap= ::new(); > + let mask =3D 1 << sub_chunk_index; > + let non_zero_mask =3D buffer.is_some() as u64; > + sub_chunks.insert(sub_chunk_index, buffer); > + > + let image_chunk =3D ImageChunk { > + sub_chunks, > + mask, > + non_zero_mask, > + }; > + > + image_chunks.insert(pbs_chunk_offset, image_chunk); > + }; > + > + match image_chunks { > + Some(image_chunks) =3D> { > + let image_chunk =3D image_chunks.get_mut(&pbs_chunk_offs= et); > + > + match image_chunk { > + Some(image_chunk) =3D> { > + image_chunk.mask |=3D 1 << sub_chunk_index; > + image_chunk.non_zero_mask |=3D (buffer.is_some()= as u64) << sub_chunk_index; > + image_chunk.sub_chunks.insert(sub_chunk_index, b= uffer); > + > + let sub_chunk_count =3D > + ((pbs_chunk_size + 65535) / VMA_CLUSTER_SIZE= as u64) as u8; > + let pbs_chunk_mask =3D 1_u64 > + .checked_shl(sub_chunk_count.into()) > + .unwrap_or(0) > + .wrapping_sub(1); > + > + if image_chunk.mask =3D=3D pbs_chunk_mask { > + if image_chunk.non_zero_mask =3D=3D 0 { > + pbs_upload_chunk(None)?; > + } else { > + let mut pbs_chunk_buffer =3D > + proxmox_io::boxed::zeroed(pbs_chunk_= size as usize); > + > + for i in 0..sub_chunk_count { > + let sub_chunk =3D &image_chunk.sub_c= hunks[&i]; > + let start =3D i as usize * VMA_CLUST= ER_SIZE; > + let end =3D > + (start + VMA_CLUSTER_SIZE).min(p= bs_chunk_size as usize); > + > + match sub_chunk { > + Some(sub_chunk) =3D> { > + pbs_chunk_buffer[start..end] > + .copy_from_slice(&sub_ch= unk[0..end - start]); > + } > + None =3D> pbs_chunk_buffer[start= ..end].fill(0), > + } > + } > + > + pbs_upload_chunk(Some(&*pbs_chunk_buffer= ))?; > + } > + > + image_chunks.remove(&pbs_chunk_offset); > + } > + } > + None =3D> { > + if pbs_chunk_size <=3D VMA_CLUSTER_SIZE as u64 { > + pbs_upload_chunk(buffer.as_deref())?; > + } else { > + insert_image_chunk(image_chunks, buffer); > + } > + } > + } > + } > + None =3D> { > + if pbs_chunk_size <=3D VMA_CLUSTER_SIZE as u64 { > + pbs_upload_chunk(buffer.as_deref())?; > + } else { > + let mut image_chunks: HashMap =3D H= ashMap::new(); > + insert_image_chunk(&mut image_chunks, buffer); > + images_chunks.insert(dev_id, image_chunks); > + } > + } > + } > + > + Ok(()) > + })?; ... and ending here should still be taken apart and put into a bunch of smaller functions. If it's hard to keep track of all the things going on here while you're disentangling this, encapsulate the state as struct. > + > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + > + for block_device_info in block_device_infos.iter().take(255) { > + let block_device_info =3D match block_device_info { > + Some(block_device_info) =3D> block_device_info, > + None =3D> continue, > + }; > + > + let pbs_device_id =3D block_device_info.pbs_device_id; > + > + if proxmox_backup_close_image(pbs, pbs_device_id, &mut pbs_err) = < 0 { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_close_image failed: {pbs_err_c= str:?}"); > + } Same as above (error handling). > + } > + > + Ok(()) > +} > + > +pub fn backup_vma_to_pbs(args: BackupVmaToPbsArgs) -> Result<()> { > + let vma_file: Box =3D match &args.vma_file_path { > + Some(vma_file_path) =3D> match File::open(vma_file_path) { > + Err(why) =3D> return Err(anyhow!("Couldn't open file: {}", w= hy)), > + Ok(file) =3D> Box::new(BufReader::new(file)), > + }, > + None =3D> Box::new(BufReader::new(stdin())), IMO we should consider using a separate argument that makes the CLI read from stin, or e.g. if the file is named "-" (without quotes). Otherwise, weird things could happen if one forgets to add an argument: zstd -d --stdout vzdump-qemu-300-2024_03_25-10_21_10.vma.zst | vma-to-pbs -= -repository root@pam@my.pbs.tld:8007:default --vmid 300 Error: stream did not contain valid UTF-8 > + }; > + let vma_reader =3D VmaReader::new(vma_file)?; > + > + let pbs =3D create_pbs_backup_task(args)?; > + > + defer! { > + proxmox_backup_disconnect(pbs); > + } > + > + let mut pbs_err: *mut c_char =3D ptr::null_mut(); > + let connect_result =3D proxmox_backup_connect(pbs, &mut pbs_err); > + > + if connect_result < 0 { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_connect failed: {pbs_err_cstr:?}")= ; > + } > + > + println!("Connected to Proxmox Backup Server"); > + > + let start_transfer_time =3D SystemTime::now(); > + > + upload_configs(&vma_reader, pbs)?; > + upload_block_devices(vma_reader, pbs)?; > + > + if proxmox_backup_finish(pbs, &mut pbs_err) < 0 { > + let pbs_err_cstr =3D unsafe { CStr::from_ptr(pbs_err) }; > + anyhow::bail!("proxmox_backup_finish failed: {pbs_err_cstr:?}"); > + } Same as above (error handling). > + > + let elapsed_time =3D SystemTime::now().duration_since(start_transfer= _time)?; > + let total_ms =3D elapsed_time.as_millis(); > + let ms =3D total_ms % 1000; > + let seconds =3D (total_ms / 1000) % 60; > + let minutes =3D total_ms / 1000000; > + println!("Backup finished within {minutes} minutes, {seconds} second= s and {ms} ms"); > + > + Ok(()) > +} [0]: https://doc.rust-lang.org/std/ffi/struct.CStr.html#method.from_ptr