* [pbs-devel] [PATCH vma-to-pbs] Initial commit
@ 2023-09-22 12:41 Filip Schauer
2023-09-26 13:11 ` Wolfgang Bumiller
0 siblings, 1 reply; 6+ messages in thread
From: Filip Schauer @ 2023-09-22 12:41 UTC (permalink / raw)
To: pbs-devel
Implement a tool to import VMA files into a Proxmox Backup Server
Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
I could not assign this to one distinct repository, so I propose a new
one. In its current state this will not be able to build a deb package
yet since the debian directory is missing.
The program calls extern "C" functions from proxmox-backup-qemu to
interface with the Backup Server. I chose these functions since those
are the only abstracted functions I could find.
The proxmox-sys dependency is used for reading passwords from the tty.
.cargo/config | 5 +
.gitmodules | 6 +
Cargo.toml | 16 ++
Makefile | 70 ++++++++
src/main.rs | 266 +++++++++++++++++++++++++++++
src/vma.rs | 297 +++++++++++++++++++++++++++++++++
submodules/proxmox | 1 +
submodules/proxmox-backup-qemu | 1 +
8 files changed, 662 insertions(+)
create mode 100644 .cargo/config
create mode 100644 .gitmodules
create mode 100644 Cargo.toml
create mode 100644 Makefile
create mode 100644 src/main.rs
create mode 100644 src/vma.rs
create mode 160000 submodules/proxmox
create mode 160000 submodules/proxmox-backup-qemu
diff --git a/.cargo/config b/.cargo/config
new file mode 100644
index 0000000..3b5b6e4
--- /dev/null
+++ b/.cargo/config
@@ -0,0 +1,5 @@
+[source]
+[source.debian-packages]
+directory = "/usr/share/cargo/registry"
+[source.crates-io]
+replace-with = "debian-packages"
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..526f5ef
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "submodules/proxmox-backup-qemu"]
+ path = submodules/proxmox-backup-qemu
+ url = git://git.proxmox.com/git/proxmox-backup-qemu.git
+[submodule "submodules/proxmox"]
+ path = submodules/proxmox
+ url = git://git.proxmox.com/git/proxmox.git
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..72e2812
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "vma-to-pbs"
+version = "0.0.1"
+authors = ["Filip Schauer <f.schauer@proxmox.com>"]
+edition = "2021"
+
+[dependencies]
+getopts = "0.2"
+proxmox-sys = { path = "submodules/proxmox/proxmox-sys" }
+proxmox-backup-qemu = { path = "submodules/proxmox-backup-qemu" }
+proxmox-uuid = "1"
+md5 = "0.7.0"
+bincode = "1.3"
+serde = "1.0"
+serde-big-array = "0.4.1"
+array-init = "2.0.1"
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a0c841d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,70 @@
+include /usr/share/dpkg/default.mk
+
+PACKAGE = proxmox-vma-to-pbs
+BUILDDIR = $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
+
+ARCH := $(DEB_BUILD_ARCH)
+
+DSC=$(DEB_SOURCE)_$(DEB_VERSION).dsc
+MAIN_DEB=$(PACKAGE)_$(DEB_VERSION)_$(ARCH).deb
+OTHER_DEBS = \
+ $(PACKAGE)-dev_$(DEB_VERSION)_$(ARCH).deb \
+ $(PACKAGE)-dbgsym_$(DEB_VERSION)_$(ARCH).deb
+DEBS=$(MAIN_DEB) $(OTHER_DEBS)
+
+DESTDIR=
+
+TARGET_DIR := target/debug
+
+ifeq ($(BUILD_MODE), release)
+CARGO_BUILD_ARGS += --release
+TARGETDIR := target/release
+endif
+
+.PHONY: all build
+all: build
+
+build: $(TARGETDIR)/vma-to-pbs
+$(TARGETDIR)/vma-to-pbs: Cargo.toml src/
+ cargo build $(CARGO_BUILD_ARGS)
+
+.PHONY: install
+install: $(TARGETDIR)/vma-to-pbs
+ install -D -m 0755 $(TARGETDIR)/vma-to-pbs $(DESTDIR)/usr/bin/vma-to-pbs
+
+$(BUILDDIR): submodule
+ rm -rf $@ $@.tmp && mkdir $@.tmp
+ cp -a submodules debian Makefile .cargo Cargo.toml build.rs src $@.tmp/
+ mv $@.tmp $@
+
+submodule:
+ [ -e submodules/proxmox-backup-qemu/Cargo.toml ] || [ -e submodules/proxmox/proxmox-sys/Cargo.toml ] || git submodule update --init --recursive
+
+dsc:
+ rm -rf $(BUILDDIR) $(DSC)
+ $(MAKE) $(DSC)
+ lintian $(DSC)
+
+$(DSC): $(BUILDDIR)
+ cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
+
+sbuild: $(DSC)
+ sbuild $<
+
+.PHONY: deb dsc
+deb: $(OTHER_DEBS)
+$(OTHER_DEBS): $(MAIN_DEB)
+$(MAIN_DEB): $(BUILDDIR)
+ cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
+ lintian $(DEBS)
+
+distclean: clean
+clean:
+ cargo clean
+ rm -rf $(PACKAGE)-[0-9]*/
+ rm -r *.deb *.dsc $(DEB_SOURCE)*.tar* *.build *.buildinfo *.changes Cargo.lock
+
+.PHONY: dinstall
+dinstall: $(DEBS)
+ dpkg -i $(DEBS)
+
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..1e8c240
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,266 @@
+extern crate getopts;
+extern crate proxmox_sys;
+extern crate proxmox_backup_qemu;
+
+use std::time::{SystemTime, UNIX_EPOCH};
+use std::ffi::{c_char, CStr, CString};
+use std::env;
+use std::ptr;
+
+use getopts::Options;
+use proxmox_sys::linux::tty;
+use proxmox_backup_qemu::*;
+
+mod vma;
+use vma::*;
+
+fn print_usage(program: &str, opts: Options) {
+ let brief = format!("Usage: {} vma_file [options]", program);
+ print!("{}", opts.usage(&brief));
+}
+
+fn backup_vma_to_pbs(
+ vma_file_path: String,
+ pbs_repository: String,
+ backup_id: String,
+ pbs_password: String,
+ keyfile: Option<String>,
+ key_password: Option<String>,
+ master_keyfile: Option<String>,
+ fingerprint: String,
+ compress: bool,
+ encrypt: bool
+) {
+ println!("VMA input file: {}", vma_file_path);
+ println!("PBS repository: {}", pbs_repository);
+ println!("PBS fingerprint: {}", fingerprint);
+ println!("compress: {}", compress);
+ println!("encrypt: {}", encrypt);
+
+ let backup_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
+ println!("backup time: {}", backup_time);
+
+ let pbs_err: *mut c_char = ptr::null_mut();
+
+ let pbs_repository_cstr = CString::new(pbs_repository).unwrap();
+ let backup_id_cstr = CString::new(backup_id).unwrap();
+ let pbs_password_cstr = CString::new(pbs_password).unwrap();
+ let fingerprint_cstr = CString::new(fingerprint).unwrap();
+ let keyfile_cstr = match keyfile {
+ Some(x) => Some(CString::new(x).unwrap()),
+ None => None
+ };
+ let keyfile_ptr = match keyfile_cstr {
+ Some(x) => x.as_ptr(),
+ None => ptr::null()
+ };
+ let key_password_cstr = match key_password {
+ Some(x) => Some(CString::new(x).unwrap()),
+ None => None
+ };
+ let key_password_ptr = match key_password_cstr {
+ Some(x) => x.as_ptr(),
+ None => ptr::null()
+ };
+ let master_keyfile_cstr = match master_keyfile {
+ Some(x) => Some(CString::new(x).unwrap()),
+ None => None
+ };
+ let master_keyfile_ptr = match master_keyfile_cstr {
+ Some(x) => x.as_ptr(),
+ None => ptr::null()
+ };
+
+ let pbs = proxmox_backup_new_ns(
+ pbs_repository_cstr.as_ptr(),
+ ptr::null(),
+ backup_id_cstr.as_ptr(),
+ backup_time,
+ PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
+ pbs_password_cstr.as_ptr(),
+ keyfile_ptr,
+ key_password_ptr,
+ master_keyfile_ptr,
+ true,
+ false,
+ fingerprint_cstr.as_ptr(),
+ &pbs_err as *const _ as *mut _);
+
+ if pbs == ptr::null_mut() {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_new_ns failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+
+ let connect_result = proxmox_backup_connect(pbs, &pbs_err as *const _ as *mut _);
+
+ if connect_result < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_connect failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+
+ let mut vma_reader = VmaReader::new(&vma_file_path);
+
+ // Handle configs
+ let configs = vma_reader.get_configs();
+ for (config_name, config_data) in &configs {
+ println!("CFG: size: {} name: {}", config_data.len(), config_name);
+
+ let config_name_cstr = CString::new(config_name.as_bytes()).unwrap();
+
+ if proxmox_backup_add_config(pbs, config_name_cstr.as_ptr(), config_data.as_ptr(), config_data.len() as u64, &pbs_err as *const _ as *mut _) < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_add_config failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+ }
+
+ // Handle block devices
+ for device_id in 0..255 {
+ let device_name = match vma_reader.get_device_name(device_id) {
+ Some(x) => { x }
+ None => { continue; }
+ };
+
+ let device_size = match vma_reader.get_device_size(device_id) {
+ Some(x) => { x }
+ None => { continue; }
+ };
+
+ println!("DEV: dev_id={} size: {} devname: {}", device_id, device_size, device_name);
+
+ let device_name_cstr = CString::new(device_name.as_bytes()).unwrap();
+ let pbs_device_id = proxmox_backup_register_image(pbs, device_name_cstr.as_ptr(), device_size, false, &pbs_err as *const _ as *mut _);
+
+ if pbs_device_id < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_register_image failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+
+ let mut image_chunk_buffer = [0u8; PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize];
+ let mut bytes_transferred = 0;
+
+ while bytes_transferred < device_size {
+ let bytes_left = device_size - bytes_transferred;
+ let this_chunk_size = if bytes_left < PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE { bytes_left } else { PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE };
+ println!("Uploading dev_id: {} offset: {:#0X} - {:#0X}", device_id, bytes_transferred, bytes_transferred + this_chunk_size);
+
+ let is_zero_chunk = vma_reader.read_device_contents(device_id, &mut image_chunk_buffer, bytes_transferred).unwrap();
+ let write_data_result = proxmox_backup_write_data(
+ pbs,
+ pbs_device_id as u8,
+ if is_zero_chunk { ptr::null() } else { image_chunk_buffer.as_ptr() },
+ bytes_transferred,
+ this_chunk_size,
+ &pbs_err as *const _ as *mut _
+ );
+
+ if write_data_result < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_write_data failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+
+ bytes_transferred += this_chunk_size;
+ }
+
+ if proxmox_backup_close_image(pbs, pbs_device_id as u8, &pbs_err as *const _ as *mut _) < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_close_image failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+ }
+
+ if proxmox_backup_finish(pbs, &pbs_err as *const _ as *mut _) < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ panic!("proxmox_backup_finish failed: {}", pbs_err_cstr.to_str().unwrap());
+ }
+ }
+
+ proxmox_backup_disconnect(pbs);
+}
+
+fn main() {
+ let args: Vec<String> = env::args().collect();
+ let program = args[0].clone();
+
+ let mut opts = Options::new();
+ opts.optopt("", "repository", "Repository URL", "<auth_id>@<host>:<port>:<datastore>");
+ opts.optopt("", "vmid", "Backup ID", "VMID");
+ opts.optopt("", "fingerprint", "Proxmox Backup Server Fingerprint", "FINGERPRINT");
+ opts.optopt("", "keyfile", "Key file", "KEYFILE");
+ opts.optopt("", "master_keyfile", "Master key file", "MASTER_KEYFILE");
+ opts.optflag("c", "compress", "Compress the Backup");
+ opts.optflag("e", "encrypt", "Encrypt the Backup");
+ opts.optflag("h", "help", "print this help menu");
+ let matches = match opts.parse(&args[1..]) {
+ Ok(m) => { m }
+ Err(f) => { panic!("{}", f.to_string()) }
+ };
+
+ if matches.opt_present("h") {
+ print_usage(&program, opts);
+ return;
+ }
+
+ if !matches.opt_present("repository") {
+ println!("missing --repository option");
+ print_usage(&program, opts);
+ return;
+ }
+ let pbs_repository = matches.opt_str("repository").unwrap();
+
+ if !matches.opt_present("vmid") {
+ println!("missing --vmid option");
+ print_usage(&program, opts);
+ return;
+ }
+ let vmid = matches.opt_str("vmid").unwrap();
+
+ if !matches.opt_present("fingerprint") {
+ println!("missing --fingerprint option");
+ print_usage(&program, opts);
+ return;
+ }
+ let fingerprint = matches.opt_str("fingerprint").unwrap();
+
+ let keyfile = matches.opt_str("keyfile");
+ let master_keyfile = matches.opt_str("master_keyfile");
+ let compress = matches.opt_present("c");
+ let encrypt = matches.opt_present("e");
+
+ let vma_file_path = if !matches.free.is_empty() {
+ matches.free[0].clone()
+ } else {
+ print_usage(&program, opts);
+ return;
+ };
+
+ let pbs_password = String::from_utf8(tty::read_password(&"Password: ").unwrap()).unwrap();
+ let key_password = match keyfile {
+ Some(_) => Some(String::from_utf8(tty::read_password(&"Key Password: ").unwrap()).unwrap()),
+ None => None
+ };
+
+ backup_vma_to_pbs(
+ vma_file_path,
+ pbs_repository,
+ vmid,
+ pbs_password,
+ keyfile,
+ key_password,
+ master_keyfile,
+ fingerprint,
+ compress,
+ encrypt
+ );
+}
diff --git a/src/vma.rs b/src/vma.rs
new file mode 100644
index 0000000..4b239c8
--- /dev/null
+++ b/src/vma.rs
@@ -0,0 +1,297 @@
+extern crate md5;
+
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom};
+use std::mem::size_of;
+use std::{cmp, error, str, result};
+use std::collections::HashMap;
+
+use array_init::array_init;
+use bincode::*;
+use serde::{Deserialize, Serialize};
+use serde_big_array::BigArray;
+
+const VMA_BLOCKS_PER_EXTENT: usize = 59;
+const VMA_MAX_CONFIGS: usize = 256;
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaDeviceInfoHeader {
+ pub device_name_offset: u32,
+ reserved: [u8; 4],
+ pub device_size: u64,
+ reserved1: [u8; 16]
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaHeader {
+ pub magic: [u8; 4],
+ pub version: u32,
+ pub uuid: [u8; 16],
+ pub ctime: u64,
+ pub md5sum: [u8; 16],
+ pub blob_buffer_offset: u32,
+ pub blob_buffer_size: u32,
+ pub header_size: u32,
+ #[serde(with = "BigArray")]
+ reserved: [u8; 1984],
+ #[serde(with = "BigArray")]
+ pub config_names: [u32; 256],
+ #[serde(with = "BigArray")]
+ pub config_data: [u32; 256],
+ reserved1: [u8; 4],
+ #[serde(with = "BigArray")]
+ pub dev_info: [VmaDeviceInfoHeader; 256]
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaBlockInfo {
+ pub mask: u16,
+ reserved: u8,
+ pub dev_id: u8,
+ pub cluster_num: u32
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaExtentHeader {
+ pub magic: [u8; 4],
+ reserved: [u8; 2],
+ pub block_count: u16,
+ pub uuid: [u8; 16],
+ pub md5sum: [u8; 16],
+ #[serde(with = "BigArray")]
+ pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT]
+}
+
+#[derive(Clone)]
+struct VmaBlockIndexEntry {
+ pub cluster_file_offset: u64,
+ pub mask: u16
+}
+
+pub struct VmaReader {
+ vma_file: File,
+ vma_header: VmaHeader,
+ configs: HashMap<String, String>,
+ block_index: [Vec<VmaBlockIndexEntry>; 256],
+ blocks_are_indexed: bool
+}
+
+impl VmaReader {
+ pub fn new(vma_file_path: &str) -> Self {
+ let mut vma_file = match File::open(vma_file_path) {
+ Err(why) => panic!("couldn't open {}: {}", vma_file_path, why),
+ Ok(file) => file,
+ };
+
+ let vma_header = Self::read_header(&mut vma_file).unwrap();
+ let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap();
+ let block_index: [Vec<VmaBlockIndexEntry>; 256] = array_init(|_| Vec::new());
+
+ let instance = Self {
+ vma_file,
+ vma_header,
+ configs,
+ block_index,
+ blocks_are_indexed: false
+ };
+
+ return instance;
+ }
+
+ fn read_header(vma_file: &mut File) -> result::Result<VmaHeader, Box<dyn error::Error>> {
+ let mut buffer = vec![0u8; size_of::<VmaHeader>()];
+ vma_file.read_exact(&mut buffer)?;
+
+ let bincode_options = bincode::DefaultOptions::new()
+ .with_fixint_encoding()
+ .with_big_endian();
+
+ let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?;
+
+ if vma_header.magic != [b'V', b'M', b'A', 0] {
+ return Err("Invalid magic number".into());
+ }
+
+ if vma_header.version != 1 {
+ return Err(format!("Invalid VMA version {}", vma_header.version).into());
+ }
+
+ buffer.resize(vma_header.header_size as usize, 0);
+ vma_file.read_exact(&mut buffer[size_of::<VmaHeader>()..])?;
+
+ buffer[32..48].fill(0);
+ let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
+
+ if vma_header.md5sum != computed_md5sum {
+ return Err("Wrong VMA header checksum".into());
+ }
+
+ return Ok(vma_header);
+ }
+
+ fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> result::Result<String, Box<dyn error::Error>> {
+ let mut size_bytes = [0u8; 2];
+ vma_file.seek(SeekFrom::Start(file_offset))?;
+ vma_file.read_exact(&mut size_bytes)?;
+ let size = u16::from_le_bytes(size_bytes) as usize;
+ let mut string_bytes = vec![0u8; size - 1];
+ vma_file.read_exact(&mut string_bytes)?;
+ let string = str::from_utf8(&string_bytes)?;
+
+ return Ok(string.to_string());
+ }
+
+ fn read_blob_buffer(vma_file: &mut File, vma_header: &VmaHeader) -> result::Result<HashMap<String, String>, Box<dyn error::Error>> {
+ let mut configs = HashMap::new();
+
+ for i in 0..VMA_MAX_CONFIGS {
+ let config_name_offset = vma_header.config_names[i];
+ let config_data_offset = vma_header.config_data[i];
+
+ if config_name_offset == 0 || config_data_offset == 0 {
+ continue;
+ }
+
+ let config_name_file_offset = (vma_header.blob_buffer_offset + config_name_offset) as u64;
+ let config_data_file_offset = (vma_header.blob_buffer_offset + config_data_offset) as u64;
+ let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?;
+ let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?;
+
+ configs.insert(String::from(config_name), String::from(config_data));
+ }
+
+ return Ok(configs);
+ }
+
+ pub fn get_configs(&self) -> HashMap<String, String> {
+ return self.configs.clone();
+ }
+
+ pub fn get_device_name(&mut self, device_id: u8) -> Option<String> {
+ let device_name_offset = self.vma_header.dev_info[device_id as usize].device_name_offset;
+
+ if device_name_offset == 0 {
+ return None;
+ }
+
+ let device_name_file_offset = (self.vma_header.blob_buffer_offset + device_name_offset) as u64;
+ let device_name = Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap();
+
+ return Some(device_name.to_string());
+ }
+
+ pub fn get_device_size(&self, device_id: u8) -> Option<u64> {
+ if self.vma_header.dev_info[device_id as usize].device_name_offset == 0 {
+ return None;
+ }
+
+ return Some(self.vma_header.dev_info[device_id as usize].device_size);
+ }
+
+ fn read_extent_header(vma_file: &mut File) -> result::Result<VmaExtentHeader, Box<dyn error::Error>> {
+ let mut buffer = vec![0u8; size_of::<VmaExtentHeader>()];
+ vma_file.read_exact(&mut buffer)?;
+
+ let bincode_options = bincode::DefaultOptions::new()
+ .with_fixint_encoding()
+ .with_big_endian();
+
+ let vma_extent_header: VmaExtentHeader = bincode_options.deserialize(&buffer)?;
+
+ if vma_extent_header.magic != [b'V', b'M', b'A', b'E'] {
+ return Err("Invalid magic number".into());
+ }
+
+ buffer[24..40].fill(0);
+ let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
+
+ if vma_extent_header.md5sum != computed_md5sum {
+ return Err("Wrong VMA extent header checksum".into());
+ }
+
+ return Ok(vma_extent_header);
+ }
+
+ fn index_device_clusters(&mut self) -> result::Result<(), Box<dyn error::Error>> {
+ for device_id in 0..255 {
+ let device_size = match self.get_device_size(device_id) {
+ Some(x) => { x }
+ None => { continue; }
+ };
+
+ let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16);
+
+ let block_index_entry_placeholder = VmaBlockIndexEntry {
+ cluster_file_offset: 0,
+ mask: 0
+ };
+
+ self.block_index[device_id as usize].resize(device_cluster_count as usize, block_index_entry_placeholder);
+ }
+
+ let mut file_offset = self.vma_header.header_size as u64;
+ let vma_file_size = self.vma_file.metadata()?.len();
+
+ while file_offset < vma_file_size {
+ self.vma_file.seek(SeekFrom::Start(file_offset))?;
+ let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?;
+ file_offset += 512;
+
+ for i in 0..59 as usize {
+ let blockinfo = &vma_extent_header.blockinfo[i];
+
+ if blockinfo.dev_id == 0 {
+ continue;
+ }
+
+ let block_index_entry = VmaBlockIndexEntry {
+ cluster_file_offset: file_offset,
+ mask: blockinfo.mask
+ };
+
+ self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = block_index_entry;
+ file_offset += blockinfo.mask.count_ones() as u64 * 4096;
+ }
+ }
+
+ return Ok(());
+ }
+
+ pub fn read_device_contents(&mut self, device_id: u8, buffer: &mut [u8], offset: u64) -> result::Result<bool, Box<dyn error::Error>> {
+ assert_eq!(offset % (4096 * 16), 0);
+
+ // Make sure that the device clusters are already indexed
+ if !self.blocks_are_indexed {
+ self.index_device_clusters()?;
+ }
+
+ let this_device_block_index = &self.block_index[device_id as usize];
+ let length = cmp::min(buffer.len(), this_device_block_index.len() * 4096 * 16 - offset as usize);
+ let mut buffer_offset = 0;
+ let mut buffer_is_zero = true;
+
+ while buffer_offset < length {
+ let block_index_entry = &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)];
+ self.vma_file.seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?;
+
+ for i in 0..16 {
+ let current_block_mask = ((block_index_entry.mask >> i) & 1) == 1;
+
+ if current_block_mask {
+ self.vma_file.read_exact(&mut buffer[buffer_offset..(buffer_offset + 4096)])?;
+ buffer_is_zero = false;
+ } else {
+ buffer[buffer_offset..(buffer_offset + 4096)].fill(0);
+ }
+
+ buffer_offset += 4096;
+ }
+ }
+
+ return Ok(buffer_is_zero);
+ }
+}
diff --git a/submodules/proxmox b/submodules/proxmox
new file mode 160000
index 0000000..dc9ee73
--- /dev/null
+++ b/submodules/proxmox
@@ -0,0 +1 @@
+Subproject commit dc9ee737512fc2c7325f47b875d6c69ccf484cea
diff --git a/submodules/proxmox-backup-qemu b/submodules/proxmox-backup-qemu
new file mode 160000
index 0000000..73a09e9
--- /dev/null
+++ b/submodules/proxmox-backup-qemu
@@ -0,0 +1 @@
+Subproject commit 73a09e96720434e4aba7f876f9c6cf56bce58c2c
--
2.39.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pbs-devel] [PATCH vma-to-pbs] Initial commit
2023-09-22 12:41 [pbs-devel] [PATCH vma-to-pbs] Initial commit Filip Schauer
@ 2023-09-26 13:11 ` Wolfgang Bumiller
2023-09-29 13:22 ` Filip Schauer
0 siblings, 1 reply; 6+ messages in thread
From: Wolfgang Bumiller @ 2023-09-26 13:11 UTC (permalink / raw)
To: Filip Schauer; +Cc: pbs-devel
First things first: please use rustfmt (and clippy, I'm pretty sure some
of the things, eg `pbs_err`'s mutability casts, should be caught by it).
Secondly: this is not a library, so it would be perfectly fine to use
`anyhow` for error handling here.
In fact, the panics/unwraps more than likely cause some errors to come
out without any useful context/info, which probably won't produce useful
output when users run into bugs.
That said, a lot of the unwraps are fine, especially all those
string<->C-string conversions... but those are an entire topic anyway
(see further down).
On Fri, Sep 22, 2023 at 02:41:11PM +0200, Filip Schauer wrote:
> Implement a tool to import VMA files into a Proxmox Backup Server
>
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
> I could not assign this to one distinct repository, so I propose a new
> one. In its current state this will not be able to build a deb package
> yet since the debian directory is missing.
A separate repository is probably fine for a standalone binary.
> The program calls extern "C" functions from proxmox-backup-qemu to
> interface with the Backup Server. I chose these functions since those
> are the only abstracted functions I could find.
Ideally we'd have a public-facing version of the `pbs-client` crate at
some point. The `proxmox-backup-qemu` code could be used as a base to
design the public facing API for it. In its _current_ state, I'd
personally describe `pbs-client`'s API as indecipherable and not
suitable for a public facing crate.
On the one hand it would definitely be nice to avoid the intermediate C
api layer, but at the same time this would just double the amount of
code that needs updating when pbs-client changes.
So for now I suppose this would be fine, but I would definitely like to
see an actual usable pbs client crate that ends up replacing
`proxmox-backup-qemu`'s use of the `pbs-client` crate as well. This
should be more maintainable.
Optionally, since you're using proxmox-backup-qemu as a
submodule, we could consider exposing some rust-typed functions there as
an initial public API draft.
Eg. instead of using `proxmox_backup_new_ns()`, its building of
`BackupSetup` could be moved into a `BackupSetup::new()` which could be
a `pub fn` to at least skip over the `str->Cstr->str` chain.
> The proxmox-sys dependency is used for reading passwords from the tty.
>
> .cargo/config | 5 +
> .gitmodules | 6 +
> Cargo.toml | 16 ++
> Makefile | 70 ++++++++
> src/main.rs | 266 +++++++++++++++++++++++++++++
> src/vma.rs | 297 +++++++++++++++++++++++++++++++++
> submodules/proxmox | 1 +
> submodules/proxmox-backup-qemu | 1 +
> 8 files changed, 662 insertions(+)
> create mode 100644 .cargo/config
> create mode 100644 .gitmodules
> create mode 100644 Cargo.toml
> create mode 100644 Makefile
> create mode 100644 src/main.rs
> create mode 100644 src/vma.rs
> create mode 160000 submodules/proxmox
> create mode 160000 submodules/proxmox-backup-qemu
>
> diff --git a/.cargo/config b/.cargo/config
> new file mode 100644
> index 0000000..3b5b6e4
> --- /dev/null
> +++ b/.cargo/config
> @@ -0,0 +1,5 @@
> +[source]
> +[source.debian-packages]
> +directory = "/usr/share/cargo/registry"
> +[source.crates-io]
> +replace-with = "debian-packages"
> diff --git a/.gitmodules b/.gitmodules
> new file mode 100644
> index 0000000..526f5ef
> --- /dev/null
> +++ b/.gitmodules
> @@ -0,0 +1,6 @@
> +[submodule "submodules/proxmox-backup-qemu"]
> + path = submodules/proxmox-backup-qemu
> + url = git://git.proxmox.com/git/proxmox-backup-qemu.git
> +[submodule "submodules/proxmox"]
> + path = submodules/proxmox
> + url = git://git.proxmox.com/git/proxmox.git
^ We don't need proxmox.git as submodule.
> diff --git a/Cargo.toml b/Cargo.toml
> new file mode 100644
> index 0000000..72e2812
> --- /dev/null
> +++ b/Cargo.toml
> @@ -0,0 +1,16 @@
> +[package]
> +name = "vma-to-pbs"
> +version = "0.0.1"
> +authors = ["Filip Schauer <f.schauer@proxmox.com>"]
> +edition = "2021"
> +
> +[dependencies]
Please sort dependencies, and group them so that the submodule
dependencies are a separate group, and our own packages are another.
> +getopts = "0.2"
I'm really not a friend of something that forces path based arguments to
be `String`s.
It accepts `OsStr` as *input* but only produces `String`s as output.
This crate should never ever be used for serious programs!
That said - the VMA files that exist - coming most likely exclusively
from PVE - won't have non-utf8 paths... Still... I can't take this crate
seriously in its current form.
To my knowledge, `clap` is the only argument parser that supports
sticking with `OsStrings` throughout.
> +proxmox-sys = { path = "submodules/proxmox/proxmox-sys" }
> +proxmox-backup-qemu = { path = "submodules/proxmox-backup-qemu" }
> +proxmox-uuid = "1"
> +md5 = "0.7.0"
> +bincode = "1.3"
> +serde = "1.0"
I'm a bit skeptical here. The serde & bincode combination does not allow
for deserializing into a pre-allocated memory location and the structs
here are very large. You're placing some multiple megabytes of data onto
the stack here.
IMO it would be better to just read into a Box<[u8; N]>, cast it to a
Box<T> and do the byte-swapping in-place.
I'll have to think about that. This seems somewhat convenient, but in
the past, with rust, large structs on the stack have been a bit of a
crashy mine-field.
> +serde-big-array = "0.4.1"
> +array-init = "2.0.1"
> diff --git a/src/main.rs b/src/main.rs
> new file mode 100644
> index 0000000..1e8c240
> --- /dev/null
> +++ b/src/main.rs
> @@ -0,0 +1,266 @@
> +extern crate getopts;
> +extern crate proxmox_sys;
> +extern crate proxmox_backup_qemu;
> +
> +use std::time::{SystemTime, UNIX_EPOCH};
> +use std::ffi::{c_char, CStr, CString};
> +use std::env;
> +use std::ptr;
> +
> +use getopts::Options;
> +use proxmox_sys::linux::tty;
> +use proxmox_backup_qemu::*;
> +
> +mod vma;
> +use vma::*;
> +
> +fn print_usage(program: &str, opts: Options) {
This should have 2 variants: when called via `--help` it should print to
`stdout`, when called due to an error it should print to `stderr`.
> + let brief = format!("Usage: {} vma_file [options]", program);
> + print!("{}", opts.usage(&brief));
> +}
> +
> +fn backup_vma_to_pbs(
> + vma_file_path: String,
> + pbs_repository: String,
> + backup_id: String,
> + pbs_password: String,
> + keyfile: Option<String>,
> + key_password: Option<String>,
> + master_keyfile: Option<String>,
> + fingerprint: String,
> + compress: bool,
> + encrypt: bool
> +) {
> + println!("VMA input file: {}", vma_file_path);
> + println!("PBS repository: {}", pbs_repository);
> + println!("PBS fingerprint: {}", fingerprint);
> + println!("compress: {}", compress);
> + println!("encrypt: {}", encrypt);
> +
> + let backup_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
> + println!("backup time: {}", backup_time);
> +
> + let pbs_err: *mut c_char = ptr::null_mut();
^ You're actually mutating this, so this should be `let mut pbs_err`.
Otherwise you're basically producing undefined behavior since you cast
it into a mutable value.
All those
&pbs_err as *const _ as *mut _
expressions can in facto simply be
&mut pbs_err
> +
> + let pbs_repository_cstr = CString::new(pbs_repository).unwrap();
> + let backup_id_cstr = CString::new(backup_id).unwrap();
> + let pbs_password_cstr = CString::new(pbs_password).unwrap();
> + let fingerprint_cstr = CString::new(fingerprint).unwrap();
> + let keyfile_cstr = match keyfile {
> + Some(x) => Some(CString::new(x).unwrap()),
> + None => None
> + };
> + let keyfile_ptr = match keyfile_cstr {
> + Some(x) => x.as_ptr(),
> + None => ptr::null()
> + };
^ I think all these pairs are more readable this way:
let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap());
let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
at least it's more concise.
But not a big deal, combinators also take some getting used to and might
not be the more readable variant for everyone. It's a bit annoying that
we need the cstr binding separately for its lifetime, which is another
thing we could avoid with a more rust-y API for the backup client...
> + let key_password_cstr = match key_password {
> + Some(x) => Some(CString::new(x).unwrap()),
> + None => None
> + };
> + let key_password_ptr = match key_password_cstr {
> + Some(x) => x.as_ptr(),
> + None => ptr::null()
> + };
> + let master_keyfile_cstr = match master_keyfile {
> + Some(x) => Some(CString::new(x).unwrap()),
> + None => None
> + };
> + let master_keyfile_ptr = match master_keyfile_cstr {
> + Some(x) => x.as_ptr(),
> + None => ptr::null()
> + };
> +
> + let pbs = proxmox_backup_new_ns(
> + pbs_repository_cstr.as_ptr(),
> + ptr::null(),
> + backup_id_cstr.as_ptr(),
> + backup_time,
> + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
> + pbs_password_cstr.as_ptr(),
> + keyfile_ptr,
> + key_password_ptr,
> + master_keyfile_ptr,
> + true,
> + false,
> + fingerprint_cstr.as_ptr(),
> + &pbs_err as *const _ as *mut _);
1) &mut pbs_err, no cast
2)
Since you're going to be using `panic` below, you should add a wrapper
for the `*mut ProxmoxBackupHandle` which implements `Drop` accordingly.
(Unless you do start adapting `proxmox-backup-qemu`, in which case this
might just become an `Arc<BackupTask>` which already takes care of
this.)
> +
> + if pbs == ptr::null_mut() {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_new_ns failed: {}", pbs_err_cstr.to_str().unwrap());
I think it's nicer to just use Debug formatting (`{pbs_err_cstr:?}`).
Avoids typing out an extra unwrap and can be inlined in the format
string.
> + }
> + }
> +
> + let connect_result = proxmox_backup_connect(pbs, &pbs_err as *const _ as *mut _);
^ &mut pbs_err, no cast
> +
> + if connect_result < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_connect failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> +
> + let mut vma_reader = VmaReader::new(&vma_file_path);
> +
> + // Handle configs
> + let configs = vma_reader.get_configs();
> + for (config_name, config_data) in &configs {
You're not reusing `configs` afterwards, so you can drop the `&`.
That way...
> + println!("CFG: size: {} name: {}", config_data.len(), config_name);
> +
> + let config_name_cstr = CString::new(config_name.as_bytes()).unwrap();
...you can also drop the `.as_bytes()` here since CString::new can also
work directly with a `String` since it takes an `impl Into<Vec<u8>>`.
> +
> + if proxmox_backup_add_config(pbs, config_name_cstr.as_ptr(), config_data.as_ptr(), config_data.len() as u64, &pbs_err as *const _ as *mut _) < 0 {
^ &mut pbs_err, no cast
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_add_config failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> + }
> +
> + // Handle block devices
> + for device_id in 0..255 {
> + let device_name = match vma_reader.get_device_name(device_id) {
> + Some(x) => { x }
> + None => { continue; }
> + };
> +
> + let device_size = match vma_reader.get_device_size(device_id) {
> + Some(x) => { x }
> + None => { continue; }
> + };
> +
> + println!("DEV: dev_id={} size: {} devname: {}", device_id, device_size, device_name);
> +
> + let device_name_cstr = CString::new(device_name.as_bytes()).unwrap();
^ you can just drop `.as_bytes()` here.
> + let pbs_device_id = proxmox_backup_register_image(pbs, device_name_cstr.as_ptr(), device_size, false, &pbs_err as *const _ as *mut _);
^ &mut pbs_err, no cast
> +
> + if pbs_device_id < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_register_image failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> +
> + let mut image_chunk_buffer = [0u8; PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize];
Please let's not put a 4M buffer onto the stack :-)
Also note that with the way rust works, a debug build of
`Box::new([0; size])` will *also* first build that large buffer on the
stack and *afterwards* allocate and move it.
You can add `proxmox-io` as a dependency and use
`proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE)` here.
This uses `std::alloc` + `std::ptr::write_bytes` for initialization, or,
if nothing else from that crate will be used, just copy its 2 lines into
a helper around here, "malloc()+bzero()" is not enough magic to justify
a full crate dependency for a single function :-)
> + let mut bytes_transferred = 0;
> +
> + while bytes_transferred < device_size {
> + let bytes_left = device_size - bytes_transferred;
> + let this_chunk_size = if bytes_left < PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE { bytes_left } else { PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE };
^ This is
let this_chunk_size = bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE);
> + println!("Uploading dev_id: {} offset: {:#0X} - {:#0X}", device_id, bytes_transferred, bytes_transferred + this_chunk_size);
> +
> + let is_zero_chunk = vma_reader.read_device_contents(device_id, &mut image_chunk_buffer, bytes_transferred).unwrap();
Don't use `unwrap()` on your own functions returning `Results`.
At the very least use `.expect()` to add a bit of context.
But also: anyhow + anyhow::Context trait make more sense.
> + let write_data_result = proxmox_backup_write_data(
> + pbs,
> + pbs_device_id as u8,
> + if is_zero_chunk { ptr::null() } else { image_chunk_buffer.as_ptr() },
> + bytes_transferred,
> + this_chunk_size,
> + &pbs_err as *const _ as *mut _
^ &mut pbs_err, no cast
> + );
> +
> + if write_data_result < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_write_data failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> +
> + bytes_transferred += this_chunk_size;
> + }
> +
> + if proxmox_backup_close_image(pbs, pbs_device_id as u8, &pbs_err as *const _ as *mut _) < 0 {
^ &mut pbs_err, no cast
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_close_image failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> + }
> +
> + if proxmox_backup_finish(pbs, &pbs_err as *const _ as *mut _) < 0 {
^ &mut pbs_err, no cast
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + panic!("proxmox_backup_finish failed: {}", pbs_err_cstr.to_str().unwrap());
> + }
> + }
> +
> + proxmox_backup_disconnect(pbs);
> +}
> +
> +fn main() {
> + let args: Vec<String> = env::args().collect();
> + let program = args[0].clone();
> +
> + let mut opts = Options::new();
> + opts.optopt("", "repository", "Repository URL", "<auth_id>@<host>:<port>:<datastore>");
> + opts.optopt("", "vmid", "Backup ID", "VMID");
> + opts.optopt("", "fingerprint", "Proxmox Backup Server Fingerprint", "FINGERPRINT");
> + opts.optopt("", "keyfile", "Key file", "KEYFILE");
> + opts.optopt("", "master_keyfile", "Master key file", "MASTER_KEYFILE");
> + opts.optflag("c", "compress", "Compress the Backup");
> + opts.optflag("e", "encrypt", "Encrypt the Backup");
> + opts.optflag("h", "help", "print this help menu");
> + let matches = match opts.parse(&args[1..]) {
> + Ok(m) => { m }
> + Err(f) => { panic!("{}", f.to_string()) }
> + };
> +
> + if matches.opt_present("h") {
> + print_usage(&program, opts);
> + return;
Side note:
Since both `main` and `print_usage` return `()` you could just
return print_usage(...);
> + }
> +
> + if !matches.opt_present("repository") {
> + println!("missing --repository option");
^ should be `eprintln`
> + print_usage(&program, opts);
> + return;
IMO it would be friendlier to the user if we don't return here just yet,
but just set an `error` boolean and check the remaining required
arguments first, printing all the missing arguments at once, and only
then print the usage & exit.
> + }
> + let pbs_repository = matches.opt_str("repository").unwrap();
Also, instead of `opt_present` followed by `opt_str().unwrap()`, better
use only `opt_str` with a `match` statement.
Eg.
let mut error = false;
let mut get_required_opt = |name| match matches.opt_str(name) {
Some(value) => value,
None => {
error = true;
eprintln!("missing --{name} option");
String::new() // don't care about the value
}
};
let pbs_repository = get_required_opt("repository");
let vmid = get_required_opt("vmid");
let fingerprint = get_required_opt("fingerprint");
if error {
return print_usage(&program, opts);
}
> +
> + if !matches.opt_present("vmid") {
> + println!("missing --vmid option");
> + print_usage(&program, opts);
> + return;
> + }
> + let vmid = matches.opt_str("vmid").unwrap();
> +
> + if !matches.opt_present("fingerprint") {
> + println!("missing --fingerprint option");
> + print_usage(&program, opts);
> + return;
> + }
> + let fingerprint = matches.opt_str("fingerprint").unwrap();
> +
> + let keyfile = matches.opt_str("keyfile");
> + let master_keyfile = matches.opt_str("master_keyfile");
> + let compress = matches.opt_present("c");
> + let encrypt = matches.opt_present("e");
> +
> + let vma_file_path = if !matches.free.is_empty() {
^ We should probably check the exact len of `.free`, since *multiple*
free arguments are also an error, or at least not handled at all?
> + matches.free[0].clone()
> + } else {
> + print_usage(&program, opts);
> + return;
> + };
> +
> + let pbs_password = String::from_utf8(tty::read_password(&"Password: ").unwrap()).unwrap();
> + let key_password = match keyfile {
> + Some(_) => Some(String::from_utf8(tty::read_password(&"Key Password: ").unwrap()).unwrap()),
> + None => None
> + };
> +
> + backup_vma_to_pbs(
> + vma_file_path,
> + pbs_repository,
> + vmid,
> + pbs_password,
> + keyfile,
> + key_password,
> + master_keyfile,
> + fingerprint,
> + compress,
> + encrypt
> + );
> +}
> diff --git a/src/vma.rs b/src/vma.rs
> new file mode 100644
> index 0000000..4b239c8
> --- /dev/null
> +++ b/src/vma.rs
> @@ -0,0 +1,297 @@
> +extern crate md5;
> +
> +use std::fs::File;
> +use std::io::{Read, Seek, SeekFrom};
> +use std::mem::size_of;
> +use std::{cmp, error, str, result};
please drop `result` from this list.
> +use std::collections::HashMap;
> +
> +use array_init::array_init;
> +use bincode::*;
^ Please don't do `*` imports.
This is the only reason you need the `std::result` import, but you never
actually use `bincode::Result`, in fact, you actually only use
`bincode::Options`.
> +use serde::{Deserialize, Serialize};
> +use serde_big_array::BigArray;
> +
> +const VMA_BLOCKS_PER_EXTENT: usize = 59;
> +const VMA_MAX_CONFIGS: usize = 256;
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaDeviceInfoHeader {
> + pub device_name_offset: u32,
> + reserved: [u8; 4],
> + pub device_size: u64,
> + reserved1: [u8; 16]
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaHeader {
> + pub magic: [u8; 4],
> + pub version: u32,
> + pub uuid: [u8; 16],
> + pub ctime: u64,
> + pub md5sum: [u8; 16],
> + pub blob_buffer_offset: u32,
> + pub blob_buffer_size: u32,
> + pub header_size: u32,
> + #[serde(with = "BigArray")]
> + reserved: [u8; 1984],
> + #[serde(with = "BigArray")]
> + pub config_names: [u32; 256],
> + #[serde(with = "BigArray")]
> + pub config_data: [u32; 256],
^ These arrays should use `VMA_MAX_CONFIGS` instead of `256`.
> + reserved1: [u8; 4],
> + #[serde(with = "BigArray")]
> + pub dev_info: [VmaDeviceInfoHeader; 256]
Should probably have a `VMA_MAX_DEVICES` here.
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaBlockInfo {
> + pub mask: u16,
> + reserved: u8,
> + pub dev_id: u8,
> + pub cluster_num: u32
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaExtentHeader {
> + pub magic: [u8; 4],
> + reserved: [u8; 2],
> + pub block_count: u16,
> + pub uuid: [u8; 16],
> + pub md5sum: [u8; 16],
> + #[serde(with = "BigArray")]
> + pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT]
> +}
> +
> +#[derive(Clone)]
> +struct VmaBlockIndexEntry {
> + pub cluster_file_offset: u64,
> + pub mask: u16
> +}
> +
> +pub struct VmaReader {
> + vma_file: File,
> + vma_header: VmaHeader,
> + configs: HashMap<String, String>,
> + block_index: [Vec<VmaBlockIndexEntry>; 256],
IMO this should be either a `Vec<Vec<VmaBlockIndexEntry>>` with checked
`.get()` calls, or at least `Box<>` the array, as `VmaRead` is now quite
a huge thing on the stack.
Alternatively the whole `VmaReader` could be boxed.
> + blocks_are_indexed: bool
> +}
> +
> +impl VmaReader {
> + pub fn new(vma_file_path: &str) -> Self {
> + let mut vma_file = match File::open(vma_file_path) {
> + Err(why) => panic!("couldn't open {}: {}", vma_file_path, why),
> + Ok(file) => file,
> + };
> +
> + let vma_header = Self::read_header(&mut vma_file).unwrap();
> + let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap();
> + let block_index: [Vec<VmaBlockIndexEntry>; 256] = array_init(|_| Vec::new());
> +
> + let instance = Self {
> + vma_file,
> + vma_header,
> + configs,
> + block_index,
> + blocks_are_indexed: false
> + };
> +
> + return instance;
> + }
> +
> + fn read_header(vma_file: &mut File) -> result::Result<VmaHeader, Box<dyn error::Error>> {
> + let mut buffer = vec![0u8; size_of::<VmaHeader>()];
Better to `Vec::with_capacity()` and `.resize(size, 0)`. The above code
will create a temporary `VmaHeader`-sized zero-buffer on the stack
before moving it. In debug builds this can even be notice-ably slow. At
least in older rust versions building large [0u8; N] could take a
visible amount of time (depending on N even seconds at times...)
> + vma_file.read_exact(&mut buffer)?;
> +
> + let bincode_options = bincode::DefaultOptions::new()
> + .with_fixint_encoding()
> + .with_big_endian();
> +
> + let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?;
> +
> + if vma_header.magic != [b'V', b'M', b'A', 0] {
> + return Err("Invalid magic number".into());
> + }
> +
> + if vma_header.version != 1 {
> + return Err(format!("Invalid VMA version {}", vma_header.version).into());
> + }
> +
> + buffer.resize(vma_header.header_size as usize, 0);
> + vma_file.read_exact(&mut buffer[size_of::<VmaHeader>()..])?;
> +
> + buffer[32..48].fill(0);
Might be worth mentioning that this is the md5 sum in the struct. In
fact, a helper casting a buffer to `&mut VmaHeader` and using the actual
member access to zero it out might be nice, but not necessary since
these are never going to "move". So feel free to just add a comment
instead, but a comment is definitely needed ;-)
> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
> +
> + if vma_header.md5sum != computed_md5sum {
> + return Err("Wrong VMA header checksum".into());
> + }
> +
> + return Ok(vma_header);
> + }
> +
> + fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> result::Result<String, Box<dyn error::Error>> {
> + let mut size_bytes = [0u8; 2];
> + vma_file.seek(SeekFrom::Start(file_offset))?;
> + vma_file.read_exact(&mut size_bytes)?;
> + let size = u16::from_le_bytes(size_bytes) as usize;
> + let mut string_bytes = vec![0u8; size - 1];
> + vma_file.read_exact(&mut string_bytes)?;
> + let string = str::from_utf8(&string_bytes)?;
> +
> + return Ok(string.to_string());
> + }
> +
> + fn read_blob_buffer(vma_file: &mut File, vma_header: &VmaHeader) -> result::Result<HashMap<String, String>, Box<dyn error::Error>> {
> + let mut configs = HashMap::new();
> +
> + for i in 0..VMA_MAX_CONFIGS {
> + let config_name_offset = vma_header.config_names[i];
> + let config_data_offset = vma_header.config_data[i];
> +
> + if config_name_offset == 0 || config_data_offset == 0 {
> + continue;
> + }
> +
> + let config_name_file_offset = (vma_header.blob_buffer_offset + config_name_offset) as u64;
> + let config_data_file_offset = (vma_header.blob_buffer_offset + config_data_offset) as u64;
> + let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?;
> + let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?;
> +
> + configs.insert(String::from(config_name), String::from(config_data));
> + }
> +
> + return Ok(configs);
> + }
> +
> + pub fn get_configs(&self) -> HashMap<String, String> {
> + return self.configs.clone();
> + }
> +
> + pub fn get_device_name(&mut self, device_id: u8) -> Option<String> {
^ API-wise I don't think it makes much sense to encode the 256 limit
into the type here. You can get rid of a *lot* of `as usize` casts if
this is just an usize itself. But of course, you'd need to validate the
value then, but at least it can then be explicitly tied to a named MAX_*
constant to explain the limit.
> + let device_name_offset = self.vma_header.dev_info[device_id as usize].device_name_offset;
> +
> + if device_name_offset == 0 {
> + return None;
> + }
> +
> + let device_name_file_offset = (self.vma_header.blob_buffer_offset + device_name_offset) as u64;
> + let device_name = Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap();
> +
> + return Some(device_name.to_string());
> + }
> +
> + pub fn get_device_size(&self, device_id: u8) -> Option<u64> {
Much less typing if you first
let dev_info = &self.vma_header.dev_info[device_id as usize];
and not duplicate the `self.vma_header.dev_info[device_id as usize]`
bit...
> + if self.vma_header.dev_info[device_id as usize].device_name_offset == 0 {
> + return None;
> + }
> +
> + return Some(self.vma_header.dev_info[device_id as usize].device_size);
> + }
> +
> + fn read_extent_header(vma_file: &mut File) -> result::Result<VmaExtentHeader, Box<dyn error::Error>> {
> + let mut buffer = vec![0u8; size_of::<VmaExtentHeader>()];
Again, please manage the buffer as mentioned above in `read_header`.
> + vma_file.read_exact(&mut buffer)?;
> +
> + let bincode_options = bincode::DefaultOptions::new()
> + .with_fixint_encoding()
> + .with_big_endian();
> +
> + let vma_extent_header: VmaExtentHeader = bincode_options.deserialize(&buffer)?;
> +
> + if vma_extent_header.magic != [b'V', b'M', b'A', b'E'] {
> + return Err("Invalid magic number".into());
> + }
> +
> + buffer[24..40].fill(0);
^ Same here, comment needed
> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
> +
> + if vma_extent_header.md5sum != computed_md5sum {
> + return Err("Wrong VMA extent header checksum".into());
> + }
> +
> + return Ok(vma_extent_header);
> + }
> +
> + fn index_device_clusters(&mut self) -> result::Result<(), Box<dyn error::Error>> {
> + for device_id in 0..255 {
> + let device_size = match self.get_device_size(device_id) {
> + Some(x) => { x }
> + None => { continue; }
> + };
> +
> + let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16);
> +
> + let block_index_entry_placeholder = VmaBlockIndexEntry {
> + cluster_file_offset: 0,
> + mask: 0
> + };
> +
> + self.block_index[device_id as usize].resize(device_cluster_count as usize, block_index_entry_placeholder);
> + }
> +
> + let mut file_offset = self.vma_header.header_size as u64;
> + let vma_file_size = self.vma_file.metadata()?.len();
> +
> + while file_offset < vma_file_size {
> + self.vma_file.seek(SeekFrom::Start(file_offset))?;
> + let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?;
> + file_offset += 512;
^ Should be able to use `size_of::<VmaExtentHeader>()` here, no?
> +
> + for i in 0..59 as usize {
^ Should use VMA_BLOCKS_PER_EXTENT instead of 59.
> + let blockinfo = &vma_extent_header.blockinfo[i];
> +
> + if blockinfo.dev_id == 0 {
> + continue;
> + }
> +
> + let block_index_entry = VmaBlockIndexEntry {
> + cluster_file_offset: file_offset,
> + mask: blockinfo.mask
> + };
> +
> + self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = block_index_entry;
> + file_offset += blockinfo.mask.count_ones() as u64 * 4096;
> + }
> + }
> +
> + return Ok(());
> + }
> +
> + pub fn read_device_contents(&mut self, device_id: u8, buffer: &mut [u8], offset: u64) -> result::Result<bool, Box<dyn error::Error>> {
> + assert_eq!(offset % (4096 * 16), 0);
And another reason for anyhow over `panic!` based error handling: I'm
not sure the above is supposed to be checked on release builds as well?
> +
> + // Make sure that the device clusters are already indexed
> + if !self.blocks_are_indexed {
> + self.index_device_clusters()?;
> + }
> +
> + let this_device_block_index = &self.block_index[device_id as usize];
> + let length = cmp::min(buffer.len(), this_device_block_index.len() * 4096 * 16 - offset as usize);
> + let mut buffer_offset = 0;
> + let mut buffer_is_zero = true;
> +
> + while buffer_offset < length {
> + let block_index_entry = &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)];
> + self.vma_file.seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?;
> +
> + for i in 0..16 {
> + let current_block_mask = ((block_index_entry.mask >> i) & 1) == 1;
> +
> + if current_block_mask {
> + self.vma_file.read_exact(&mut buffer[buffer_offset..(buffer_offset + 4096)])?;
it's possible to have a device that isn't exactly a multiple of
4096, meaning its very final block may be smaller, so the device size
needs to always be taken into account when reading such blocks.
You can produce such an image via `pvesm alloc` as it takes a multiple
of `1024` for its size, eg:
# pvesm alloc local 100 vm-100-bad.raw $[(1<<20) + 1]
Will produce a disk that is 1G + 1024 bytes.
Now check what your tool does with the file
- while the final blocks are still zero
- then fill those blocks with data and check again.
> + buffer_is_zero = false;
> + } else {
> + buffer[buffer_offset..(buffer_offset + 4096)].fill(0);
> + }
> +
> + buffer_offset += 4096;
> + }
> + }
> +
> + return Ok(buffer_is_zero);
> + }
> +}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pbs-devel] [PATCH vma-to-pbs] Initial commit
2023-09-26 13:11 ` Wolfgang Bumiller
@ 2023-09-29 13:22 ` Filip Schauer
2023-09-29 13:38 ` Lukas Wagner
0 siblings, 1 reply; 6+ messages in thread
From: Filip Schauer @ 2023-09-29 13:22 UTC (permalink / raw)
To: Wolfgang Bumiller; +Cc: pbs-devel
Patch v2 was submitted here:
https://lists.proxmox.com/pipermail/pbs-devel/2023-September/006760.html
Some additional remarks were added below.
On 26/09/2023 15:11, Wolfgang Bumiller wrote:
> First things first: please use rustfmt (and clippy, I'm pretty sure some
> of the things, eg `pbs_err`'s mutability casts, should be caught by it).
>
> Secondly: this is not a library, so it would be perfectly fine to use
> `anyhow` for error handling here.
> In fact, the panics/unwraps more than likely cause some errors to come
> out without any useful context/info, which probably won't produce useful
> output when users run into bugs.
> That said, a lot of the unwraps are fine, especially all those
> string<->C-string conversions... but those are an entire topic anyway
> (see further down).
>
> On Fri, Sep 22, 2023 at 02:41:11PM +0200, Filip Schauer wrote:
>> Implement a tool to import VMA files into a Proxmox Backup Server
>>
>> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
>> ---
>> I could not assign this to one distinct repository, so I propose a new
>> one. In its current state this will not be able to build a deb package
>> yet since the debian directory is missing.
> A separate repository is probably fine for a standalone binary.
>
>> The program calls extern "C" functions from proxmox-backup-qemu to
>> interface with the Backup Server. I chose these functions since those
>> are the only abstracted functions I could find.
> Ideally we'd have a public-facing version of the `pbs-client` crate at
> some point. The `proxmox-backup-qemu` code could be used as a base to
> design the public facing API for it. In its _current_ state, I'd
> personally describe `pbs-client`'s API as indecipherable and not
> suitable for a public facing crate.
>
> On the one hand it would definitely be nice to avoid the intermediate C
> api layer, but at the same time this would just double the amount of
> code that needs updating when pbs-client changes.
>
> So for now I suppose this would be fine, but I would definitely like to
> see an actual usable pbs client crate that ends up replacing
> `proxmox-backup-qemu`'s use of the `pbs-client` crate as well. This
> should be more maintainable.
>
> Optionally, since you're using proxmox-backup-qemu as a
> submodule, we could consider exposing some rust-typed functions there as
> an initial public API draft.
> Eg. instead of using `proxmox_backup_new_ns()`, its building of
> `BackupSetup` could be moved into a `BackupSetup::new()` which could be
> a `pub fn` to at least skip over the `str->Cstr->str` chain.
>
>> The proxmox-sys dependency is used for reading passwords from the tty.
>>
>> .cargo/config | 5 +
>> .gitmodules | 6 +
>> Cargo.toml | 16 ++
>> Makefile | 70 ++++++++
>> src/main.rs | 266 +++++++++++++++++++++++++++++
>> src/vma.rs | 297 +++++++++++++++++++++++++++++++++
>> submodules/proxmox | 1 +
>> submodules/proxmox-backup-qemu | 1 +
>> 8 files changed, 662 insertions(+)
>> create mode 100644 .cargo/config
>> create mode 100644 .gitmodules
>> create mode 100644 Cargo.toml
>> create mode 100644 Makefile
>> create mode 100644 src/main.rs
>> create mode 100644 src/vma.rs
>> create mode 160000 submodules/proxmox
>> create mode 160000 submodules/proxmox-backup-qemu
>>
>> diff --git a/.cargo/config b/.cargo/config
>> new file mode 100644
>> index 0000000..3b5b6e4
>> --- /dev/null
>> +++ b/.cargo/config
>> @@ -0,0 +1,5 @@
>> +[source]
>> +[source.debian-packages]
>> +directory = "/usr/share/cargo/registry"
>> +[source.crates-io]
>> +replace-with = "debian-packages"
>> diff --git a/.gitmodules b/.gitmodules
>> new file mode 100644
>> index 0000000..526f5ef
>> --- /dev/null
>> +++ b/.gitmodules
>> @@ -0,0 +1,6 @@
>> +[submodule "submodules/proxmox-backup-qemu"]
>> + path = submodules/proxmox-backup-qemu
>> + url = git://git.proxmox.com/git/proxmox-backup-qemu.git
>> +[submodule "submodules/proxmox"]
>> + path = submodules/proxmox
>> + url = git://git.proxmox.com/git/proxmox.git
> ^ We don't need proxmox.git as submodule.
We use proxmox_sys::linux::tty::read_password.
>> diff --git a/Cargo.toml b/Cargo.toml
>> new file mode 100644
>> index 0000000..72e2812
>> --- /dev/null
>> +++ b/Cargo.toml
>> @@ -0,0 +1,16 @@
>> +[package]
>> +name = "vma-to-pbs"
>> +version = "0.0.1"
>> +authors = ["Filip Schauer <f.schauer@proxmox.com>"]
>> +edition = "2021"
>> +
>> +[dependencies]
> Please sort dependencies, and group them so that the submodule
> dependencies are a separate group, and our own packages are another.
>
>> +getopts = "0.2"
> I'm really not a friend of something that forces path based arguments to
> be `String`s.
> It accepts `OsStr` as *input* but only produces `String`s as output.
> This crate should never ever be used for serious programs!
> That said - the VMA files that exist - coming most likely exclusively
> from PVE - won't have non-utf8 paths... Still... I can't take this crate
> seriously in its current form.
> To my knowledge, `clap` is the only argument parser that supports
> sticking with `OsStrings` throughout.
>
>> +proxmox-sys = { path = "submodules/proxmox/proxmox-sys" }
>> +proxmox-backup-qemu = { path = "submodules/proxmox-backup-qemu" }
>> +proxmox-uuid = "1"
>> +md5 = "0.7.0"
>> +bincode = "1.3"
>> +serde = "1.0"
> I'm a bit skeptical here. The serde & bincode combination does not allow
> for deserializing into a pre-allocated memory location and the structs
> here are very large. You're placing some multiple megabytes of data onto
> the stack here.
> IMO it would be better to just read into a Box<[u8; N]>, cast it to a
> Box<T> and do the byte-swapping in-place.
> I'll have to think about that. This seems somewhat convenient, but in
> the past, with rust, large structs on the stack have been a bit of a
> crashy mine-field.
I am deserializing VmaHeader which is only 12288 bytes in size and
VmaExtentHeader which is only 512 bytes in size.
After turning block_index in VmaReader into a Vec the stack usage of the
entire program never exceeded 500 KiB, regardless of the size of the
disks.
>> +serde-big-array = "0.4.1"
>> +array-init = "2.0.1"
>> diff --git a/src/main.rs b/src/main.rs
>> new file mode 100644
>> index 0000000..1e8c240
>> --- /dev/null
>> +++ b/src/main.rs
>> @@ -0,0 +1,266 @@
>> +extern crate getopts;
>> +extern crate proxmox_sys;
>> +extern crate proxmox_backup_qemu;
>> +
>> +use std::time::{SystemTime, UNIX_EPOCH};
>> +use std::ffi::{c_char, CStr, CString};
>> +use std::env;
>> +use std::ptr;
>> +
>> +use getopts::Options;
>> +use proxmox_sys::linux::tty;
>> +use proxmox_backup_qemu::*;
>> +
>> +mod vma;
>> +use vma::*;
>> +
>> +fn print_usage(program: &str, opts: Options) {
> This should have 2 variants: when called via `--help` it should print to
> `stdout`, when called due to an error it should print to `stderr`.
>
>> + let brief = format!("Usage: {} vma_file [options]", program);
>> + print!("{}", opts.usage(&brief));
>> +}
>> +
>> +fn backup_vma_to_pbs(
>> + vma_file_path: String,
>> + pbs_repository: String,
>> + backup_id: String,
>> + pbs_password: String,
>> + keyfile: Option<String>,
>> + key_password: Option<String>,
>> + master_keyfile: Option<String>,
>> + fingerprint: String,
>> + compress: bool,
>> + encrypt: bool
>> +) {
>> + println!("VMA input file: {}", vma_file_path);
>> + println!("PBS repository: {}", pbs_repository);
>> + println!("PBS fingerprint: {}", fingerprint);
>> + println!("compress: {}", compress);
>> + println!("encrypt: {}", encrypt);
>> +
>> + let backup_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
>> + println!("backup time: {}", backup_time);
>> +
>> + let pbs_err: *mut c_char = ptr::null_mut();
> ^ You're actually mutating this, so this should be `let mut pbs_err`.
> Otherwise you're basically producing undefined behavior since you cast
> it into a mutable value.
> All those
> &pbs_err as *const _ as *mut _
> expressions can in facto simply be
> &mut pbs_err
>
>> +
>> + let pbs_repository_cstr = CString::new(pbs_repository).unwrap();
>> + let backup_id_cstr = CString::new(backup_id).unwrap();
>> + let pbs_password_cstr = CString::new(pbs_password).unwrap();
>> + let fingerprint_cstr = CString::new(fingerprint).unwrap();
>> + let keyfile_cstr = match keyfile {
>> + Some(x) => Some(CString::new(x).unwrap()),
>> + None => None
>> + };
>> + let keyfile_ptr = match keyfile_cstr {
>> + Some(x) => x.as_ptr(),
>> + None => ptr::null()
>> + };
> ^ I think all these pairs are more readable this way:
>
> let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap());
> let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
>
> at least it's more concise.
> But not a big deal, combinators also take some getting used to and might
> not be the more readable variant for everyone. It's a bit annoying that
> we need the cstr binding separately for its lifetime, which is another
> thing we could avoid with a more rust-y API for the backup client...
>
>> + let key_password_cstr = match key_password {
>> + Some(x) => Some(CString::new(x).unwrap()),
>> + None => None
>> + };
>> + let key_password_ptr = match key_password_cstr {
>> + Some(x) => x.as_ptr(),
>> + None => ptr::null()
>> + };
>> + let master_keyfile_cstr = match master_keyfile {
>> + Some(x) => Some(CString::new(x).unwrap()),
>> + None => None
>> + };
>> + let master_keyfile_ptr = match master_keyfile_cstr {
>> + Some(x) => x.as_ptr(),
>> + None => ptr::null()
>> + };
>> +
>> + let pbs = proxmox_backup_new_ns(
>> + pbs_repository_cstr.as_ptr(),
>> + ptr::null(),
>> + backup_id_cstr.as_ptr(),
>> + backup_time,
>> + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
>> + pbs_password_cstr.as_ptr(),
>> + keyfile_ptr,
>> + key_password_ptr,
>> + master_keyfile_ptr,
>> + true,
>> + false,
>> + fingerprint_cstr.as_ptr(),
>> + &pbs_err as *const _ as *mut _);
> 1) &mut pbs_err, no cast
>
> 2)
> Since you're going to be using `panic` below, you should add a wrapper
> for the `*mut ProxmoxBackupHandle` which implements `Drop` accordingly.
> (Unless you do start adapting `proxmox-backup-qemu`, in which case this
> might just become an `Arc<BackupTask>` which already takes care of
> this.)
Implementing a Drop wrapper is unfortunately not possible because
ProxmoxBackupHandle is private. But the same can be achieved with defer.
>> +
>> + if pbs == ptr::null_mut() {
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_new_ns failed: {}", pbs_err_cstr.to_str().unwrap());
> I think it's nicer to just use Debug formatting (`{pbs_err_cstr:?}`).
> Avoids typing out an extra unwrap and can be inlined in the format
> string.
>
>> + }
>> + }
>> +
>> + let connect_result = proxmox_backup_connect(pbs, &pbs_err as *const _ as *mut _);
> ^ &mut pbs_err, no cast
>
>> +
>> + if connect_result < 0 {
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_connect failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> +
>> + let mut vma_reader = VmaReader::new(&vma_file_path);
>> +
>> + // Handle configs
>> + let configs = vma_reader.get_configs();
>> + for (config_name, config_data) in &configs {
> You're not reusing `configs` afterwards, so you can drop the `&`.
> That way...
>
>> + println!("CFG: size: {} name: {}", config_data.len(), config_name);
>> +
>> + let config_name_cstr = CString::new(config_name.as_bytes()).unwrap();
> ...you can also drop the `.as_bytes()` here since CString::new can also
> work directly with a `String` since it takes an `impl Into<Vec<u8>>`.
>
>> +
>> + if proxmox_backup_add_config(pbs, config_name_cstr.as_ptr(), config_data.as_ptr(), config_data.len() as u64, &pbs_err as *const _ as *mut _) < 0 {
> ^ &mut pbs_err, no cast
>
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_add_config failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> + }
>> +
>> + // Handle block devices
>> + for device_id in 0..255 {
>> + let device_name = match vma_reader.get_device_name(device_id) {
>> + Some(x) => { x }
>> + None => { continue; }
>> + };
>> +
>> + let device_size = match vma_reader.get_device_size(device_id) {
>> + Some(x) => { x }
>> + None => { continue; }
>> + };
>> +
>> + println!("DEV: dev_id={} size: {} devname: {}", device_id, device_size, device_name);
>> +
>> + let device_name_cstr = CString::new(device_name.as_bytes()).unwrap();
> ^ you can just drop `.as_bytes()` here.
>
>> + let pbs_device_id = proxmox_backup_register_image(pbs, device_name_cstr.as_ptr(), device_size, false, &pbs_err as *const _ as *mut _);
> ^ &mut pbs_err, no cast
>
>> +
>> + if pbs_device_id < 0 {
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_register_image failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> +
>> + let mut image_chunk_buffer = [0u8; PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize];
> Please let's not put a 4M buffer onto the stack :-)
> Also note that with the way rust works, a debug build of
> `Box::new([0; size])` will *also* first build that large buffer on the
> stack and *afterwards* allocate and move it.
>
> You can add `proxmox-io` as a dependency and use
> `proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE)` here.
> This uses `std::alloc` + `std::ptr::write_bytes` for initialization, or,
> if nothing else from that crate will be used, just copy its 2 lines into
> a helper around here, "malloc()+bzero()" is not enough magic to justify
> a full crate dependency for a single function :-)
>
>> + let mut bytes_transferred = 0;
>> +
>> + while bytes_transferred < device_size {
>> + let bytes_left = device_size - bytes_transferred;
>> + let this_chunk_size = if bytes_left < PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE { bytes_left } else { PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE };
> ^ This is
> let this_chunk_size = bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE);
>
>> + println!("Uploading dev_id: {} offset: {:#0X} - {:#0X}", device_id, bytes_transferred, bytes_transferred + this_chunk_size);
>> +
>> + let is_zero_chunk = vma_reader.read_device_contents(device_id, &mut image_chunk_buffer, bytes_transferred).unwrap();
> Don't use `unwrap()` on your own functions returning `Results`.
> At the very least use `.expect()` to add a bit of context.
> But also: anyhow + anyhow::Context trait make more sense.
>
>> + let write_data_result = proxmox_backup_write_data(
>> + pbs,
>> + pbs_device_id as u8,
>> + if is_zero_chunk { ptr::null() } else { image_chunk_buffer.as_ptr() },
>> + bytes_transferred,
>> + this_chunk_size,
>> + &pbs_err as *const _ as *mut _
> ^ &mut pbs_err, no cast
>
>> + );
>> +
>> + if write_data_result < 0 {
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_write_data failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> +
>> + bytes_transferred += this_chunk_size;
>> + }
>> +
>> + if proxmox_backup_close_image(pbs, pbs_device_id as u8, &pbs_err as *const _ as *mut _) < 0 {
> ^ &mut pbs_err, no cast
>
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_close_image failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> + }
>> +
>> + if proxmox_backup_finish(pbs, &pbs_err as *const _ as *mut _) < 0 {
> ^ &mut pbs_err, no cast
>
>> + unsafe {
>> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
>> + panic!("proxmox_backup_finish failed: {}", pbs_err_cstr.to_str().unwrap());
>> + }
>> + }
>> +
>> + proxmox_backup_disconnect(pbs);
>> +}
>> +
>> +fn main() {
>> + let args: Vec<String> = env::args().collect();
>> + let program = args[0].clone();
>> +
>> + let mut opts = Options::new();
>> + opts.optopt("", "repository", "Repository URL", "<auth_id>@<host>:<port>:<datastore>");
>> + opts.optopt("", "vmid", "Backup ID", "VMID");
>> + opts.optopt("", "fingerprint", "Proxmox Backup Server Fingerprint", "FINGERPRINT");
>> + opts.optopt("", "keyfile", "Key file", "KEYFILE");
>> + opts.optopt("", "master_keyfile", "Master key file", "MASTER_KEYFILE");
>> + opts.optflag("c", "compress", "Compress the Backup");
>> + opts.optflag("e", "encrypt", "Encrypt the Backup");
>> + opts.optflag("h", "help", "print this help menu");
>> + let matches = match opts.parse(&args[1..]) {
>> + Ok(m) => { m }
>> + Err(f) => { panic!("{}", f.to_string()) }
>> + };
>> +
>> + if matches.opt_present("h") {
>> + print_usage(&program, opts);
>> + return;
> Side note:
> Since both `main` and `print_usage` return `()` you could just
> return print_usage(...);
>
>> + }
>> +
>> + if !matches.opt_present("repository") {
>> + println!("missing --repository option");
> ^ should be `eprintln`
>
>> + print_usage(&program, opts);
>> + return;
> IMO it would be friendlier to the user if we don't return here just yet,
> but just set an `error` boolean and check the remaining required
> arguments first, printing all the missing arguments at once, and only
> then print the usage & exit.
>
>> + }
>> + let pbs_repository = matches.opt_str("repository").unwrap();
> Also, instead of `opt_present` followed by `opt_str().unwrap()`, better
> use only `opt_str` with a `match` statement.
>
> Eg.
> let mut error = false;
> let mut get_required_opt = |name| match matches.opt_str(name) {
> Some(value) => value,
> None => {
> error = true;
> eprintln!("missing --{name} option");
> String::new() // don't care about the value
> }
> };
>
> let pbs_repository = get_required_opt("repository");
> let vmid = get_required_opt("vmid");
> let fingerprint = get_required_opt("fingerprint");
> if error {
> return print_usage(&program, opts);
> }
>
>> +
>> + if !matches.opt_present("vmid") {
>> + println!("missing --vmid option");
>> + print_usage(&program, opts);
>> + return;
>> + }
>> + let vmid = matches.opt_str("vmid").unwrap();
>> +
>> + if !matches.opt_present("fingerprint") {
>> + println!("missing --fingerprint option");
>> + print_usage(&program, opts);
>> + return;
>> + }
>> + let fingerprint = matches.opt_str("fingerprint").unwrap();
>> +
>> + let keyfile = matches.opt_str("keyfile");
>> + let master_keyfile = matches.opt_str("master_keyfile");
>> + let compress = matches.opt_present("c");
>> + let encrypt = matches.opt_present("e");
>> +
>> + let vma_file_path = if !matches.free.is_empty() {
> ^ We should probably check the exact len of `.free`, since *multiple*
> free arguments are also an error, or at least not handled at all?
>
>> + matches.free[0].clone()
>> + } else {
>> + print_usage(&program, opts);
>> + return;
>> + };
>> +
>> + let pbs_password = String::from_utf8(tty::read_password(&"Password: ").unwrap()).unwrap();
>> + let key_password = match keyfile {
>> + Some(_) => Some(String::from_utf8(tty::read_password(&"Key Password: ").unwrap()).unwrap()),
>> + None => None
>> + };
>> +
>> + backup_vma_to_pbs(
>> + vma_file_path,
>> + pbs_repository,
>> + vmid,
>> + pbs_password,
>> + keyfile,
>> + key_password,
>> + master_keyfile,
>> + fingerprint,
>> + compress,
>> + encrypt
>> + );
>> +}
>> diff --git a/src/vma.rs b/src/vma.rs
>> new file mode 100644
>> index 0000000..4b239c8
>> --- /dev/null
>> +++ b/src/vma.rs
>> @@ -0,0 +1,297 @@
>> +extern crate md5;
>> +
>> +use std::fs::File;
>> +use std::io::{Read, Seek, SeekFrom};
>> +use std::mem::size_of;
>> +use std::{cmp, error, str, result};
> please drop `result` from this list.
>
>> +use std::collections::HashMap;
>> +
>> +use array_init::array_init;
>> +use bincode::*;
> ^ Please don't do `*` imports.
> This is the only reason you need the `std::result` import, but you never
> actually use `bincode::Result`, in fact, you actually only use
> `bincode::Options`.
>
>> +use serde::{Deserialize, Serialize};
>> +use serde_big_array::BigArray;
>> +
>> +const VMA_BLOCKS_PER_EXTENT: usize = 59;
>> +const VMA_MAX_CONFIGS: usize = 256;
>> +
>> +#[repr(C)]
>> +#[derive(Serialize, Deserialize)]
>> +struct VmaDeviceInfoHeader {
>> + pub device_name_offset: u32,
>> + reserved: [u8; 4],
>> + pub device_size: u64,
>> + reserved1: [u8; 16]
>> +}
>> +
>> +#[repr(C)]
>> +#[derive(Serialize, Deserialize)]
>> +struct VmaHeader {
>> + pub magic: [u8; 4],
>> + pub version: u32,
>> + pub uuid: [u8; 16],
>> + pub ctime: u64,
>> + pub md5sum: [u8; 16],
>> + pub blob_buffer_offset: u32,
>> + pub blob_buffer_size: u32,
>> + pub header_size: u32,
>> + #[serde(with = "BigArray")]
>> + reserved: [u8; 1984],
>> + #[serde(with = "BigArray")]
>> + pub config_names: [u32; 256],
>> + #[serde(with = "BigArray")]
>> + pub config_data: [u32; 256],
> ^ These arrays should use `VMA_MAX_CONFIGS` instead of `256`.
>
>> + reserved1: [u8; 4],
>> + #[serde(with = "BigArray")]
>> + pub dev_info: [VmaDeviceInfoHeader; 256]
> Should probably have a `VMA_MAX_DEVICES` here.
>
>> +}
>> +
>> +#[repr(C)]
>> +#[derive(Serialize, Deserialize)]
>> +struct VmaBlockInfo {
>> + pub mask: u16,
>> + reserved: u8,
>> + pub dev_id: u8,
>> + pub cluster_num: u32
>> +}
>> +
>> +#[repr(C)]
>> +#[derive(Serialize, Deserialize)]
>> +struct VmaExtentHeader {
>> + pub magic: [u8; 4],
>> + reserved: [u8; 2],
>> + pub block_count: u16,
>> + pub uuid: [u8; 16],
>> + pub md5sum: [u8; 16],
>> + #[serde(with = "BigArray")]
>> + pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT]
>> +}
>> +
>> +#[derive(Clone)]
>> +struct VmaBlockIndexEntry {
>> + pub cluster_file_offset: u64,
>> + pub mask: u16
>> +}
>> +
>> +pub struct VmaReader {
>> + vma_file: File,
>> + vma_header: VmaHeader,
>> + configs: HashMap<String, String>,
>> + block_index: [Vec<VmaBlockIndexEntry>; 256],
> IMO this should be either a `Vec<Vec<VmaBlockIndexEntry>>` with checked
> `.get()` calls, or at least `Box<>` the array, as `VmaRead` is now quite
> a huge thing on the stack.
> Alternatively the whole `VmaReader` could be boxed.
>
>> + blocks_are_indexed: bool
>> +}
>> +
>> +impl VmaReader {
>> + pub fn new(vma_file_path: &str) -> Self {
>> + let mut vma_file = match File::open(vma_file_path) {
>> + Err(why) => panic!("couldn't open {}: {}", vma_file_path, why),
>> + Ok(file) => file,
>> + };
>> +
>> + let vma_header = Self::read_header(&mut vma_file).unwrap();
>> + let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap();
>> + let block_index: [Vec<VmaBlockIndexEntry>; 256] = array_init(|_| Vec::new());
>> +
>> + let instance = Self {
>> + vma_file,
>> + vma_header,
>> + configs,
>> + block_index,
>> + blocks_are_indexed: false
>> + };
>> +
>> + return instance;
>> + }
>> +
>> + fn read_header(vma_file: &mut File) -> result::Result<VmaHeader, Box<dyn error::Error>> {
>> + let mut buffer = vec![0u8; size_of::<VmaHeader>()];
> Better to `Vec::with_capacity()` and `.resize(size, 0)`. The above code
> will create a temporary `VmaHeader`-sized zero-buffer on the stack
> before moving it. In debug builds this can even be notice-ably slow. At
> least in older rust versions building large [0u8; N] could take a
> visible amount of time (depending on N even seconds at times...)
>
>> + vma_file.read_exact(&mut buffer)?;
>> +
>> + let bincode_options = bincode::DefaultOptions::new()
>> + .with_fixint_encoding()
>> + .with_big_endian();
>> +
>> + let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?;
>> +
>> + if vma_header.magic != [b'V', b'M', b'A', 0] {
>> + return Err("Invalid magic number".into());
>> + }
>> +
>> + if vma_header.version != 1 {
>> + return Err(format!("Invalid VMA version {}", vma_header.version).into());
>> + }
>> +
>> + buffer.resize(vma_header.header_size as usize, 0);
>> + vma_file.read_exact(&mut buffer[size_of::<VmaHeader>()..])?;
>> +
>> + buffer[32..48].fill(0);
> Might be worth mentioning that this is the md5 sum in the struct. In
> fact, a helper casting a buffer to `&mut VmaHeader` and using the actual
> member access to zero it out might be nice, but not necessary since
> these are never going to "move". So feel free to just add a comment
> instead, but a comment is definitely needed ;-)
>
>> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
>> +
>> + if vma_header.md5sum != computed_md5sum {
>> + return Err("Wrong VMA header checksum".into());
>> + }
>> +
>> + return Ok(vma_header);
>> + }
>> +
>> + fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> result::Result<String, Box<dyn error::Error>> {
>> + let mut size_bytes = [0u8; 2];
>> + vma_file.seek(SeekFrom::Start(file_offset))?;
>> + vma_file.read_exact(&mut size_bytes)?;
>> + let size = u16::from_le_bytes(size_bytes) as usize;
>> + let mut string_bytes = vec![0u8; size - 1];
>> + vma_file.read_exact(&mut string_bytes)?;
>> + let string = str::from_utf8(&string_bytes)?;
>> +
>> + return Ok(string.to_string());
>> + }
>> +
>> + fn read_blob_buffer(vma_file: &mut File, vma_header: &VmaHeader) -> result::Result<HashMap<String, String>, Box<dyn error::Error>> {
>> + let mut configs = HashMap::new();
>> +
>> + for i in 0..VMA_MAX_CONFIGS {
>> + let config_name_offset = vma_header.config_names[i];
>> + let config_data_offset = vma_header.config_data[i];
>> +
>> + if config_name_offset == 0 || config_data_offset == 0 {
>> + continue;
>> + }
>> +
>> + let config_name_file_offset = (vma_header.blob_buffer_offset + config_name_offset) as u64;
>> + let config_data_file_offset = (vma_header.blob_buffer_offset + config_data_offset) as u64;
>> + let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?;
>> + let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?;
>> +
>> + configs.insert(String::from(config_name), String::from(config_data));
>> + }
>> +
>> + return Ok(configs);
>> + }
>> +
>> + pub fn get_configs(&self) -> HashMap<String, String> {
>> + return self.configs.clone();
>> + }
>> +
>> + pub fn get_device_name(&mut self, device_id: u8) -> Option<String> {
> ^ API-wise I don't think it makes much sense to encode the 256 limit
> into the type here. You can get rid of a *lot* of `as usize` casts if
> this is just an usize itself. But of course, you'd need to validate the
> value then, but at least it can then be explicitly tied to a named MAX_*
> constant to explain the limit.
>
>> + let device_name_offset = self.vma_header.dev_info[device_id as usize].device_name_offset;
>> +
>> + if device_name_offset == 0 {
>> + return None;
>> + }
>> +
>> + let device_name_file_offset = (self.vma_header.blob_buffer_offset + device_name_offset) as u64;
>> + let device_name = Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap();
>> +
>> + return Some(device_name.to_string());
>> + }
>> +
>> + pub fn get_device_size(&self, device_id: u8) -> Option<u64> {
> Much less typing if you first
>
> let dev_info = &self.vma_header.dev_info[device_id as usize];
>
> and not duplicate the `self.vma_header.dev_info[device_id as usize]`
> bit...
>
>> + if self.vma_header.dev_info[device_id as usize].device_name_offset == 0 {
>> + return None;
>> + }
>> +
>> + return Some(self.vma_header.dev_info[device_id as usize].device_size);
>> + }
>> +
>> + fn read_extent_header(vma_file: &mut File) -> result::Result<VmaExtentHeader, Box<dyn error::Error>> {
>> + let mut buffer = vec![0u8; size_of::<VmaExtentHeader>()];
> Again, please manage the buffer as mentioned above in `read_header`.
>
>> + vma_file.read_exact(&mut buffer)?;
>> +
>> + let bincode_options = bincode::DefaultOptions::new()
>> + .with_fixint_encoding()
>> + .with_big_endian();
>> +
>> + let vma_extent_header: VmaExtentHeader = bincode_options.deserialize(&buffer)?;
>> +
>> + if vma_extent_header.magic != [b'V', b'M', b'A', b'E'] {
>> + return Err("Invalid magic number".into());
>> + }
>> +
>> + buffer[24..40].fill(0);
> ^ Same here, comment needed
>
>> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
>> +
>> + if vma_extent_header.md5sum != computed_md5sum {
>> + return Err("Wrong VMA extent header checksum".into());
>> + }
>> +
>> + return Ok(vma_extent_header);
>> + }
>> +
>> + fn index_device_clusters(&mut self) -> result::Result<(), Box<dyn error::Error>> {
>> + for device_id in 0..255 {
>> + let device_size = match self.get_device_size(device_id) {
>> + Some(x) => { x }
>> + None => { continue; }
>> + };
>> +
>> + let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16);
>> +
>> + let block_index_entry_placeholder = VmaBlockIndexEntry {
>> + cluster_file_offset: 0,
>> + mask: 0
>> + };
>> +
>> + self.block_index[device_id as usize].resize(device_cluster_count as usize, block_index_entry_placeholder);
>> + }
>> +
>> + let mut file_offset = self.vma_header.header_size as u64;
>> + let vma_file_size = self.vma_file.metadata()?.len();
>> +
>> + while file_offset < vma_file_size {
>> + self.vma_file.seek(SeekFrom::Start(file_offset))?;
>> + let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?;
>> + file_offset += 512;
> ^ Should be able to use `size_of::<VmaExtentHeader>()` here, no?
>
>> +
>> + for i in 0..59 as usize {
> ^ Should use VMA_BLOCKS_PER_EXTENT instead of 59.
>
>> + let blockinfo = &vma_extent_header.blockinfo[i];
>> +
>> + if blockinfo.dev_id == 0 {
>> + continue;
>> + }
>> +
>> + let block_index_entry = VmaBlockIndexEntry {
>> + cluster_file_offset: file_offset,
>> + mask: blockinfo.mask
>> + };
>> +
>> + self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = block_index_entry;
>> + file_offset += blockinfo.mask.count_ones() as u64 * 4096;
>> + }
>> + }
>> +
>> + return Ok(());
>> + }
>> +
>> + pub fn read_device_contents(&mut self, device_id: u8, buffer: &mut [u8], offset: u64) -> result::Result<bool, Box<dyn error::Error>> {
>> + assert_eq!(offset % (4096 * 16), 0);
> And another reason for anyhow over `panic!` based error handling: I'm
> not sure the above is supposed to be checked on release builds as well?
This checks that the offset is aligned to the VMA cluster size so as not
to overcomplicate the logic.
>> +
>> + // Make sure that the device clusters are already indexed
>> + if !self.blocks_are_indexed {
>> + self.index_device_clusters()?;
>> + }
>> +
>> + let this_device_block_index = &self.block_index[device_id as usize];
>> + let length = cmp::min(buffer.len(), this_device_block_index.len() * 4096 * 16 - offset as usize);
>> + let mut buffer_offset = 0;
>> + let mut buffer_is_zero = true;
>> +
>> + while buffer_offset < length {
>> + let block_index_entry = &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)];
>> + self.vma_file.seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?;
>> +
>> + for i in 0..16 {
>> + let current_block_mask = ((block_index_entry.mask >> i) & 1) == 1;
>> +
>> + if current_block_mask {
>> + self.vma_file.read_exact(&mut buffer[buffer_offset..(buffer_offset + 4096)])?;
> it's possible to have a device that isn't exactly a multiple of
> 4096, meaning its very final block may be smaller, so the device size
> needs to always be taken into account when reading such blocks.
>
> You can produce such an image via `pvesm alloc` as it takes a multiple
> of `1024` for its size, eg:
>
> # pvesm alloc local 100 vm-100-bad.raw $[(1<<20) + 1]
>
> Will produce a disk that is 1G + 1024 bytes.
>
> Now check what your tool does with the file
> - while the final blocks are still zero
> - then fill those blocks with data and check again.
The tool works like it should, even with disks that do not have a size
aligned to 4096. The bytes read into the buffer past 1024 are simply
ignored in the call to proxmox_backup_write_data, by setting the size
argument to 1024. However, if this VmaReader was used as part of a
library, then relying on the caller to provide a buffer with an aligned
size wouldn't be very good practice. Also in its current state the
read_device_contents function might run through unnecessary loop
iterations when the buffer size is not aligned to 65536.
These concerns where addressed in the v2.
>> + buffer_is_zero = false;
>> + } else {
>> + buffer[buffer_offset..(buffer_offset + 4096)].fill(0);
>> + }
>> +
>> + buffer_offset += 4096;
>> + }
>> + }
>> +
>> + return Ok(buffer_is_zero);
>> + }
>> +}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pbs-devel] [PATCH vma-to-pbs] Initial commit
2023-09-29 13:22 ` Filip Schauer
@ 2023-09-29 13:38 ` Lukas Wagner
0 siblings, 0 replies; 6+ messages in thread
From: Lukas Wagner @ 2023-09-29 13:38 UTC (permalink / raw)
To: Proxmox Backup Server development discussion, Filip Schauer,
Wolfgang Bumiller
On 9/29/23 15:22, Filip Schauer wrote:
>>> --- /dev/null
>>> +++ b/.gitmodules
>>> @@ -0,0 +1,6 @@
>>> +[submodule "submodules/proxmox-backup-qemu"]
>>> + path = submodules/proxmox-backup-qemu
>>> + url = git://git.proxmox.com/git/proxmox-backup-qemu.git
>>> +[submodule "submodules/proxmox"]
>>> + path = submodules/proxmox
>>> + url = git://git.proxmox.com/git/proxmox.git
>> ^ We don't need proxmox.git as submodule.
> We use proxmox_sys::linux::tty::read_password.
I believe what Wolfgang meant was that you do not to need to add
proxmox.git as a submodule. Since you patch crate.io with the local
registry at /usr/share/cargo, you can just install the dependencies
that you need via apt and use them just as any other dep inside your
Cargo.toml file.
Since you need `proxmox-sys`, the required package would be
`librust-proxmox-sys-dev`. The newest packaged version is 0.5.0-1,
so you'd need to add
proxmox-sys = "0.5.0"
to your Cargo.toml file.
--
- Lukas
^ permalink raw reply [flat|nested] 6+ messages in thread
* [pbs-devel] [PATCH vma-to-pbs] Initial commit
@ 2023-09-29 13:12 Filip Schauer
2023-09-29 13:15 ` Filip Schauer
0 siblings, 1 reply; 6+ messages in thread
From: Filip Schauer @ 2023-09-29 13:12 UTC (permalink / raw)
To: pbs-devel
Implement a tool to import VMA files into a Proxmox Backup Server
Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
Changes since v1:
* Remove unused crates and uses
* Format the code
* Use anyhow for error handling
* Use clap for parsing arguments instead of getopts
* Fix blocks being reindexed on every read
* Make sure ProxmoxBackupHandle is dropped properly on error
* Move image_chunk_buffer from stack to heap
* Move the block_index in VmaReader to the heap completely
* Initialize vectors with `Vec::with_capacity` and `resize` instead of
the `vec!` macro, to potentially improve performance on debug builds.
* Add comments to code filling the MD5 sum field with zeros
* Change device_id arguments to usize
* Handle devices that have a size that is not aligned to 4096 properly
in read_device_contents, when the caller provides a buffer that would
exceed the device size.
* Avoid unnecessary loop iterations in read_device_contents when the
buffer size is not aligned to 65536
.cargo/config | 5 +
.gitmodules | 6 +
Cargo.toml | 18 ++
Makefile | 70 +++++++
src/main.rs | 311 ++++++++++++++++++++++++++++++
src/vma.rs | 340 +++++++++++++++++++++++++++++++++
submodules/proxmox | 1 +
submodules/proxmox-backup-qemu | 1 +
8 files changed, 752 insertions(+)
create mode 100644 .cargo/config
create mode 100644 .gitmodules
create mode 100644 Cargo.toml
create mode 100644 Makefile
create mode 100644 src/main.rs
create mode 100644 src/vma.rs
create mode 160000 submodules/proxmox
create mode 160000 submodules/proxmox-backup-qemu
diff --git a/.cargo/config b/.cargo/config
new file mode 100644
index 0000000..3b5b6e4
--- /dev/null
+++ b/.cargo/config
@@ -0,0 +1,5 @@
+[source]
+[source.debian-packages]
+directory = "/usr/share/cargo/registry"
+[source.crates-io]
+replace-with = "debian-packages"
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..526f5ef
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "submodules/proxmox-backup-qemu"]
+ path = submodules/proxmox-backup-qemu
+ url = git://git.proxmox.com/git/proxmox-backup-qemu.git
+[submodule "submodules/proxmox"]
+ path = submodules/proxmox
+ url = git://git.proxmox.com/git/proxmox.git
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ecf9c03
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "vma-to-pbs"
+version = "0.0.1"
+authors = ["Filip Schauer <f.schauer@proxmox.com>"]
+edition = "2021"
+
+[dependencies]
+anyhow = "1.0"
+bincode = "1.3"
+clap = { version = "4.0.32", features = ["cargo"] }
+md5 = "0.7.0"
+scopeguard = "1.1.0"
+serde = "1.0"
+serde-big-array = "0.4.1"
+
+proxmox-backup-qemu = { path = "submodules/proxmox-backup-qemu" }
+proxmox-io = { path = "submodules/proxmox/proxmox-io" }
+proxmox-sys = { path = "submodules/proxmox/proxmox-sys" }
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a0c841d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,70 @@
+include /usr/share/dpkg/default.mk
+
+PACKAGE = proxmox-vma-to-pbs
+BUILDDIR = $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
+
+ARCH := $(DEB_BUILD_ARCH)
+
+DSC=$(DEB_SOURCE)_$(DEB_VERSION).dsc
+MAIN_DEB=$(PACKAGE)_$(DEB_VERSION)_$(ARCH).deb
+OTHER_DEBS = \
+ $(PACKAGE)-dev_$(DEB_VERSION)_$(ARCH).deb \
+ $(PACKAGE)-dbgsym_$(DEB_VERSION)_$(ARCH).deb
+DEBS=$(MAIN_DEB) $(OTHER_DEBS)
+
+DESTDIR=
+
+TARGET_DIR := target/debug
+
+ifeq ($(BUILD_MODE), release)
+CARGO_BUILD_ARGS += --release
+TARGETDIR := target/release
+endif
+
+.PHONY: all build
+all: build
+
+build: $(TARGETDIR)/vma-to-pbs
+$(TARGETDIR)/vma-to-pbs: Cargo.toml src/
+ cargo build $(CARGO_BUILD_ARGS)
+
+.PHONY: install
+install: $(TARGETDIR)/vma-to-pbs
+ install -D -m 0755 $(TARGETDIR)/vma-to-pbs $(DESTDIR)/usr/bin/vma-to-pbs
+
+$(BUILDDIR): submodule
+ rm -rf $@ $@.tmp && mkdir $@.tmp
+ cp -a submodules debian Makefile .cargo Cargo.toml build.rs src $@.tmp/
+ mv $@.tmp $@
+
+submodule:
+ [ -e submodules/proxmox-backup-qemu/Cargo.toml ] || [ -e submodules/proxmox/proxmox-sys/Cargo.toml ] || git submodule update --init --recursive
+
+dsc:
+ rm -rf $(BUILDDIR) $(DSC)
+ $(MAKE) $(DSC)
+ lintian $(DSC)
+
+$(DSC): $(BUILDDIR)
+ cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
+
+sbuild: $(DSC)
+ sbuild $<
+
+.PHONY: deb dsc
+deb: $(OTHER_DEBS)
+$(OTHER_DEBS): $(MAIN_DEB)
+$(MAIN_DEB): $(BUILDDIR)
+ cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
+ lintian $(DEBS)
+
+distclean: clean
+clean:
+ cargo clean
+ rm -rf $(PACKAGE)-[0-9]*/
+ rm -r *.deb *.dsc $(DEB_SOURCE)*.tar* *.build *.buildinfo *.changes Cargo.lock
+
+.PHONY: dinstall
+dinstall: $(DEBS)
+ dpkg -i $(DEBS)
+
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..1aefd29
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,311 @@
+extern crate anyhow;
+extern crate clap;
+extern crate proxmox_backup_qemu;
+extern crate proxmox_io;
+extern crate proxmox_sys;
+extern crate scopeguard;
+
+use std::env;
+use std::ffi::{c_char, CStr, CString};
+use std::ptr;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use anyhow::{anyhow, Context, Result};
+use clap::{command, Arg, ArgAction};
+use proxmox_backup_qemu::*;
+use proxmox_sys::linux::tty;
+use scopeguard::defer;
+
+mod vma;
+use vma::*;
+
+fn backup_vma_to_pbs(
+ vma_file_path: String,
+ pbs_repository: String,
+ backup_id: String,
+ pbs_password: String,
+ keyfile: Option<String>,
+ key_password: Option<String>,
+ master_keyfile: Option<String>,
+ fingerprint: String,
+ compress: bool,
+ encrypt: bool,
+) -> Result<()> {
+ println!("VMA input file: {}", vma_file_path);
+ println!("PBS repository: {}", pbs_repository);
+ println!("PBS fingerprint: {}", fingerprint);
+ println!("compress: {}", compress);
+ println!("encrypt: {}", encrypt);
+
+ let backup_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
+ println!("backup time: {}", backup_time);
+
+ let mut pbs_err: *mut c_char = ptr::null_mut();
+
+ let pbs_repository_cstr = CString::new(pbs_repository).unwrap();
+ let backup_id_cstr = CString::new(backup_id).unwrap();
+ let pbs_password_cstr = CString::new(pbs_password).unwrap();
+ let fingerprint_cstr = CString::new(fingerprint).unwrap();
+ let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap());
+ let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
+ let key_password_cstr = key_password.map(|v| CString::new(v).unwrap());
+ let key_password_ptr = key_password_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
+ let master_keyfile_cstr = master_keyfile.map(|v| CString::new(v).unwrap());
+ let master_keyfile_ptr = master_keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
+
+ let pbs = proxmox_backup_new_ns(
+ pbs_repository_cstr.as_ptr(),
+ ptr::null(),
+ backup_id_cstr.as_ptr(),
+ backup_time,
+ PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
+ pbs_password_cstr.as_ptr(),
+ keyfile_ptr,
+ key_password_ptr,
+ master_keyfile_ptr,
+ true,
+ false,
+ fingerprint_cstr.as_ptr(),
+ &mut pbs_err,
+ );
+
+ defer! {
+ proxmox_backup_disconnect(pbs);
+ }
+
+ if pbs == ptr::null_mut() {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!("proxmox_backup_new_ns failed: {pbs_err_cstr:?}"));
+ }
+ }
+
+ let connect_result = proxmox_backup_connect(pbs, &mut pbs_err);
+
+ if connect_result < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!("proxmox_backup_connect failed: {pbs_err_cstr:?}"));
+ }
+ }
+
+ let mut vma_reader = VmaReader::new(&vma_file_path)?;
+
+ // Handle configs
+ let configs = vma_reader.get_configs();
+ for (config_name, config_data) in configs {
+ println!("CFG: size: {} name: {}", config_data.len(), config_name);
+
+ let config_name_cstr = CString::new(config_name).unwrap();
+
+ if proxmox_backup_add_config(
+ pbs,
+ config_name_cstr.as_ptr(),
+ config_data.as_ptr(),
+ config_data.len() as u64,
+ &mut pbs_err,
+ ) < 0
+ {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!(
+ "proxmox_backup_add_config failed: {pbs_err_cstr:?}"
+ ));
+ }
+ }
+ }
+
+ // Handle block devices
+ for device_id in 0..255 {
+ let device_name = match vma_reader.get_device_name(device_id) {
+ Some(x) => x,
+ None => {
+ continue;
+ }
+ };
+
+ let device_size = match vma_reader.get_device_size(device_id) {
+ Some(x) => x,
+ None => {
+ continue;
+ }
+ };
+
+ println!(
+ "DEV: dev_id={} size: {} devname: {}",
+ device_id, device_size, device_name
+ );
+
+ let device_name_cstr = CString::new(device_name).unwrap();
+ let pbs_device_id = proxmox_backup_register_image(
+ pbs,
+ device_name_cstr.as_ptr(),
+ device_size,
+ false,
+ &mut pbs_err,
+ );
+
+ if pbs_device_id < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!(
+ "proxmox_backup_register_image failed: {pbs_err_cstr:?}"
+ ));
+ }
+ }
+
+ let mut image_chunk_buffer = proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize);
+ let mut bytes_transferred = 0;
+
+ while bytes_transferred < device_size {
+ let bytes_left = device_size - bytes_transferred;
+ let chunk_size = bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE);
+ println!(
+ "Uploading dev_id: {} offset: {:#0X} - {:#0X}",
+ device_id,
+ bytes_transferred,
+ bytes_transferred + chunk_size
+ );
+
+ let is_zero_chunk = vma_reader
+ .read_device_contents(
+ device_id,
+ &mut image_chunk_buffer[0..chunk_size as usize],
+ bytes_transferred,
+ )
+ .with_context(|| {
+ format!(
+ "read {} bytes at offset {} from disk {} from VMA file",
+ chunk_size, bytes_transferred, device_id
+ )
+ })?;
+
+ let write_data_result = proxmox_backup_write_data(
+ pbs,
+ pbs_device_id as u8,
+ if is_zero_chunk {
+ ptr::null()
+ } else {
+ image_chunk_buffer.as_ptr()
+ },
+ bytes_transferred,
+ chunk_size,
+ &mut pbs_err,
+ );
+
+ if write_data_result < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!(
+ "proxmox_backup_write_data failed: {pbs_err_cstr:?}"
+ ));
+ }
+ }
+
+ bytes_transferred += chunk_size;
+ }
+
+ if proxmox_backup_close_image(pbs, pbs_device_id as u8, &mut pbs_err) < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!(
+ "proxmox_backup_close_image failed: {pbs_err_cstr:?}"
+ ));
+ }
+ }
+ }
+
+ if proxmox_backup_finish(pbs, &mut pbs_err) < 0 {
+ unsafe {
+ let pbs_err_cstr = CStr::from_ptr(pbs_err);
+ return Err(anyhow!("proxmox_backup_finish failed: {pbs_err_cstr:?}"));
+ }
+ }
+
+ Ok(())
+}
+
+fn main() -> Result<()> {
+ let matches = command!()
+ .arg(
+ Arg::new("repository")
+ .long("repository")
+ .value_name("auth_id@host:port:datastore")
+ .help("Repository URL")
+ .required(true),
+ )
+ .arg(
+ Arg::new("vmid")
+ .long("vmid")
+ .value_name("VMID")
+ .help("Backup ID")
+ .required(true),
+ )
+ .arg(
+ Arg::new("fingerprint")
+ .long("fingerprint")
+ .value_name("FINGERPRINT")
+ .help("Proxmox Backup Server Fingerprint")
+ .required(true),
+ )
+ .arg(
+ Arg::new("keyfile")
+ .long("keyfile")
+ .value_name("KEYFILE")
+ .help("Key file"),
+ )
+ .arg(
+ Arg::new("master_keyfile")
+ .long("master_keyfile")
+ .value_name("MASTER_KEYFILE")
+ .help("Master key file"),
+ )
+ .arg(
+ Arg::new("compress")
+ .long("compress")
+ .short('c')
+ .help("Compress the Backup")
+ .action(ArgAction::SetTrue),
+ )
+ .arg(
+ Arg::new("encrypt")
+ .long("encrypt")
+ .short('e')
+ .help("Encrypt the Backup")
+ .action(ArgAction::SetTrue),
+ )
+ .arg(Arg::new("vma_file"))
+ .get_matches();
+
+ let pbs_repository = matches.get_one::<String>("repository").unwrap().to_string();
+ let vmid = matches.get_one::<String>("vmid").unwrap().to_string();
+ let fingerprint = matches.get_one::<String>("fingerprint").unwrap().to_string();
+
+ let keyfile = matches.get_one::<String>("keyfile");
+ let master_keyfile = matches.get_one::<String>("master_keyfile");
+ let compress = matches.get_flag("compress");
+ let encrypt = matches.get_flag("encrypt");
+
+ let vma_file_path = matches.get_one::<String>("vma_file").unwrap().to_string();
+
+ let pbs_password = String::from_utf8(tty::read_password(&"Password: ").unwrap()).unwrap();
+ let key_password = match keyfile {
+ Some(_) => Some(String::from_utf8(tty::read_password(&"Key Password: ").unwrap()).unwrap()),
+ None => None,
+ };
+
+ backup_vma_to_pbs(
+ vma_file_path,
+ pbs_repository,
+ vmid,
+ pbs_password,
+ keyfile.cloned(),
+ key_password,
+ master_keyfile.cloned(),
+ fingerprint,
+ compress,
+ encrypt,
+ )?;
+
+ Ok(())
+}
diff --git a/src/vma.rs b/src/vma.rs
new file mode 100644
index 0000000..e2c3475
--- /dev/null
+++ b/src/vma.rs
@@ -0,0 +1,340 @@
+extern crate anyhow;
+extern crate md5;
+
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom};
+use std::mem::size_of;
+use std::{cmp, str};
+
+use anyhow::{anyhow, Result};
+use bincode::Options;
+use serde::{Deserialize, Serialize};
+use serde_big_array::BigArray;
+
+const VMA_BLOCKS_PER_EXTENT: usize = 59;
+const VMA_MAX_CONFIGS: usize = 256;
+const VMA_MAX_DEVICES: usize = 256;
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaDeviceInfoHeader {
+ pub device_name_offset: u32,
+ reserved: [u8; 4],
+ pub device_size: u64,
+ reserved1: [u8; 16],
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaHeader {
+ pub magic: [u8; 4],
+ pub version: u32,
+ pub uuid: [u8; 16],
+ pub ctime: u64,
+ pub md5sum: [u8; 16],
+ pub blob_buffer_offset: u32,
+ pub blob_buffer_size: u32,
+ pub header_size: u32,
+ #[serde(with = "BigArray")]
+ reserved: [u8; 1984],
+ #[serde(with = "BigArray")]
+ pub config_names: [u32; VMA_MAX_CONFIGS],
+ #[serde(with = "BigArray")]
+ pub config_data: [u32; VMA_MAX_CONFIGS],
+ reserved1: [u8; 4],
+ #[serde(with = "BigArray")]
+ pub dev_info: [VmaDeviceInfoHeader; VMA_MAX_DEVICES],
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaBlockInfo {
+ pub mask: u16,
+ reserved: u8,
+ pub dev_id: u8,
+ pub cluster_num: u32,
+}
+
+#[repr(C)]
+#[derive(Serialize, Deserialize)]
+struct VmaExtentHeader {
+ pub magic: [u8; 4],
+ reserved: [u8; 2],
+ pub block_count: u16,
+ pub uuid: [u8; 16],
+ pub md5sum: [u8; 16],
+ #[serde(with = "BigArray")]
+ pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT],
+}
+
+#[derive(Clone)]
+struct VmaBlockIndexEntry {
+ pub cluster_file_offset: u64,
+ pub mask: u16,
+}
+
+pub struct VmaReader {
+ vma_file: File,
+ vma_header: VmaHeader,
+ configs: HashMap<String, String>,
+ block_index: Vec<Vec<VmaBlockIndexEntry>>,
+ blocks_are_indexed: bool,
+}
+
+impl VmaReader {
+ pub fn new(vma_file_path: &str) -> Result<Self> {
+ let mut vma_file = match File::open(vma_file_path) {
+ Err(why) => return Err(anyhow!("couldn't open {}: {}", vma_file_path, why)),
+ Ok(file) => file,
+ };
+
+ let vma_header = Self::read_header(&mut vma_file).unwrap();
+ let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap();
+ let block_index: Vec<Vec<VmaBlockIndexEntry>> = (0..256).map(|_| Vec::new()).collect();
+
+ let instance = Self {
+ vma_file,
+ vma_header,
+ configs,
+ block_index,
+ blocks_are_indexed: false,
+ };
+
+ Ok(instance)
+ }
+
+ fn read_header(vma_file: &mut File) -> Result<VmaHeader> {
+ let mut buffer = Vec::with_capacity(size_of::<VmaHeader>());
+ buffer.resize(size_of::<VmaHeader>(), 0);
+ vma_file.read_exact(&mut buffer)?;
+
+ let bincode_options = bincode::DefaultOptions::new()
+ .with_fixint_encoding()
+ .with_big_endian();
+
+ let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?;
+
+ if vma_header.magic != [b'V', b'M', b'A', 0] {
+ return Err(anyhow!("Invalid magic number"));
+ }
+
+ if vma_header.version != 1 {
+ return Err(anyhow!("Invalid VMA version {}", vma_header.version));
+ }
+
+ buffer.resize(vma_header.header_size as usize, 0);
+ vma_file.read_exact(&mut buffer[size_of::<VmaHeader>()..])?;
+
+ // Fill the MD5 sum field with zeros to compute the MD5 sum
+ buffer[32..48].fill(0);
+ let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
+
+ if vma_header.md5sum != computed_md5sum {
+ return Err(anyhow!("Wrong VMA header checksum"));
+ }
+
+ return Ok(vma_header);
+ }
+
+ fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> Result<String> {
+ let mut size_bytes = [0u8; 2];
+ vma_file.seek(SeekFrom::Start(file_offset))?;
+ vma_file.read_exact(&mut size_bytes)?;
+ let size = u16::from_le_bytes(size_bytes) as usize;
+ let mut string_bytes = Vec::with_capacity(size - 1);
+ string_bytes.resize(size - 1, 0);
+ vma_file.read_exact(&mut string_bytes)?;
+ let string = str::from_utf8(&string_bytes)?;
+
+ return Ok(string.to_string());
+ }
+
+ fn read_blob_buffer(
+ vma_file: &mut File,
+ vma_header: &VmaHeader,
+ ) -> Result<HashMap<String, String>> {
+ let mut configs = HashMap::new();
+
+ for i in 0..VMA_MAX_CONFIGS {
+ let config_name_offset = vma_header.config_names[i];
+ let config_data_offset = vma_header.config_data[i];
+
+ if config_name_offset == 0 || config_data_offset == 0 {
+ continue;
+ }
+
+ let config_name_file_offset = (vma_header.blob_buffer_offset + config_name_offset) as u64;
+ let config_data_file_offset = (vma_header.blob_buffer_offset + config_data_offset) as u64;
+ let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?;
+ let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?;
+
+ configs.insert(String::from(config_name), String::from(config_data));
+ }
+
+ return Ok(configs);
+ }
+
+ pub fn get_configs(&self) -> HashMap<String, String> {
+ return self.configs.clone();
+ }
+
+ pub fn get_device_name(&mut self, device_id: usize) -> Option<String> {
+ if device_id >= VMA_MAX_DEVICES {
+ return None;
+ }
+
+ let device_name_offset = self.vma_header.dev_info[device_id].device_name_offset;
+
+ if device_name_offset == 0 {
+ return None;
+ }
+
+ let device_name_file_offset = (self.vma_header.blob_buffer_offset + device_name_offset) as u64;
+ let device_name = Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap();
+
+ return Some(device_name.to_string());
+ }
+
+ pub fn get_device_size(&self, device_id: usize) -> Option<u64> {
+ if device_id >= VMA_MAX_DEVICES {
+ return None;
+ }
+
+ let dev_info = &self.vma_header.dev_info[device_id];
+
+ if dev_info.device_name_offset == 0 {
+ return None;
+ }
+
+ return Some(dev_info.device_size);
+ }
+
+ fn read_extent_header(vma_file: &mut File) -> Result<VmaExtentHeader> {
+ let mut buffer = Vec::with_capacity(size_of::<VmaExtentHeader>());
+ buffer.resize(size_of::<VmaExtentHeader>(), 0);
+ vma_file.read_exact(&mut buffer)?;
+
+ let bincode_options = bincode::DefaultOptions::new()
+ .with_fixint_encoding()
+ .with_big_endian();
+
+ let vma_extent_header: VmaExtentHeader = bincode_options.deserialize(&buffer)?;
+
+ if vma_extent_header.magic != [b'V', b'M', b'A', b'E'] {
+ return Err(anyhow!("Invalid magic number"));
+ }
+
+ // Fill the MD5 sum field with zeros to compute the MD5 sum
+ buffer[24..40].fill(0);
+ let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
+
+ if vma_extent_header.md5sum != computed_md5sum {
+ return Err(anyhow!("Wrong VMA extent header checksum"));
+ }
+
+ return Ok(vma_extent_header);
+ }
+
+ fn index_device_clusters(&mut self) -> Result<()> {
+ for device_id in 0..255 {
+ let device_size = match self.get_device_size(device_id) {
+ Some(x) => x,
+ None => {
+ continue;
+ }
+ };
+
+ let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16);
+
+ let block_index_entry_placeholder = VmaBlockIndexEntry {
+ cluster_file_offset: 0,
+ mask: 0,
+ };
+
+ self.block_index[device_id].resize(device_cluster_count as usize, block_index_entry_placeholder);
+ }
+
+ let mut file_offset = self.vma_header.header_size as u64;
+ let vma_file_size = self.vma_file.metadata()?.len();
+
+ while file_offset < vma_file_size {
+ self.vma_file.seek(SeekFrom::Start(file_offset))?;
+ let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?;
+ file_offset += size_of::<VmaExtentHeader>() as u64;
+
+ for i in 0..VMA_BLOCKS_PER_EXTENT {
+ let blockinfo = &vma_extent_header.blockinfo[i];
+
+ if blockinfo.dev_id == 0 {
+ continue;
+ }
+
+ let block_index_entry = VmaBlockIndexEntry {
+ cluster_file_offset: file_offset,
+ mask: blockinfo.mask,
+ };
+
+ self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = block_index_entry;
+ file_offset += blockinfo.mask.count_ones() as u64 * 4096;
+ }
+ }
+
+ self.blocks_are_indexed = true;
+
+ return Ok(());
+ }
+
+ pub fn read_device_contents(
+ &mut self,
+ device_id: usize,
+ buffer: &mut [u8],
+ offset: u64,
+ ) -> Result<bool> {
+ if device_id >= VMA_MAX_DEVICES {
+ return Err(anyhow!("invalid device id {}", device_id));
+ }
+
+ if offset % (4096 * 16) != 0 {
+ return Err(anyhow!("offset is not aligned to 65536"));
+ }
+
+ // Make sure that the device clusters are already indexed
+ if !self.blocks_are_indexed {
+ self.index_device_clusters()?;
+ }
+
+ let this_device_block_index = &self.block_index[device_id];
+ let length = cmp::min(
+ buffer.len(),
+ this_device_block_index.len() * 4096 * 16 - offset as usize,
+ );
+ let mut buffer_offset = 0;
+ let mut buffer_is_zero = true;
+
+ while buffer_offset < length {
+ let block_index_entry = &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)];
+ self.vma_file.seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?;
+
+ for i in 0..16 {
+ if buffer_offset >= length {
+ break;
+ }
+
+ let block_buffer_end = buffer_offset + cmp::min(length - buffer_offset, 4096);
+ let block_mask = ((block_index_entry.mask >> i) & 1) == 1;
+
+ if block_mask {
+ self.vma_file.read_exact(&mut buffer[buffer_offset..block_buffer_end])?;
+ buffer_is_zero = false;
+ } else {
+ buffer[buffer_offset..block_buffer_end].fill(0);
+ }
+
+ buffer_offset += 4096;
+ }
+ }
+
+ return Ok(buffer_is_zero);
+ }
+}
diff --git a/submodules/proxmox b/submodules/proxmox
new file mode 160000
index 0000000..dc9ee73
--- /dev/null
+++ b/submodules/proxmox
@@ -0,0 +1 @@
+Subproject commit dc9ee737512fc2c7325f47b875d6c69ccf484cea
diff --git a/submodules/proxmox-backup-qemu b/submodules/proxmox-backup-qemu
new file mode 160000
index 0000000..73a09e9
--- /dev/null
+++ b/submodules/proxmox-backup-qemu
@@ -0,0 +1 @@
+Subproject commit 73a09e96720434e4aba7f876f9c6cf56bce58c2c
--
2.39.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pbs-devel] [PATCH vma-to-pbs] Initial commit
2023-09-29 13:12 Filip Schauer
@ 2023-09-29 13:15 ` Filip Schauer
0 siblings, 0 replies; 6+ messages in thread
From: Filip Schauer @ 2023-09-29 13:15 UTC (permalink / raw)
To: pbs-devel
Please ignore this, I forgot to add v2 to the subject.
On 29/09/2023 15:12, Filip Schauer wrote:
> Implement a tool to import VMA files into a Proxmox Backup Server
>
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
> Changes since v1:
> * Remove unused crates and uses
> * Format the code
> * Use anyhow for error handling
> * Use clap for parsing arguments instead of getopts
> * Fix blocks being reindexed on every read
> * Make sure ProxmoxBackupHandle is dropped properly on error
> * Move image_chunk_buffer from stack to heap
> * Move the block_index in VmaReader to the heap completely
> * Initialize vectors with `Vec::with_capacity` and `resize` instead of
> the `vec!` macro, to potentially improve performance on debug builds.
> * Add comments to code filling the MD5 sum field with zeros
> * Change device_id arguments to usize
> * Handle devices that have a size that is not aligned to 4096 properly
> in read_device_contents, when the caller provides a buffer that would
> exceed the device size.
> * Avoid unnecessary loop iterations in read_device_contents when the
> buffer size is not aligned to 65536
>
> .cargo/config | 5 +
> .gitmodules | 6 +
> Cargo.toml | 18 ++
> Makefile | 70 +++++++
> src/main.rs | 311 ++++++++++++++++++++++++++++++
> src/vma.rs | 340 +++++++++++++++++++++++++++++++++
> submodules/proxmox | 1 +
> submodules/proxmox-backup-qemu | 1 +
> 8 files changed, 752 insertions(+)
> create mode 100644 .cargo/config
> create mode 100644 .gitmodules
> create mode 100644 Cargo.toml
> create mode 100644 Makefile
> create mode 100644 src/main.rs
> create mode 100644 src/vma.rs
> create mode 160000 submodules/proxmox
> create mode 160000 submodules/proxmox-backup-qemu
>
> diff --git a/.cargo/config b/.cargo/config
> new file mode 100644
> index 0000000..3b5b6e4
> --- /dev/null
> +++ b/.cargo/config
> @@ -0,0 +1,5 @@
> +[source]
> +[source.debian-packages]
> +directory = "/usr/share/cargo/registry"
> +[source.crates-io]
> +replace-with = "debian-packages"
> diff --git a/.gitmodules b/.gitmodules
> new file mode 100644
> index 0000000..526f5ef
> --- /dev/null
> +++ b/.gitmodules
> @@ -0,0 +1,6 @@
> +[submodule "submodules/proxmox-backup-qemu"]
> + path = submodules/proxmox-backup-qemu
> + url = git://git.proxmox.com/git/proxmox-backup-qemu.git
> +[submodule "submodules/proxmox"]
> + path = submodules/proxmox
> + url = git://git.proxmox.com/git/proxmox.git
> diff --git a/Cargo.toml b/Cargo.toml
> new file mode 100644
> index 0000000..ecf9c03
> --- /dev/null
> +++ b/Cargo.toml
> @@ -0,0 +1,18 @@
> +[package]
> +name = "vma-to-pbs"
> +version = "0.0.1"
> +authors = ["Filip Schauer <f.schauer@proxmox.com>"]
> +edition = "2021"
> +
> +[dependencies]
> +anyhow = "1.0"
> +bincode = "1.3"
> +clap = { version = "4.0.32", features = ["cargo"] }
> +md5 = "0.7.0"
> +scopeguard = "1.1.0"
> +serde = "1.0"
> +serde-big-array = "0.4.1"
> +
> +proxmox-backup-qemu = { path = "submodules/proxmox-backup-qemu" }
> +proxmox-io = { path = "submodules/proxmox/proxmox-io" }
> +proxmox-sys = { path = "submodules/proxmox/proxmox-sys" }
> diff --git a/Makefile b/Makefile
> new file mode 100644
> index 0000000..a0c841d
> --- /dev/null
> +++ b/Makefile
> @@ -0,0 +1,70 @@
> +include /usr/share/dpkg/default.mk
> +
> +PACKAGE = proxmox-vma-to-pbs
> +BUILDDIR = $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
> +
> +ARCH := $(DEB_BUILD_ARCH)
> +
> +DSC=$(DEB_SOURCE)_$(DEB_VERSION).dsc
> +MAIN_DEB=$(PACKAGE)_$(DEB_VERSION)_$(ARCH).deb
> +OTHER_DEBS = \
> + $(PACKAGE)-dev_$(DEB_VERSION)_$(ARCH).deb \
> + $(PACKAGE)-dbgsym_$(DEB_VERSION)_$(ARCH).deb
> +DEBS=$(MAIN_DEB) $(OTHER_DEBS)
> +
> +DESTDIR=
> +
> +TARGET_DIR := target/debug
> +
> +ifeq ($(BUILD_MODE), release)
> +CARGO_BUILD_ARGS += --release
> +TARGETDIR := target/release
> +endif
> +
> +.PHONY: all build
> +all: build
> +
> +build: $(TARGETDIR)/vma-to-pbs
> +$(TARGETDIR)/vma-to-pbs: Cargo.toml src/
> + cargo build $(CARGO_BUILD_ARGS)
> +
> +.PHONY: install
> +install: $(TARGETDIR)/vma-to-pbs
> + install -D -m 0755 $(TARGETDIR)/vma-to-pbs $(DESTDIR)/usr/bin/vma-to-pbs
> +
> +$(BUILDDIR): submodule
> + rm -rf $@ $@.tmp && mkdir $@.tmp
> + cp -a submodules debian Makefile .cargo Cargo.toml build.rs src $@.tmp/
> + mv $@.tmp $@
> +
> +submodule:
> + [ -e submodules/proxmox-backup-qemu/Cargo.toml ] || [ -e submodules/proxmox/proxmox-sys/Cargo.toml ] || git submodule update --init --recursive
> +
> +dsc:
> + rm -rf $(BUILDDIR) $(DSC)
> + $(MAKE) $(DSC)
> + lintian $(DSC)
> +
> +$(DSC): $(BUILDDIR)
> + cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
> +
> +sbuild: $(DSC)
> + sbuild $<
> +
> +.PHONY: deb dsc
> +deb: $(OTHER_DEBS)
> +$(OTHER_DEBS): $(MAIN_DEB)
> +$(MAIN_DEB): $(BUILDDIR)
> + cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
> + lintian $(DEBS)
> +
> +distclean: clean
> +clean:
> + cargo clean
> + rm -rf $(PACKAGE)-[0-9]*/
> + rm -r *.deb *.dsc $(DEB_SOURCE)*.tar* *.build *.buildinfo *.changes Cargo.lock
> +
> +.PHONY: dinstall
> +dinstall: $(DEBS)
> + dpkg -i $(DEBS)
> +
> diff --git a/src/main.rs b/src/main.rs
> new file mode 100644
> index 0000000..1aefd29
> --- /dev/null
> +++ b/src/main.rs
> @@ -0,0 +1,311 @@
> +extern crate anyhow;
> +extern crate clap;
> +extern crate proxmox_backup_qemu;
> +extern crate proxmox_io;
> +extern crate proxmox_sys;
> +extern crate scopeguard;
> +
> +use std::env;
> +use std::ffi::{c_char, CStr, CString};
> +use std::ptr;
> +use std::time::{SystemTime, UNIX_EPOCH};
> +
> +use anyhow::{anyhow, Context, Result};
> +use clap::{command, Arg, ArgAction};
> +use proxmox_backup_qemu::*;
> +use proxmox_sys::linux::tty;
> +use scopeguard::defer;
> +
> +mod vma;
> +use vma::*;
> +
> +fn backup_vma_to_pbs(
> + vma_file_path: String,
> + pbs_repository: String,
> + backup_id: String,
> + pbs_password: String,
> + keyfile: Option<String>,
> + key_password: Option<String>,
> + master_keyfile: Option<String>,
> + fingerprint: String,
> + compress: bool,
> + encrypt: bool,
> +) -> Result<()> {
> + println!("VMA input file: {}", vma_file_path);
> + println!("PBS repository: {}", pbs_repository);
> + println!("PBS fingerprint: {}", fingerprint);
> + println!("compress: {}", compress);
> + println!("encrypt: {}", encrypt);
> +
> + let backup_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
> + println!("backup time: {}", backup_time);
> +
> + let mut pbs_err: *mut c_char = ptr::null_mut();
> +
> + let pbs_repository_cstr = CString::new(pbs_repository).unwrap();
> + let backup_id_cstr = CString::new(backup_id).unwrap();
> + let pbs_password_cstr = CString::new(pbs_password).unwrap();
> + let fingerprint_cstr = CString::new(fingerprint).unwrap();
> + let keyfile_cstr = keyfile.map(|v| CString::new(v).unwrap());
> + let keyfile_ptr = keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
> + let key_password_cstr = key_password.map(|v| CString::new(v).unwrap());
> + let key_password_ptr = key_password_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
> + let master_keyfile_cstr = master_keyfile.map(|v| CString::new(v).unwrap());
> + let master_keyfile_ptr = master_keyfile_cstr.map(|v| v.as_ptr()).unwrap_or(ptr::null());
> +
> + let pbs = proxmox_backup_new_ns(
> + pbs_repository_cstr.as_ptr(),
> + ptr::null(),
> + backup_id_cstr.as_ptr(),
> + backup_time,
> + PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
> + pbs_password_cstr.as_ptr(),
> + keyfile_ptr,
> + key_password_ptr,
> + master_keyfile_ptr,
> + true,
> + false,
> + fingerprint_cstr.as_ptr(),
> + &mut pbs_err,
> + );
> +
> + defer! {
> + proxmox_backup_disconnect(pbs);
> + }
> +
> + if pbs == ptr::null_mut() {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!("proxmox_backup_new_ns failed: {pbs_err_cstr:?}"));
> + }
> + }
> +
> + let connect_result = proxmox_backup_connect(pbs, &mut pbs_err);
> +
> + if connect_result < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!("proxmox_backup_connect failed: {pbs_err_cstr:?}"));
> + }
> + }
> +
> + let mut vma_reader = VmaReader::new(&vma_file_path)?;
> +
> + // Handle configs
> + let configs = vma_reader.get_configs();
> + for (config_name, config_data) in configs {
> + println!("CFG: size: {} name: {}", config_data.len(), config_name);
> +
> + let config_name_cstr = CString::new(config_name).unwrap();
> +
> + if proxmox_backup_add_config(
> + pbs,
> + config_name_cstr.as_ptr(),
> + config_data.as_ptr(),
> + config_data.len() as u64,
> + &mut pbs_err,
> + ) < 0
> + {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!(
> + "proxmox_backup_add_config failed: {pbs_err_cstr:?}"
> + ));
> + }
> + }
> + }
> +
> + // Handle block devices
> + for device_id in 0..255 {
> + let device_name = match vma_reader.get_device_name(device_id) {
> + Some(x) => x,
> + None => {
> + continue;
> + }
> + };
> +
> + let device_size = match vma_reader.get_device_size(device_id) {
> + Some(x) => x,
> + None => {
> + continue;
> + }
> + };
> +
> + println!(
> + "DEV: dev_id={} size: {} devname: {}",
> + device_id, device_size, device_name
> + );
> +
> + let device_name_cstr = CString::new(device_name).unwrap();
> + let pbs_device_id = proxmox_backup_register_image(
> + pbs,
> + device_name_cstr.as_ptr(),
> + device_size,
> + false,
> + &mut pbs_err,
> + );
> +
> + if pbs_device_id < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!(
> + "proxmox_backup_register_image failed: {pbs_err_cstr:?}"
> + ));
> + }
> + }
> +
> + let mut image_chunk_buffer = proxmox_io::boxed::zeroed(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE as usize);
> + let mut bytes_transferred = 0;
> +
> + while bytes_transferred < device_size {
> + let bytes_left = device_size - bytes_transferred;
> + let chunk_size = bytes_left.min(PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE);
> + println!(
> + "Uploading dev_id: {} offset: {:#0X} - {:#0X}",
> + device_id,
> + bytes_transferred,
> + bytes_transferred + chunk_size
> + );
> +
> + let is_zero_chunk = vma_reader
> + .read_device_contents(
> + device_id,
> + &mut image_chunk_buffer[0..chunk_size as usize],
> + bytes_transferred,
> + )
> + .with_context(|| {
> + format!(
> + "read {} bytes at offset {} from disk {} from VMA file",
> + chunk_size, bytes_transferred, device_id
> + )
> + })?;
> +
> + let write_data_result = proxmox_backup_write_data(
> + pbs,
> + pbs_device_id as u8,
> + if is_zero_chunk {
> + ptr::null()
> + } else {
> + image_chunk_buffer.as_ptr()
> + },
> + bytes_transferred,
> + chunk_size,
> + &mut pbs_err,
> + );
> +
> + if write_data_result < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!(
> + "proxmox_backup_write_data failed: {pbs_err_cstr:?}"
> + ));
> + }
> + }
> +
> + bytes_transferred += chunk_size;
> + }
> +
> + if proxmox_backup_close_image(pbs, pbs_device_id as u8, &mut pbs_err) < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!(
> + "proxmox_backup_close_image failed: {pbs_err_cstr:?}"
> + ));
> + }
> + }
> + }
> +
> + if proxmox_backup_finish(pbs, &mut pbs_err) < 0 {
> + unsafe {
> + let pbs_err_cstr = CStr::from_ptr(pbs_err);
> + return Err(anyhow!("proxmox_backup_finish failed: {pbs_err_cstr:?}"));
> + }
> + }
> +
> + Ok(())
> +}
> +
> +fn main() -> Result<()> {
> + let matches = command!()
> + .arg(
> + Arg::new("repository")
> + .long("repository")
> + .value_name("auth_id@host:port:datastore")
> + .help("Repository URL")
> + .required(true),
> + )
> + .arg(
> + Arg::new("vmid")
> + .long("vmid")
> + .value_name("VMID")
> + .help("Backup ID")
> + .required(true),
> + )
> + .arg(
> + Arg::new("fingerprint")
> + .long("fingerprint")
> + .value_name("FINGERPRINT")
> + .help("Proxmox Backup Server Fingerprint")
> + .required(true),
> + )
> + .arg(
> + Arg::new("keyfile")
> + .long("keyfile")
> + .value_name("KEYFILE")
> + .help("Key file"),
> + )
> + .arg(
> + Arg::new("master_keyfile")
> + .long("master_keyfile")
> + .value_name("MASTER_KEYFILE")
> + .help("Master key file"),
> + )
> + .arg(
> + Arg::new("compress")
> + .long("compress")
> + .short('c')
> + .help("Compress the Backup")
> + .action(ArgAction::SetTrue),
> + )
> + .arg(
> + Arg::new("encrypt")
> + .long("encrypt")
> + .short('e')
> + .help("Encrypt the Backup")
> + .action(ArgAction::SetTrue),
> + )
> + .arg(Arg::new("vma_file"))
> + .get_matches();
> +
> + let pbs_repository = matches.get_one::<String>("repository").unwrap().to_string();
> + let vmid = matches.get_one::<String>("vmid").unwrap().to_string();
> + let fingerprint = matches.get_one::<String>("fingerprint").unwrap().to_string();
> +
> + let keyfile = matches.get_one::<String>("keyfile");
> + let master_keyfile = matches.get_one::<String>("master_keyfile");
> + let compress = matches.get_flag("compress");
> + let encrypt = matches.get_flag("encrypt");
> +
> + let vma_file_path = matches.get_one::<String>("vma_file").unwrap().to_string();
> +
> + let pbs_password = String::from_utf8(tty::read_password(&"Password: ").unwrap()).unwrap();
> + let key_password = match keyfile {
> + Some(_) => Some(String::from_utf8(tty::read_password(&"Key Password: ").unwrap()).unwrap()),
> + None => None,
> + };
> +
> + backup_vma_to_pbs(
> + vma_file_path,
> + pbs_repository,
> + vmid,
> + pbs_password,
> + keyfile.cloned(),
> + key_password,
> + master_keyfile.cloned(),
> + fingerprint,
> + compress,
> + encrypt,
> + )?;
> +
> + Ok(())
> +}
> diff --git a/src/vma.rs b/src/vma.rs
> new file mode 100644
> index 0000000..e2c3475
> --- /dev/null
> +++ b/src/vma.rs
> @@ -0,0 +1,340 @@
> +extern crate anyhow;
> +extern crate md5;
> +
> +use std::collections::HashMap;
> +use std::fs::File;
> +use std::io::{Read, Seek, SeekFrom};
> +use std::mem::size_of;
> +use std::{cmp, str};
> +
> +use anyhow::{anyhow, Result};
> +use bincode::Options;
> +use serde::{Deserialize, Serialize};
> +use serde_big_array::BigArray;
> +
> +const VMA_BLOCKS_PER_EXTENT: usize = 59;
> +const VMA_MAX_CONFIGS: usize = 256;
> +const VMA_MAX_DEVICES: usize = 256;
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaDeviceInfoHeader {
> + pub device_name_offset: u32,
> + reserved: [u8; 4],
> + pub device_size: u64,
> + reserved1: [u8; 16],
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaHeader {
> + pub magic: [u8; 4],
> + pub version: u32,
> + pub uuid: [u8; 16],
> + pub ctime: u64,
> + pub md5sum: [u8; 16],
> + pub blob_buffer_offset: u32,
> + pub blob_buffer_size: u32,
> + pub header_size: u32,
> + #[serde(with = "BigArray")]
> + reserved: [u8; 1984],
> + #[serde(with = "BigArray")]
> + pub config_names: [u32; VMA_MAX_CONFIGS],
> + #[serde(with = "BigArray")]
> + pub config_data: [u32; VMA_MAX_CONFIGS],
> + reserved1: [u8; 4],
> + #[serde(with = "BigArray")]
> + pub dev_info: [VmaDeviceInfoHeader; VMA_MAX_DEVICES],
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaBlockInfo {
> + pub mask: u16,
> + reserved: u8,
> + pub dev_id: u8,
> + pub cluster_num: u32,
> +}
> +
> +#[repr(C)]
> +#[derive(Serialize, Deserialize)]
> +struct VmaExtentHeader {
> + pub magic: [u8; 4],
> + reserved: [u8; 2],
> + pub block_count: u16,
> + pub uuid: [u8; 16],
> + pub md5sum: [u8; 16],
> + #[serde(with = "BigArray")]
> + pub blockinfo: [VmaBlockInfo; VMA_BLOCKS_PER_EXTENT],
> +}
> +
> +#[derive(Clone)]
> +struct VmaBlockIndexEntry {
> + pub cluster_file_offset: u64,
> + pub mask: u16,
> +}
> +
> +pub struct VmaReader {
> + vma_file: File,
> + vma_header: VmaHeader,
> + configs: HashMap<String, String>,
> + block_index: Vec<Vec<VmaBlockIndexEntry>>,
> + blocks_are_indexed: bool,
> +}
> +
> +impl VmaReader {
> + pub fn new(vma_file_path: &str) -> Result<Self> {
> + let mut vma_file = match File::open(vma_file_path) {
> + Err(why) => return Err(anyhow!("couldn't open {}: {}", vma_file_path, why)),
> + Ok(file) => file,
> + };
> +
> + let vma_header = Self::read_header(&mut vma_file).unwrap();
> + let configs = Self::read_blob_buffer(&mut vma_file, &vma_header).unwrap();
> + let block_index: Vec<Vec<VmaBlockIndexEntry>> = (0..256).map(|_| Vec::new()).collect();
> +
> + let instance = Self {
> + vma_file,
> + vma_header,
> + configs,
> + block_index,
> + blocks_are_indexed: false,
> + };
> +
> + Ok(instance)
> + }
> +
> + fn read_header(vma_file: &mut File) -> Result<VmaHeader> {
> + let mut buffer = Vec::with_capacity(size_of::<VmaHeader>());
> + buffer.resize(size_of::<VmaHeader>(), 0);
> + vma_file.read_exact(&mut buffer)?;
> +
> + let bincode_options = bincode::DefaultOptions::new()
> + .with_fixint_encoding()
> + .with_big_endian();
> +
> + let vma_header: VmaHeader = bincode_options.deserialize(&buffer)?;
> +
> + if vma_header.magic != [b'V', b'M', b'A', 0] {
> + return Err(anyhow!("Invalid magic number"));
> + }
> +
> + if vma_header.version != 1 {
> + return Err(anyhow!("Invalid VMA version {}", vma_header.version));
> + }
> +
> + buffer.resize(vma_header.header_size as usize, 0);
> + vma_file.read_exact(&mut buffer[size_of::<VmaHeader>()..])?;
> +
> + // Fill the MD5 sum field with zeros to compute the MD5 sum
> + buffer[32..48].fill(0);
> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
> +
> + if vma_header.md5sum != computed_md5sum {
> + return Err(anyhow!("Wrong VMA header checksum"));
> + }
> +
> + return Ok(vma_header);
> + }
> +
> + fn read_string_from_file(vma_file: &mut File, file_offset: u64) -> Result<String> {
> + let mut size_bytes = [0u8; 2];
> + vma_file.seek(SeekFrom::Start(file_offset))?;
> + vma_file.read_exact(&mut size_bytes)?;
> + let size = u16::from_le_bytes(size_bytes) as usize;
> + let mut string_bytes = Vec::with_capacity(size - 1);
> + string_bytes.resize(size - 1, 0);
> + vma_file.read_exact(&mut string_bytes)?;
> + let string = str::from_utf8(&string_bytes)?;
> +
> + return Ok(string.to_string());
> + }
> +
> + fn read_blob_buffer(
> + vma_file: &mut File,
> + vma_header: &VmaHeader,
> + ) -> Result<HashMap<String, String>> {
> + let mut configs = HashMap::new();
> +
> + for i in 0..VMA_MAX_CONFIGS {
> + let config_name_offset = vma_header.config_names[i];
> + let config_data_offset = vma_header.config_data[i];
> +
> + if config_name_offset == 0 || config_data_offset == 0 {
> + continue;
> + }
> +
> + let config_name_file_offset = (vma_header.blob_buffer_offset + config_name_offset) as u64;
> + let config_data_file_offset = (vma_header.blob_buffer_offset + config_data_offset) as u64;
> + let config_name = Self::read_string_from_file(vma_file, config_name_file_offset)?;
> + let config_data = Self::read_string_from_file(vma_file, config_data_file_offset)?;
> +
> + configs.insert(String::from(config_name), String::from(config_data));
> + }
> +
> + return Ok(configs);
> + }
> +
> + pub fn get_configs(&self) -> HashMap<String, String> {
> + return self.configs.clone();
> + }
> +
> + pub fn get_device_name(&mut self, device_id: usize) -> Option<String> {
> + if device_id >= VMA_MAX_DEVICES {
> + return None;
> + }
> +
> + let device_name_offset = self.vma_header.dev_info[device_id].device_name_offset;
> +
> + if device_name_offset == 0 {
> + return None;
> + }
> +
> + let device_name_file_offset = (self.vma_header.blob_buffer_offset + device_name_offset) as u64;
> + let device_name = Self::read_string_from_file(&mut self.vma_file, device_name_file_offset).unwrap();
> +
> + return Some(device_name.to_string());
> + }
> +
> + pub fn get_device_size(&self, device_id: usize) -> Option<u64> {
> + if device_id >= VMA_MAX_DEVICES {
> + return None;
> + }
> +
> + let dev_info = &self.vma_header.dev_info[device_id];
> +
> + if dev_info.device_name_offset == 0 {
> + return None;
> + }
> +
> + return Some(dev_info.device_size);
> + }
> +
> + fn read_extent_header(vma_file: &mut File) -> Result<VmaExtentHeader> {
> + let mut buffer = Vec::with_capacity(size_of::<VmaExtentHeader>());
> + buffer.resize(size_of::<VmaExtentHeader>(), 0);
> + vma_file.read_exact(&mut buffer)?;
> +
> + let bincode_options = bincode::DefaultOptions::new()
> + .with_fixint_encoding()
> + .with_big_endian();
> +
> + let vma_extent_header: VmaExtentHeader = bincode_options.deserialize(&buffer)?;
> +
> + if vma_extent_header.magic != [b'V', b'M', b'A', b'E'] {
> + return Err(anyhow!("Invalid magic number"));
> + }
> +
> + // Fill the MD5 sum field with zeros to compute the MD5 sum
> + buffer[24..40].fill(0);
> + let computed_md5sum: [u8; 16] = md5::compute(&buffer).into();
> +
> + if vma_extent_header.md5sum != computed_md5sum {
> + return Err(anyhow!("Wrong VMA extent header checksum"));
> + }
> +
> + return Ok(vma_extent_header);
> + }
> +
> + fn index_device_clusters(&mut self) -> Result<()> {
> + for device_id in 0..255 {
> + let device_size = match self.get_device_size(device_id) {
> + Some(x) => x,
> + None => {
> + continue;
> + }
> + };
> +
> + let device_cluster_count = (device_size + 4096 * 16 - 1) / (4096 * 16);
> +
> + let block_index_entry_placeholder = VmaBlockIndexEntry {
> + cluster_file_offset: 0,
> + mask: 0,
> + };
> +
> + self.block_index[device_id].resize(device_cluster_count as usize, block_index_entry_placeholder);
> + }
> +
> + let mut file_offset = self.vma_header.header_size as u64;
> + let vma_file_size = self.vma_file.metadata()?.len();
> +
> + while file_offset < vma_file_size {
> + self.vma_file.seek(SeekFrom::Start(file_offset))?;
> + let vma_extent_header = Self::read_extent_header(&mut self.vma_file)?;
> + file_offset += size_of::<VmaExtentHeader>() as u64;
> +
> + for i in 0..VMA_BLOCKS_PER_EXTENT {
> + let blockinfo = &vma_extent_header.blockinfo[i];
> +
> + if blockinfo.dev_id == 0 {
> + continue;
> + }
> +
> + let block_index_entry = VmaBlockIndexEntry {
> + cluster_file_offset: file_offset,
> + mask: blockinfo.mask,
> + };
> +
> + self.block_index[blockinfo.dev_id as usize][blockinfo.cluster_num as usize] = block_index_entry;
> + file_offset += blockinfo.mask.count_ones() as u64 * 4096;
> + }
> + }
> +
> + self.blocks_are_indexed = true;
> +
> + return Ok(());
> + }
> +
> + pub fn read_device_contents(
> + &mut self,
> + device_id: usize,
> + buffer: &mut [u8],
> + offset: u64,
> + ) -> Result<bool> {
> + if device_id >= VMA_MAX_DEVICES {
> + return Err(anyhow!("invalid device id {}", device_id));
> + }
> +
> + if offset % (4096 * 16) != 0 {
> + return Err(anyhow!("offset is not aligned to 65536"));
> + }
> +
> + // Make sure that the device clusters are already indexed
> + if !self.blocks_are_indexed {
> + self.index_device_clusters()?;
> + }
> +
> + let this_device_block_index = &self.block_index[device_id];
> + let length = cmp::min(
> + buffer.len(),
> + this_device_block_index.len() * 4096 * 16 - offset as usize,
> + );
> + let mut buffer_offset = 0;
> + let mut buffer_is_zero = true;
> +
> + while buffer_offset < length {
> + let block_index_entry = &this_device_block_index[(offset as usize + buffer_offset) / (4096 * 16)];
> + self.vma_file.seek(SeekFrom::Start(block_index_entry.cluster_file_offset))?;
> +
> + for i in 0..16 {
> + if buffer_offset >= length {
> + break;
> + }
> +
> + let block_buffer_end = buffer_offset + cmp::min(length - buffer_offset, 4096);
> + let block_mask = ((block_index_entry.mask >> i) & 1) == 1;
> +
> + if block_mask {
> + self.vma_file.read_exact(&mut buffer[buffer_offset..block_buffer_end])?;
> + buffer_is_zero = false;
> + } else {
> + buffer[buffer_offset..block_buffer_end].fill(0);
> + }
> +
> + buffer_offset += 4096;
> + }
> + }
> +
> + return Ok(buffer_is_zero);
> + }
> +}
> diff --git a/submodules/proxmox b/submodules/proxmox
> new file mode 160000
> index 0000000..dc9ee73
> --- /dev/null
> +++ b/submodules/proxmox
> @@ -0,0 +1 @@
> +Subproject commit dc9ee737512fc2c7325f47b875d6c69ccf484cea
> diff --git a/submodules/proxmox-backup-qemu b/submodules/proxmox-backup-qemu
> new file mode 160000
> index 0000000..73a09e9
> --- /dev/null
> +++ b/submodules/proxmox-backup-qemu
> @@ -0,0 +1 @@
> +Subproject commit 73a09e96720434e4aba7f876f9c6cf56bce58c2c
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-09-29 13:38 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-22 12:41 [pbs-devel] [PATCH vma-to-pbs] Initial commit Filip Schauer
2023-09-26 13:11 ` Wolfgang Bumiller
2023-09-29 13:22 ` Filip Schauer
2023-09-29 13:38 ` Lukas Wagner
2023-09-29 13:12 Filip Schauer
2023-09-29 13:15 ` Filip Schauer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox