From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 16 Jun 2021 12:55:51 +0200
Message-Id: <20210616105552.2594536-5-s.reiter@proxmox.com>
In-Reply-To: <20210616105552.2594536-1-s.reiter@proxmox.com>
References: <20210616105552.2594536-1-s.reiter@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup 4/5] file-restore/disk: support ZFS pools

Uses the ZFS utils to detect, import and mount zpools. These are
available as a new Bucket type 'zpool'.

Requires some minor changes to the existing disk and partition detection
code, so the ZFS-specific part can use the information gathered in the
previous pass to associate drive names with their 'drive-xxxN.img.fidx'
node.

For detecting the size, the zpool has to be imported. This is only done
for pools containing 5 or fewer disks, as anything else might take too
long (and should seldom be found within VMs).
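For illustration (following the "/disk/bucket/component/path" scheme
already documented in disk.rs), a file on a pool named 'rpool' would
then be addressed roughly like this, with the pool name taking the
place of the partition number used by the 'part' bucket type:

    /drive-scsi0/zpool/rpool/etc/passwd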
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---

@Thomas: I changed my mind about the "guess the size from the disk sizes"
approach for a couple of reasons:
* it didn't seem terribly accurate when comparing the values with what ZFS
  reports, even for simple striped setups
* it would require implementing the calculation for all RAID-Z modes and
  such, including the handling of differently sized disks - possible, but
  probably not worth it, considering:
* the actual import/mount, if done *right away*, is surprisingly fast - I
  assume because the initial scan on import already loads all required
  metadata into the disk/QEMU caches

 src/bin/proxmox_restore_daemon/disk.rs | 153 ++++++++++++++++++++++++-
 1 file changed, 152 insertions(+), 1 deletion(-)

diff --git a/src/bin/proxmox_restore_daemon/disk.rs b/src/bin/proxmox_restore_daemon/disk.rs
index 1ff5468f..5b66dd2f 100644
--- a/src/bin/proxmox_restore_daemon/disk.rs
+++ b/src/bin/proxmox_restore_daemon/disk.rs
@@ -7,13 +7,17 @@ use std::collections::HashMap;
 use std::fs::{create_dir_all, File};
 use std::io::{BufRead, BufReader};
 use std::path::{Component, Path, PathBuf};
+use std::process::Command;
 
 use proxmox::const_regex;
 use proxmox::tools::fs;
 
 use proxmox_backup::api2::types::BLOCKDEVICE_NAME_REGEX;
+use proxmox_backup::tools::run_command;
 
 const_regex! {
     VIRTIO_PART_REGEX = r"^vd[a-z]+(\d+)$";
+    ZPOOL_POOL_NAME_REGEX = r"^ {3}pool: (.*)$";
+    ZPOOL_IMPORT_DISK_REGEX = r"^\t {2,4}(vd[a-z]+(?:\d+)?)\s+ONLINE$";
 }
 
 lazy_static! {
@@ -43,6 +47,7 @@ pub enum ResolveResult {
     BucketComponents(Vec<(String, u64)>),
 }
 
+#[derive(Clone)]
 struct PartitionBucketData {
     dev_node: String,
     number: i32,
@@ -50,6 +55,13 @@ struct PartitionBucketData {
     size: u64,
 }
 
+#[derive(Clone)]
+struct ZFSBucketData {
+    name: String,
+    mountpoint: Option<PathBuf>,
+    size: u64,
+}
+
 /// A "Bucket" represents a mapping found on a disk, e.g. a partition, a zfs dataset or an LV. A
 /// uniquely identifying path to a file then consists of four components:
 /// "/disk/bucket/component/path"
@@ -60,9 +72,11 @@ struct PartitionBucketData {
 ///   path: relative path of the file on the filesystem indicated by the other parts, may contain
 ///         more subdirectories
 /// e.g.: "/drive-scsi0/part/0/etc/passwd"
+#[derive(Clone)]
 enum Bucket {
     Partition(PartitionBucketData),
     RawFs(PartitionBucketData),
+    ZPool(ZFSBucketData),
 }
 
 impl Bucket {
@@ -81,6 +95,13 @@ impl Bucket {
                 }
             }
             Bucket::RawFs(_) => ty == "raw",
+            Bucket::ZPool(data) => {
+                if let Some(ref comp) = comp.get(0) {
+                    ty == "zpool" && comp.as_ref() == &data.name
+                } else {
+                    false
+                }
+            }
         })
     }
 
@@ -88,6 +109,7 @@ impl Bucket {
         match self {
             Bucket::Partition(_) => "part",
             Bucket::RawFs(_) => "raw",
+            Bucket::ZPool(_) => "zpool",
         }
     }
 
@@ -104,6 +126,7 @@ impl Bucket {
         Ok(match self {
             Bucket::Partition(data) => data.number.to_string(),
             Bucket::RawFs(_) => "raw".to_owned(),
+            Bucket::ZPool(data) => data.name.clone(),
         })
     }
 
@@ -111,6 +134,7 @@ impl Bucket {
         Ok(match type_string {
             "part" => 1,
             "raw" => 0,
+            "zpool" => 1,
             _ => bail!("invalid bucket type for component depth: {}", type_string),
         })
     }
@@ -118,6 +142,7 @@ impl Bucket {
     fn size(&self) -> u64 {
         match self {
             Bucket::Partition(data) | Bucket::RawFs(data) => data.size,
+            Bucket::ZPool(data) => data.size,
         }
     }
 }
@@ -162,6 +187,59 @@ impl Filesystems {
                 data.mountpoint = Some(mp.clone());
                 Ok(mp)
             }
+            Bucket::ZPool(data) => {
+                if let Some(mp) = &data.mountpoint {
+                    return Ok(mp.clone());
+                }
+
+                let mntpath = format!("/mnt/{}", &data.name);
+                create_dir_all(&mntpath)?;
+
+                // call ZFS tools to import and mount the pool with the root mount at 'mntpath'
+                let mut cmd = Command::new("/sbin/zpool");
+                cmd.args(
+                    [
+                        "import",
+                        "-f",
+                        "-o",
+                        "readonly=on",
+                        "-d",
+                        "/dev",
+                        "-R",
+                        &mntpath,
+                        &data.name,
+                    ]
+                    .iter(),
+                );
+                if let Err(msg) = run_command(cmd, None) {
+                    // ignore double import, this may happen if a previous attempt failed further
+                    // down below - this way we can at least try again
+                    if !msg
+                        .to_string()
+                        .contains("a pool with that name already exists")
+                    {
+                        return Err(msg);
+                    }
+                }
+
+                // 'mount -a' simply mounts all datasets that haven't been automounted, which
+                // should only be ones that we've imported just now
+                let mut cmd = Command::new("/sbin/zfs");
+                cmd.args(["mount", "-a"].iter());
+                run_command(cmd, None)?;
+
+                // Now that we have imported the pool, we can also query the size
+                let mut cmd = Command::new("/sbin/zpool");
+                cmd.args(["list", "-o", "size", "-Hp", &data.name].iter());
+                let size = run_command(cmd, None)?;
+                if let Ok(size) = size.trim().parse::<u64>() {
+                    data.size = size;
+                }
+
+                let mp = PathBuf::from(mntpath);
+                data.mountpoint = Some(mp.clone());
+                Ok(mp)
+            }
         }
     }
 
@@ -204,9 +282,11 @@ impl DiskState {
     pub fn scan() -> Result<Self, Error> {
         let filesystems = Filesystems::scan()?;
 
+        let mut disk_map = HashMap::new();
+        let mut drive_info = HashMap::new();
+
         // create mapping for virtio drives and .fidx files (via serial description)
         // note: disks::DiskManager relies on udev, which we don't have
-        let mut disk_map = HashMap::new();
         for entry in proxmox_backup::tools::fs::scan_subdir(
             libc::AT_FDCWD,
             "/sys/block",
@@ -230,6 +310,8 @@ impl DiskState {
                 }
             };
 
+            drive_info.insert(name.to_owned(), fidx.clone());
+
             // attempt to mount device directly
             let dev_node = format!("/dev/{}", name);
             let size = Self::make_dev_node(&dev_node, &sys_path)?;
@@ -281,11 +363,55 @@ impl DiskState {
                 });
 
                 parts.push(bucket);
+
+                drive_info.insert(part_name.to_owned(), fidx.clone());
             }
 
             disk_map.insert(fidx, parts);
         }
 
+        // After the above, every valid disk should have a device node in /dev, so we can query all
+        // of them for zpools
+        let mut cmd = Command::new("/sbin/zpool");
+        cmd.args(["import", "-d", "/dev"].iter());
+        let result = run_command(cmd, None).unwrap();
+        for (pool, disks) in Self::parse_zpool_import(&result) {
+            let mut bucket = Bucket::ZPool(ZFSBucketData {
+                name: pool.clone(),
+                size: 0,
+                mountpoint: None,
+            });
+
+            // anything more than 5 disks we assume to take too long to mount, so we don't
+            // automatically - this means that no size can be reported
+            if disks.len() <= 5 {
+                let mp = filesystems.ensure_mounted(&mut bucket);
+                info!(
+                    "zpool '{}' (on: {:?}) auto-mounted at '{:?}' (size: {}B)",
+                    &pool,
+                    &disks,
+                    mp,
+                    bucket.size()
+                );
+            } else {
+                info!(
+                    "zpool '{}' (on: {:?}) auto-mount skipped, too many disks",
+                    &pool, &disks
+                );
+            }
+
+            for disk in disks {
+                if let Some(fidx) = drive_info.get(&disk) {
+                    match disk_map.get_mut(fidx) {
+                        Some(v) => v.push(bucket.clone()),
+                        None => {
+                            disk_map.insert(fidx.to_owned(), vec![bucket.clone()]);
+                        }
+                    }
+                }
+            }
+        }
+
         Ok(Self {
             filesystems,
             disk_map,
@@ -419,4 +545,29 @@ impl DiskState {
         stat::mknod(path, stat::SFlag::S_IFBLK, stat::Mode::S_IRWXU, dev)?;
         Ok(())
     }
+
+    fn parse_zpool_import(data: &str) -> Vec<(String, Vec<String>)> {
+        let mut ret = Vec::new();
+        let mut disks = Vec::new();
+        let mut cur = "".to_string();
+        for line in data.lines() {
+            if let Some(groups) = (ZPOOL_POOL_NAME_REGEX.regex_obj)().captures(line) {
+                if let Some(name) = groups.get(1) {
+                    if !disks.is_empty() {
+                        ret.push((cur, disks.clone()));
+                    }
+                    disks.clear();
+                    cur = name.as_str().to_owned();
+                }
+            } else if let Some(groups) = (ZPOOL_IMPORT_DISK_REGEX.regex_obj)().captures(line) {
+                if let Some(disk) = groups.get(1) {
+                    disks.push(disk.as_str().to_owned());
+                }
+            }
+        }
+        if !disks.is_empty() && !cur.is_empty() {
+            ret.push((cur, disks));
+        }
+        ret
+    }
 }
-- 
2.30.2
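
As a standalone illustration (not part of the patch): a rough sketch of how
the two new regexes split `zpool import` output into (pool, disks) pairs. It
uses the plain regex crate directly instead of proxmox's const_regex! macro,
and the sample input is hand-written to match the patterns - real
`zpool import` output may differ in whitespace and extra fields.

use regex::Regex;

fn parse_zpool_import(data: &str) -> Vec<(String, Vec<String>)> {
    // same patterns as ZPOOL_POOL_NAME_REGEX / ZPOOL_IMPORT_DISK_REGEX above
    let pool_re = Regex::new(r"^ {3}pool: (.*)$").unwrap();
    let disk_re = Regex::new(r"^\t {2,4}(vd[a-z]+(?:\d+)?)\s+ONLINE$").unwrap();

    let mut ret = Vec::new();
    let mut disks: Vec<String> = Vec::new();
    let mut cur = String::new();
    for line in data.lines() {
        if let Some(caps) = pool_re.captures(line) {
            // a new "pool:" header starts the next entry, flush the previous one
            if !disks.is_empty() {
                ret.push((cur.clone(), std::mem::take(&mut disks)));
            }
            cur = caps[1].to_owned();
        } else if let Some(caps) = disk_re.captures(line) {
            disks.push(caps[1].to_owned());
        }
    }
    // flush the last pool seen, if it had any ONLINE disks
    if !cur.is_empty() && !disks.is_empty() {
        ret.push((cur, disks));
    }
    ret
}

fn main() {
    // hand-written sample, shaped to match the two patterns
    let sample = "   pool: rpool\n\
                  \tconfig:\n\
                  \t  vda2    ONLINE\n\
                  \t  vdb2    ONLINE\n";
    // prints: [("rpool", ["vda2", "vdb2"])]
    println!("{:?}", parse_zpool_import(sample));
}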