From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 315C51FF13F for ; Thu, 12 Mar 2026 15:03:49 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 1B0A41835C; Thu, 12 Mar 2026 15:03:45 +0100 (CET) From: Lukas Wagner To: pdm-devel@lists.proxmox.com Subject: [PATCH proxmox 06/26] disks: import from Proxmox Backup Server Date: Thu, 12 Mar 2026 14:52:07 +0100 Message-ID: <20260312135229.420729-7-l.wagner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260312135229.420729-1-l.wagner@proxmox.com> References: <20260312135229.420729-1-l.wagner@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1773323522477 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.054 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_SHORT 0.001 Use of a URL Shortener for very short URL POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes RCVD_IN_MSPIKE_H2 0.001 Average reputation (+2) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record X-MailFrom: l.wagner@proxmox.com X-Mailman-Rule-Hits: max-size X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; news-moderation; no-subject; digests; suspicious-header Message-ID-Hash: IBLXA2JGKTNVXWZB3I4VG33H5NPYSSDL X-Message-ID-Hash: IBLXA2JGKTNVXWZB3I4VG33H5NPYSSDL X-Mailman-Approved-At: Thu, 12 Mar 2026 15:03:57 +0100 
X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Datacenter Manager development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: This is based on the disks module from PBS and left unchanged. The version has not been set to 1.0 yet since it seems like this crate could use a bit a cleanup (custom error type instead of anyhow, documentation). Signed-off-by: Lukas Wagner --- Cargo.toml | 6 + proxmox-disks/Cargo.toml | 30 + proxmox-disks/debian/changelog | 5 + proxmox-disks/debian/control | 94 ++ proxmox-disks/debian/copyright | 18 + proxmox-disks/debian/debcargo.toml | 7 + proxmox-disks/src/lib.rs | 1396 ++++++++++++++++++++++++++++ proxmox-disks/src/lvm.rs | 60 ++ proxmox-disks/src/parse_helpers.rs | 52 ++ proxmox-disks/src/smart.rs | 227 +++++ proxmox-disks/src/zfs.rs | 205 ++++ proxmox-disks/src/zpool_list.rs | 294 ++++++ proxmox-disks/src/zpool_status.rs | 496 ++++++++++ 13 files changed, 2890 insertions(+) create mode 100644 proxmox-disks/Cargo.toml create mode 100644 proxmox-disks/debian/changelog create mode 100644 proxmox-disks/debian/control create mode 100644 proxmox-disks/debian/copyright create mode 100644 proxmox-disks/debian/debcargo.toml create mode 100644 proxmox-disks/src/lib.rs create mode 100644 proxmox-disks/src/lvm.rs create mode 100644 proxmox-disks/src/parse_helpers.rs create mode 100644 proxmox-disks/src/smart.rs create mode 100644 proxmox-disks/src/zfs.rs create mode 100644 proxmox-disks/src/zpool_list.rs create mode 100644 proxmox-disks/src/zpool_status.rs diff --git a/Cargo.toml b/Cargo.toml index 97593a5d..8f3886bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "proxmox-config-digest", "proxmox-daemon", "proxmox-deb-version", + "proxmox-disks", "proxmox-dns-api", "proxmox-fixed-string", "proxmox-docgen", @@ -112,6 +113,9 @@ mail-parser = "0.11" md5 = "0.7.0" native-tls = "0.2" nix = "0.29" +nom = "7" +# used by proxmox-disks, can be replaced by OnceLock from std once it 
supports get_or_try_init +once_cell = "1.3.1" openssl = "0.10" pam-sys = "0.5" percent-encoding = "2.1" @@ -139,6 +143,7 @@ tracing = "0.1" tracing-journald = "0.3.1" tracing-log = { version = "0.2", default-features = false } tracing-subscriber = "0.3.16" +udev = "0.9" url = "2.2" walkdir = "2" zstd = "0.13" @@ -154,6 +159,7 @@ proxmox-async = { version = "0.5.0", path = "proxmox-async" } proxmox-base64 = { version = "1.0.0", path = "proxmox-base64" } proxmox-compression = { version = "1.0.0", path = "proxmox-compression" } proxmox-daemon = { version = "1.0.0", path = "proxmox-daemon" } +proxmox-disks = { version = "0.1.0", path = "proxmox-disks" } proxmox-fixed-string = { version = "0.1.0", path = "proxmox-fixed-string" } proxmox-http = { version = "1.0.5", path = "proxmox-http" } proxmox-http-error = { version = "1.0.0", path = "proxmox-http-error" } diff --git a/proxmox-disks/Cargo.toml b/proxmox-disks/Cargo.toml new file mode 100644 index 00000000..29bf56fe --- /dev/null +++ b/proxmox-disks/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "proxmox-disks" +description = "disk management and utilities" +version = "0.1.0" + +authors.workspace = true +edition.workspace = true +exclude.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +anyhow.workspace = true +crossbeam-channel.workspace = true +libc.workspace = true +nix.workspace = true +nom.workspace = true +once_cell.workspace = true +regex.workspace = true +serde_json.workspace = true +serde.workspace = true +udev.workspace = true + +proxmox-io.workspace = true +proxmox-lang.workspace = true +proxmox-log.workspace = true +proxmox-parallel-handler.workspace = true +proxmox-schema = { workspace = true, features = [ "api-macro", "api-types" ] } +proxmox-sys.workspace = true diff --git a/proxmox-disks/debian/changelog b/proxmox-disks/debian/changelog new file mode 100644 index 00000000..d41a2000 --- /dev/null +++ b/proxmox-disks/debian/changelog @@ 
-0,0 +1,5 @@ +rust-proxmox-disks (0.1.0-1) unstable; urgency=medium + + * initial version. + + -- Proxmox Support Team Tue, 10 Mar 2026 15:05:21 +0100 diff --git a/proxmox-disks/debian/control b/proxmox-disks/debian/control new file mode 100644 index 00000000..2b5dfb68 --- /dev/null +++ b/proxmox-disks/debian/control @@ -0,0 +1,94 @@ +Source: rust-proxmox-disks +Section: rust +Priority: optional +Build-Depends: debhelper-compat (= 13), + dh-sequence-cargo +Build-Depends-Arch: cargo:native , + rustc:native , + libstd-rust-dev , + librust-anyhow-1+default-dev , + librust-crossbeam-channel-0.5+default-dev , + librust-libc-0.2+default-dev (>= 0.2.107-~~) , + librust-nix-0.29+default-dev , + librust-nom-7+default-dev , + librust-once-cell-1+default-dev (>= 1.3.1-~~) , + librust-proxmox-io-1+default-dev (>= 1.2.1-~~) , + librust-proxmox-lang-1+default-dev (>= 1.5-~~) , + librust-proxmox-log-1+default-dev , + librust-proxmox-parallel-handler-1+default-dev , + librust-proxmox-schema-5+api-types-dev (>= 5.0.1-~~) , + librust-proxmox-schema-5+default-dev (>= 5.0.1-~~) , + librust-proxmox-sys-1+default-dev , + librust-regex-1+default-dev (>= 1.5-~~) , + librust-serde-1+default-dev , + librust-serde-json-1+default-dev , + librust-udev-0.9+default-dev +Maintainer: Proxmox Support Team +Standards-Version: 4.7.2 +Vcs-Git: git://git.proxmox.com/git/proxmox.git +Vcs-Browser: https://git.proxmox.com/?p=proxmox.git +Homepage: https://proxmox.com +X-Cargo-Crate: proxmox-disks + +Package: librust-proxmox-disks-dev +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + librust-anyhow-1+default-dev, + librust-crossbeam-channel-0.5+default-dev, + librust-libc-0.2+default-dev (>= 0.2.107-~~), + librust-nix-0.29+default-dev, + librust-nom-7+default-dev, + librust-once-cell-1+default-dev (>= 1.3.1-~~), + librust-proxmox-io-1+default-dev (>= 1.2.1-~~), + librust-proxmox-lang-1+default-dev (>= 1.5-~~), + librust-proxmox-log-1+default-dev, + 
librust-proxmox-parallel-handler-1+default-dev, + librust-proxmox-sys-1+default-dev, + librust-regex-1+default-dev (>= 1.5-~~), + librust-serde-1+default-dev, + librust-serde-json-1+default-dev, + librust-udev-0.9+default-dev +Recommends: + librust-proxmox-disks+default-dev (= ${binary:Version}) +Suggests: + librust-proxmox-disks+api-types-dev (= ${binary:Version}) +Provides: + librust-proxmox-disks-0-dev (= ${binary:Version}), + librust-proxmox-disks-0.1-dev (= ${binary:Version}), + librust-proxmox-disks-0.1.0-dev (= ${binary:Version}) +Description: Disk management and utilities - Rust source code + Source code for Debianized Rust crate "proxmox-disks" + +Package: librust-proxmox-disks+api-types-dev +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + librust-proxmox-disks-dev (= ${binary:Version}), + librust-proxmox-schema-5+api-macro-dev (>= 5.0.1-~~), + librust-proxmox-schema-5+api-types-dev (>= 5.0.1-~~) +Provides: + librust-proxmox-disks-0+api-types-dev (= ${binary:Version}), + librust-proxmox-disks-0.1+api-types-dev (= ${binary:Version}), + librust-proxmox-disks-0.1.0+api-types-dev (= ${binary:Version}) +Description: Disk management and utilities - feature "api-types" + This metapackage enables feature "api-types" for the Rust proxmox-disks crate, + by pulling in any additional dependencies needed by that feature. 
+ +Package: librust-proxmox-disks+default-dev +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + librust-proxmox-disks-dev (= ${binary:Version}), + librust-proxmox-schema-5+api-types-dev (>= 5.0.1-~~), + librust-proxmox-schema-5+default-dev (>= 5.0.1-~~) +Provides: + librust-proxmox-disks-0+default-dev (= ${binary:Version}), + librust-proxmox-disks-0.1+default-dev (= ${binary:Version}), + librust-proxmox-disks-0.1.0+default-dev (= ${binary:Version}) +Description: Disk management and utilities - feature "default" + This metapackage enables feature "default" for the Rust proxmox-disks crate, by + pulling in any additional dependencies needed by that feature. diff --git a/proxmox-disks/debian/copyright b/proxmox-disks/debian/copyright new file mode 100644 index 00000000..01138fa0 --- /dev/null +++ b/proxmox-disks/debian/copyright @@ -0,0 +1,18 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + +Files: + * +Copyright: 2026 Proxmox Server Solutions GmbH +License: AGPL-3.0-or-later + This program is free software: you can redistribute it and/or modify it under + the terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) any + later version. + . + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more + details. + . + You should have received a copy of the GNU Affero General Public License along + with this program. If not, see . diff --git a/proxmox-disks/debian/debcargo.toml b/proxmox-disks/debian/debcargo.toml new file mode 100644 index 00000000..b7864cdb --- /dev/null +++ b/proxmox-disks/debian/debcargo.toml @@ -0,0 +1,7 @@ +overlay = "." +crate_src_path = ".." 
+maintainer = "Proxmox Support Team " + +[source] +vcs_git = "git://git.proxmox.com/git/proxmox.git" +vcs_browser = "https://git.proxmox.com/?p=proxmox.git" diff --git a/proxmox-disks/src/lib.rs b/proxmox-disks/src/lib.rs new file mode 100644 index 00000000..e6056c14 --- /dev/null +++ b/proxmox-disks/src/lib.rs @@ -0,0 +1,1396 @@ +//! Disk query/management utilities for. + +use std::collections::{HashMap, HashSet}; +use std::ffi::{OsStr, OsString}; +use std::io; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::os::unix::fs::{FileExt, MetadataExt, OpenOptionsExt}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, LazyLock}; + +use anyhow::{bail, format_err, Context as _, Error}; +use libc::dev_t; +use once_cell::sync::OnceCell; + +use ::serde::{Deserialize, Serialize}; + +use proxmox_lang::{io_bail, io_format_err}; +use proxmox_log::info; +use proxmox_parallel_handler::ParallelHandler; +use proxmox_schema::api; +use proxmox_sys::linux::procfs::{mountinfo::Device, MountInfo}; + +use proxmox_schema::api_types::{ + BLOCKDEVICE_DISK_AND_PARTITION_NAME_REGEX, BLOCKDEVICE_NAME_REGEX, UUID_REGEX, +}; + +mod zfs; +pub use zfs::*; +mod zpool_status; +pub use zpool_status::*; +mod zpool_list; +pub use zpool_list::*; +mod lvm; +pub use lvm::*; +mod smart; +pub use smart::*; + +mod parse_helpers; + +static ISCSI_PATH_REGEX: LazyLock = + LazyLock::new(|| regex::Regex::new(r"host[^/]*/session[^/]*").unwrap()); + +/// Disk management context. +/// +/// This provides access to disk information with some caching for faster querying of multiple +/// devices. +pub struct DiskManage { + mount_info: OnceCell, + mounted_devices: OnceCell>, +} + +/// Information for a device as returned by lsblk. +#[derive(Deserialize)] +pub struct LsblkInfo { + /// Path to the device. + path: String, + /// Partition type GUID. + #[serde(rename = "parttype")] + partition_type: Option, + /// File system label. + #[serde(rename = "fstype")] + file_system_type: Option, + /// File system UUID. 
+ uuid: Option, +} + +impl DiskManage { + /// Create a new disk management context. + pub fn new() -> Arc { + Arc::new(Self { + mount_info: OnceCell::new(), + mounted_devices: OnceCell::new(), + }) + } + + /// Get the current mount info. This simply caches the result of `MountInfo::read` from the + /// `proxmox::sys` module. + pub fn mount_info(&self) -> Result<&MountInfo, Error> { + self.mount_info.get_or_try_init(MountInfo::read) + } + + /// Get a `Disk` from a device node (eg. `/dev/sda`). + pub fn disk_by_node>(self: Arc, devnode: P) -> io::Result { + let devnode = devnode.as_ref(); + + let meta = std::fs::metadata(devnode)?; + if (meta.mode() & libc::S_IFBLK) == libc::S_IFBLK { + self.disk_by_dev_num(meta.rdev()) + } else { + io_bail!("not a block device: {:?}", devnode); + } + } + + /// Get a `Disk` for a specific device number. + pub fn disk_by_dev_num(self: Arc, devnum: dev_t) -> io::Result { + self.disk_by_sys_path(format!( + "/sys/dev/block/{}:{}", + unsafe { libc::major(devnum) }, + unsafe { libc::minor(devnum) }, + )) + } + + /// Get a `Disk` for a path in `/sys`. + pub fn disk_by_sys_path>(self: Arc, path: P) -> io::Result { + let device = udev::Device::from_syspath(path.as_ref())?; + Ok(Disk { + manager: self, + device, + info: Default::default(), + }) + } + + /// Get a `Disk` for a name in `/sys/block/`. + pub fn disk_by_name(self: Arc, name: &str) -> io::Result { + let syspath = format!("/sys/block/{name}"); + self.disk_by_sys_path(syspath) + } + + /// Get a `Disk` for a name in `/sys/class/block/`. + pub fn partition_by_name(self: Arc, name: &str) -> io::Result { + let syspath = format!("/sys/class/block/{name}"); + self.disk_by_sys_path(syspath) + } + + /// Gather information about mounted disks: + fn mounted_devices(&self) -> Result<&HashSet, Error> { + self.mounted_devices + .get_or_try_init(|| -> Result<_, Error> { + let mut mounted = HashSet::new(); + + for (_id, mp) in self.mount_info()? 
{ + let source = match mp.mount_source.as_deref() { + Some(s) => s, + None => continue, + }; + + let path = Path::new(source); + if !path.is_absolute() { + continue; + } + + let meta = match std::fs::metadata(path) { + Ok(meta) => meta, + Err(ref err) if err.kind() == io::ErrorKind::NotFound => continue, + Err(other) => return Err(Error::from(other)), + }; + + if (meta.mode() & libc::S_IFBLK) != libc::S_IFBLK { + // not a block device + continue; + } + + mounted.insert(meta.rdev()); + } + + Ok(mounted) + }) + } + + /// Information about file system type and used device for a path + /// + /// Returns tuple (fs_type, device, mount_source) + pub fn find_mounted_device( + &self, + path: &std::path::Path, + ) -> Result)>, Error> { + let stat = nix::sys::stat::stat(path)?; + let device = Device::from_dev_t(stat.st_dev); + + let root_path = std::path::Path::new("/"); + + for (_id, entry) in self.mount_info()? { + if entry.root == root_path && entry.device == device { + return Ok(Some(( + entry.fs_type.clone(), + entry.device, + entry.mount_source.clone(), + ))); + } + } + + Ok(None) + } + + /// Check whether a specific device node is mounted. + /// + /// Note that this tries to `stat` the sources of all mount points without caching the result + /// of doing so, so this is always somewhat expensive. + pub fn is_devnum_mounted(&self, dev: dev_t) -> Result { + self.mounted_devices().map(|mounted| mounted.contains(&dev)) + } +} + +/// Queries (and caches) various information about a specific disk. +/// +/// This belongs to a `Disks` and provides information for a single disk. +pub struct Disk { + manager: Arc, + device: udev::Device, + info: DiskInfo, +} + +/// Helper struct (so we can initialize this with Default) +/// +/// We probably want this to be serializable to the same hash type we use in perl currently. 
+#[derive(Default)] +struct DiskInfo { + size: OnceCell, + vendor: OnceCell>, + model: OnceCell>, + rotational: OnceCell>, + // for perl: #[serde(rename = "devpath")] + ata_rotation_rate_rpm: OnceCell>, + // for perl: #[serde(rename = "devpath")] + device_path: OnceCell>, + wwn: OnceCell>, + serial: OnceCell>, + // for perl: #[serde(skip_serializing)] + partition_table_type: OnceCell>, + // for perl: #[serde(skip_serializing)] + partition_entry_scheme: OnceCell>, + // for perl: #[serde(skip_serializing)] + partition_entry_uuid: OnceCell>, + // for perl: #[serde(skip_serializing)] + partition_entry_type: OnceCell>, + gpt: OnceCell, + // ??? + bus: OnceCell>, + // ??? + fs_type: OnceCell>, + // ??? + has_holders: OnceCell, + // ??? + is_mounted: OnceCell, +} + +impl Disk { + /// Try to get the device number for this disk. + /// + /// (In udev this can fail...) + pub fn devnum(&self) -> Result { + // not sure when this can fail... + self.device + .devnum() + .ok_or_else(|| format_err!("failed to get device number")) + } + + /// Get the sys-name of this device. (The final component in the `/sys` path). + pub fn sysname(&self) -> &OsStr { + self.device.sysname() + } + + /// Get the this disk's `/sys` path. + pub fn syspath(&self) -> &Path { + self.device.syspath() + } + + /// Get the device node in `/dev`, if any. + pub fn device_path(&self) -> Option<&Path> { + //self.device.devnode() + self.info + .device_path + .get_or_init(|| self.device.devnode().map(Path::to_owned)) + .as_ref() + .map(PathBuf::as_path) + } + + /// Get the parent device. + pub fn parent(&self) -> Option { + self.device.parent().map(|parent| Self { + manager: self.manager.clone(), + device: parent, + info: Default::default(), + }) + } + + /// Read from a file in this device's sys path. + /// + /// Note: path must be a relative path! 
+ pub fn read_sys(&self, path: &Path) -> io::Result>> { + assert!(path.is_relative()); + + std::fs::read(self.syspath().join(path)) + .map(Some) + .or_else(|err| { + if err.kind() == io::ErrorKind::NotFound { + Ok(None) + } else { + Err(err) + } + }) + } + + /// Convenience wrapper for reading a `/sys` file which contains just a simple `OsString`. + pub fn read_sys_os_str>(&self, path: P) -> io::Result> { + Ok(self.read_sys(path.as_ref())?.map(|mut v| { + if Some(&b'\n') == v.last() { + v.pop(); + } + OsString::from_vec(v) + })) + } + + /// Convenience wrapper for reading a `/sys` file which contains just a simple utf-8 string. + pub fn read_sys_str>(&self, path: P) -> io::Result> { + Ok(match self.read_sys(path.as_ref())? { + Some(data) => Some(String::from_utf8(data).map_err(io::Error::other)?), + None => None, + }) + } + + /// Convenience wrapper for unsigned integer `/sys` values up to 64 bit. + pub fn read_sys_u64>(&self, path: P) -> io::Result> { + Ok(match self.read_sys_str(path)? { + Some(data) => Some(data.trim().parse().map_err(io::Error::other)?), + None => None, + }) + } + + /// Get the disk's size in bytes. + pub fn size(&self) -> io::Result { + Ok(*self.info.size.get_or_try_init(|| { + self.read_sys_u64("size")?.map(|s| s * 512).ok_or_else(|| { + io_format_err!( + "failed to get disk size from {:?}", + self.syspath().join("size"), + ) + }) + })?) + } + + /// Get the device vendor (`/sys/.../device/vendor`) entry if available. + pub fn vendor(&self) -> io::Result> { + Ok(self + .info + .vendor + .get_or_try_init(|| self.read_sys_os_str("device/vendor"))? + .as_ref() + .map(OsString::as_os_str)) + } + + /// Get the device model (`/sys/.../device/model`) entry if available. + pub fn model(&self) -> Option<&OsStr> { + self.info + .model + .get_or_init(|| self.device.property_value("ID_MODEL").map(OsStr::to_owned)) + .as_ref() + .map(OsString::as_os_str) + } + + /// Check whether this is a rotational disk. 
+ /// + /// Returns `None` if there's no `queue/rotational` file, in which case no information is + /// known. `Some(false)` if `queue/rotational` is zero, `Some(true)` if it has a non-zero + /// value. + pub fn rotational(&self) -> io::Result> { + Ok(*self + .info + .rotational + .get_or_try_init(|| -> io::Result> { + Ok(self.read_sys_u64("queue/rotational")?.map(|n| n != 0)) + })?) + } + + /// Get the WWN if available. + pub fn wwn(&self) -> Option<&OsStr> { + self.info + .wwn + .get_or_init(|| self.device.property_value("ID_WWN").map(|v| v.to_owned())) + .as_ref() + .map(OsString::as_os_str) + } + + /// Get the device serial if available. + pub fn serial(&self) -> Option<&OsStr> { + self.info + .serial + .get_or_init(|| { + self.device + .property_value("ID_SERIAL_SHORT") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Get the ATA rotation rate value from udev. This is not necessarily the same as sysfs' + /// `rotational` value. + pub fn ata_rotation_rate_rpm(&self) -> Option { + *self.info.ata_rotation_rate_rpm.get_or_init(|| { + std::str::from_utf8( + self.device + .property_value("ID_ATA_ROTATION_RATE_RPM")? + .as_bytes(), + ) + .ok()? + .parse() + .ok() + }) + } + + /// Get the partition table type, if any. + pub fn partition_table_type(&self) -> Option<&OsStr> { + self.info + .partition_table_type + .get_or_init(|| { + self.device + .property_value("ID_PART_TABLE_TYPE") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Check if this contains a GPT partition table. + pub fn has_gpt(&self) -> bool { + *self.info.gpt.get_or_init(|| { + self.partition_table_type() + .map(|s| s == "gpt") + .unwrap_or(false) + }) + } + + /// Get the partitioning scheme of which this device is a partition. 
+ pub fn partition_entry_scheme(&self) -> Option<&OsStr> { + self.info + .partition_entry_scheme + .get_or_init(|| { + self.device + .property_value("ID_PART_ENTRY_SCHEME") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Check if this is a partition. + pub fn is_partition(&self) -> bool { + self.partition_entry_scheme().is_some() + } + + /// Get the type of partition entry (ie. type UUID from the entry in the GPT partition table). + pub fn partition_entry_type(&self) -> Option<&OsStr> { + self.info + .partition_entry_type + .get_or_init(|| { + self.device + .property_value("ID_PART_ENTRY_TYPE") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Get the partition entry UUID (ie. the UUID from the entry in the GPT partition table). + pub fn partition_entry_uuid(&self) -> Option<&OsStr> { + self.info + .partition_entry_uuid + .get_or_init(|| { + self.device + .property_value("ID_PART_ENTRY_UUID") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Get the bus type used for this disk. + pub fn bus(&self) -> Option<&OsStr> { + self.info + .bus + .get_or_init(|| self.device.property_value("ID_BUS").map(|v| v.to_owned())) + .as_ref() + .map(OsString::as_os_str) + } + + /// Attempt to guess the disk type. + pub fn guess_disk_type(&self) -> io::Result { + Ok(match self.rotational()? { + Some(false) => DiskType::Ssd, + Some(true) => DiskType::Hdd, + None => match self.ata_rotation_rate_rpm() { + Some(_) => DiskType::Hdd, + None => match self.bus() { + Some(bus) if bus == "usb" => DiskType::Usb, + _ => DiskType::Unknown, + }, + }, + }) + } + + /// Get the file system type found on the disk, if any. + /// + /// Note that `None` may also just mean "unknown". 
+ pub fn fs_type(&self) -> Option<&OsStr> { + self.info + .fs_type + .get_or_init(|| { + self.device + .property_value("ID_FS_TYPE") + .map(|v| v.to_owned()) + }) + .as_ref() + .map(OsString::as_os_str) + } + + /// Check if there are any "holders" in `/sys`. This usually means the device is in use by + /// another kernel driver like the device mapper. + pub fn has_holders(&self) -> io::Result { + Ok(*self + .info + .has_holders + .get_or_try_init(|| -> io::Result { + let mut subdir = self.syspath().to_owned(); + subdir.push("holders"); + for entry in std::fs::read_dir(subdir)? { + match entry?.file_name().as_bytes() { + b"." | b".." => (), + _ => return Ok(true), + } + } + Ok(false) + })?) + } + + /// Check if this disk is mounted. + pub fn is_mounted(&self) -> Result { + Ok(*self + .info + .is_mounted + .get_or_try_init(|| self.manager.is_devnum_mounted(self.devnum()?))?) + } + + /// Read block device stats + /// + /// see + pub fn read_stat(&self) -> std::io::Result> { + if let Some(stat) = self.read_sys(Path::new("stat"))? { + let stat = unsafe { std::str::from_utf8_unchecked(&stat) }; + let stat: Vec = stat + .split_ascii_whitespace() + .map(|s| s.parse().unwrap_or_default()) + .collect(); + + if stat.len() < 15 { + return Ok(None); + } + + return Ok(Some(BlockDevStat { + read_ios: stat[0], + read_sectors: stat[2], + write_ios: stat[4] + stat[11], // write + discard + write_sectors: stat[6] + stat[13], // write + discard + io_ticks: stat[10], + })); + } + Ok(None) + } + + /// List device partitions + pub fn partitions(&self) -> Result, Error> { + let sys_path = self.syspath(); + let device = self.sysname().to_string_lossy().to_string(); + + let mut map = HashMap::new(); + + for item in proxmox_sys::fs::read_subdir(libc::AT_FDCWD, sys_path)? 
{ + let item = item?; + let name = match item.file_name().to_str() { + Ok(name) => name, + Err(_) => continue, // skip non utf8 entries + }; + + if !name.starts_with(&device) { + continue; + } + + let mut part_path = sys_path.to_owned(); + part_path.push(name); + + let disk_part = self.manager.clone().disk_by_sys_path(&part_path)?; + + if let Some(partition) = disk_part.read_sys_u64("partition")? { + map.insert(partition, disk_part); + } + } + + Ok(map) + } +} + +#[api()] +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +/// This is just a rough estimate for a "type" of disk. +pub enum DiskType { + /// We know nothing. + Unknown, + + /// May also be a USB-HDD. + Hdd, + + /// May also be a USB-SSD. + Ssd, + + /// Some kind of USB disk, but we don't know more than that. + Usb, +} + +#[derive(Debug)] +/// Represents the contents of the `/sys/block//stat` file. +pub struct BlockDevStat { + pub read_ios: u64, + pub read_sectors: u64, + pub write_ios: u64, + pub write_sectors: u64, + pub io_ticks: u64, // milliseconds +} + +/// Use lsblk to read partition type uuids and file system types. +pub fn get_lsblk_info() -> Result, Error> { + let mut command = std::process::Command::new("lsblk"); + command.args(["--json", "-o", "path,parttype,fstype,uuid"]); + + let output = proxmox_sys::command::run_command(command, None)?; + + let mut output: serde_json::Value = output.parse()?; + + Ok(serde_json::from_value(output["blockdevices"].take())?) +} + +/// Get set of devices with a file system label. 
+/// +/// The set is indexed by using the unix raw device number (dev_t is u64) +fn get_file_system_devices(lsblk_info: &[LsblkInfo]) -> Result, Error> { + let mut device_set: HashSet = HashSet::new(); + + for info in lsblk_info.iter() { + if info.file_system_type.is_some() { + let meta = std::fs::metadata(&info.path)?; + device_set.insert(meta.rdev()); + } + } + + Ok(device_set) +} + +#[api()] +#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)] +#[serde(rename_all = "lowercase")] +/// What a block device partition is used for. +pub enum PartitionUsageType { + /// Partition is not used (as far we can tell) + Unused, + /// Partition is used by LVM + LVM, + /// Partition is used by ZFS + ZFS, + /// Partition is ZFS reserved + ZfsReserved, + /// Partition is an EFI partition + EFI, + /// Partition is a BIOS partition + BIOS, + /// Partition contains a file system label + FileSystem, +} + +#[api()] +#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)] +#[serde(rename_all = "lowercase")] +/// What a block device (disk) is used for. 
+pub enum DiskUsageType { + /// Disk is not used (as far we can tell) + Unused, + /// Disk is mounted + Mounted, + /// Disk is used by LVM + LVM, + /// Disk is used by ZFS + ZFS, + /// Disk is used by device-mapper + DeviceMapper, + /// Disk has partitions + Partitions, + /// Disk contains a file system label + FileSystem, +} + +#[api()] +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +/// Basic information about a partition +pub struct PartitionInfo { + /// The partition name + pub name: String, + /// What the partition is used for + pub used: PartitionUsageType, + /// Is the partition mounted + pub mounted: bool, + /// The filesystem of the partition + pub filesystem: Option, + /// The partition devpath + pub devpath: Option, + /// Size in bytes + pub size: Option, + /// GPT partition + pub gpt: bool, + /// UUID + pub uuid: Option, +} + +#[api( + properties: { + used: { + type: DiskUsageType, + }, + "disk-type": { + type: DiskType, + }, + status: { + type: SmartStatus, + }, + partitions: { + optional: true, + items: { + type: PartitionInfo + } + } + } +)] +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +/// Information about how a Disk is used +pub struct DiskUsageInfo { + /// Disk name (`/sys/block/`) + pub name: String, + pub used: DiskUsageType, + pub disk_type: DiskType, + pub status: SmartStatus, + /// Disk wearout + pub wearout: Option, + /// Vendor + pub vendor: Option, + /// Model + pub model: Option, + /// WWN + pub wwn: Option, + /// Disk size + pub size: u64, + /// Serial number + pub serial: Option, + /// Partitions on the device + pub partitions: Option>, + /// Linux device path (/dev/xxx) + pub devpath: Option, + /// Set if disk contains a GPT partition table + pub gpt: bool, + /// RPM + pub rpm: Option, +} + +fn scan_partitions( + disk_manager: Arc, + lvm_devices: &HashSet, + zfs_devices: &HashSet, + device: &str, +) -> Result { + let mut sys_path = 
std::path::PathBuf::from("/sys/block"); + sys_path.push(device); + + let mut used = DiskUsageType::Unused; + + let mut found_lvm = false; + let mut found_zfs = false; + let mut found_mountpoints = false; + let mut found_dm = false; + let mut found_partitions = false; + + for item in proxmox_sys::fs::read_subdir(libc::AT_FDCWD, &sys_path)? { + let item = item?; + let name = match item.file_name().to_str() { + Ok(name) => name, + Err(_) => continue, // skip non utf8 entries + }; + if !name.starts_with(device) { + continue; + } + + found_partitions = true; + + let mut part_path = sys_path.clone(); + part_path.push(name); + + let data = disk_manager.clone().disk_by_sys_path(&part_path)?; + + let devnum = data.devnum()?; + + if lvm_devices.contains(&devnum) { + found_lvm = true; + } + + if data.is_mounted()? { + found_mountpoints = true; + } + + if data.has_holders()? { + found_dm = true; + } + + if zfs_devices.contains(&devnum) { + found_zfs = true; + } + } + + if found_mountpoints { + used = DiskUsageType::Mounted; + } else if found_lvm { + used = DiskUsageType::LVM; + } else if found_zfs { + used = DiskUsageType::ZFS; + } else if found_dm { + used = DiskUsageType::DeviceMapper; + } else if found_partitions { + used = DiskUsageType::Partitions; + } + + Ok(used) +} + +pub struct DiskUsageQuery { + smart: bool, + partitions: bool, +} + +impl Default for DiskUsageQuery { + fn default() -> Self { + Self::new() + } +} + +impl DiskUsageQuery { + pub const fn new() -> Self { + Self { + smart: true, + partitions: false, + } + } + + pub fn smart(&mut self, smart: bool) -> &mut Self { + self.smart = smart; + self + } + + pub fn partitions(&mut self, partitions: bool) -> &mut Self { + self.partitions = partitions; + self + } + + pub fn query(&self) -> Result, Error> { + get_disks(None, !self.smart, self.partitions) + } + + pub fn find(&self, disk: &str) -> Result { + let mut map = get_disks(Some(vec![disk.to_string()]), !self.smart, self.partitions)?; + if let Some(info) = 
map.remove(disk) { + Ok(info) + } else { + bail!("failed to get disk usage info - internal error"); // should not happen + } + } + + pub fn find_all(&self, disks: Vec) -> Result, Error> { + get_disks(Some(disks), !self.smart, self.partitions) + } +} + +fn get_partitions_info( + partitions: HashMap, + lvm_devices: &HashSet, + zfs_devices: &HashSet, + file_system_devices: &HashSet, + lsblk_infos: &[LsblkInfo], +) -> Vec { + partitions + .values() + .map(|disk| { + let devpath = disk + .device_path() + .map(|p| p.to_owned()) + .map(|p| p.to_string_lossy().to_string()); + + let mut used = PartitionUsageType::Unused; + + if let Ok(devnum) = disk.devnum() { + if lvm_devices.contains(&devnum) { + used = PartitionUsageType::LVM; + } else if zfs_devices.contains(&devnum) { + used = PartitionUsageType::ZFS; + } else if file_system_devices.contains(&devnum) { + used = PartitionUsageType::FileSystem; + } + } + + let mounted = disk.is_mounted().unwrap_or(false); + let mut filesystem = None; + let mut uuid = None; + if let Some(devpath) = devpath.as_ref() { + for info in lsblk_infos.iter().filter(|i| i.path.eq(devpath)) { + uuid = info.uuid.clone().filter(|uuid| UUID_REGEX.is_match(uuid)); + used = match info.partition_type.as_deref() { + Some("21686148-6449-6e6f-744e-656564454649") => PartitionUsageType::BIOS, + Some("c12a7328-f81f-11d2-ba4b-00a0c93ec93b") => PartitionUsageType::EFI, + Some("6a945a3b-1dd2-11b2-99a6-080020736631") => { + PartitionUsageType::ZfsReserved + } + _ => used, + }; + if used == PartitionUsageType::FileSystem { + filesystem.clone_from(&info.file_system_type); + } + } + } + + PartitionInfo { + name: disk.sysname().to_str().unwrap_or("?").to_string(), + devpath, + used, + mounted, + filesystem, + size: disk.size().ok(), + gpt: disk.has_gpt(), + uuid, + } + }) + .collect() +} + +/// Get disk usage information for multiple disks +fn get_disks( + // filter - list of device names (without leading /dev) + disks: Option>, + // do no include data from smartctl + 
no_smart: bool, + // include partitions + include_partitions: bool, +) -> Result, Error> { + let disk_manager = DiskManage::new(); + + let lsblk_info = get_lsblk_info()?; + + let zfs_devices = + zfs_devices(&lsblk_info, None).or_else(|err| -> Result, Error> { + eprintln!("error getting zfs devices: {err}"); + Ok(HashSet::new()) + })?; + + let lvm_devices = get_lvm_devices(&lsblk_info)?; + + let file_system_devices = get_file_system_devices(&lsblk_info)?; + + // fixme: ceph journals/volumes + + let mut result = HashMap::new(); + let mut device_paths = Vec::new(); + + for item in proxmox_sys::fs::scan_subdir(libc::AT_FDCWD, "/sys/block", &BLOCKDEVICE_NAME_REGEX)? + { + let item = item?; + + let name = item.file_name().to_str().unwrap().to_string(); + + if let Some(ref disks) = disks { + if !disks.contains(&name) { + continue; + } + } + + let sys_path = format!("/sys/block/{name}"); + + if let Ok(target) = std::fs::read_link(&sys_path) { + if let Some(target) = target.to_str() { + if ISCSI_PATH_REGEX.is_match(target) { + continue; + } // skip iSCSI devices + } + } + + let disk = disk_manager.clone().disk_by_sys_path(&sys_path)?; + + let devnum = disk.devnum()?; + + let size = match disk.size() { + Ok(size) => size, + Err(_) => continue, // skip devices with unreadable size + }; + + let disk_type = match disk.guess_disk_type() { + Ok(disk_type) => disk_type, + Err(_) => continue, // skip devices with undetectable type + }; + + let mut usage = DiskUsageType::Unused; + + if lvm_devices.contains(&devnum) { + usage = DiskUsageType::LVM; + } + + match disk.is_mounted() { + Ok(true) => usage = DiskUsageType::Mounted, + Ok(false) => {} + Err(_) => continue, // skip devices with undetectable mount status + } + + if zfs_devices.contains(&devnum) { + usage = DiskUsageType::ZFS; + } + + let vendor = disk + .vendor() + .unwrap_or(None) + .map(|s| s.to_string_lossy().trim().to_string()); + + let model = disk.model().map(|s| s.to_string_lossy().into_owned()); + + let serial = 
disk.serial().map(|s| s.to_string_lossy().into_owned()); + + let devpath = disk + .device_path() + .map(|p| p.to_owned()) + .map(|p| p.to_string_lossy().to_string()); + + device_paths.push((name.clone(), devpath.clone())); + + let wwn = disk.wwn().map(|s| s.to_string_lossy().into_owned()); + + let partitions: Option> = if include_partitions { + disk.partitions().map_or(None, |parts| { + Some(get_partitions_info( + parts, + &lvm_devices, + &zfs_devices, + &file_system_devices, + &lsblk_info, + )) + }) + } else { + None + }; + + if usage != DiskUsageType::Mounted { + match scan_partitions(disk_manager.clone(), &lvm_devices, &zfs_devices, &name) { + Ok(part_usage) => { + if part_usage != DiskUsageType::Unused { + usage = part_usage; + } + } + Err(_) => continue, // skip devices if scan_partitions fail + }; + } + + if usage == DiskUsageType::Unused && file_system_devices.contains(&devnum) { + usage = DiskUsageType::FileSystem; + } + + if usage == DiskUsageType::Unused && disk.has_holders()? 
{ + usage = DiskUsageType::DeviceMapper; + } + + let info = DiskUsageInfo { + name: name.clone(), + vendor, + model, + partitions, + serial, + devpath, + size, + wwn, + disk_type, + status: SmartStatus::Unknown, + wearout: None, + used: usage, + gpt: disk.has_gpt(), + rpm: disk.ata_rotation_rate_rpm(), + }; + + result.insert(name, info); + } + + if !no_smart { + let (tx, rx) = crossbeam_channel::bounded(result.len()); + + let parallel_handler = + ParallelHandler::new("smartctl data", 4, move |device: (String, String)| { + match get_smart_data(Path::new(&device.1), false) { + Ok(smart_data) => tx.send((device.0, smart_data))?, + // do not fail the whole disk output just because smartctl couldn't query one + Err(err) => { + proxmox_log::error!("failed to gather smart data for {} – {err}", device.1) + } + } + Ok(()) + }); + + for (name, path) in device_paths.into_iter() { + if let Some(p) = path { + parallel_handler.send((name, p))?; + } + } + + parallel_handler.complete()?; + while let Ok(msg) = rx.recv() { + if let Some(value) = result.get_mut(&msg.0) { + value.wearout = msg.1.wearout; + value.status = msg.1.status; + } + } + } + Ok(result) +} + +/// Try to reload the partition table +pub fn reread_partition_table(disk: &Disk) -> Result<(), Error> { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let mut command = std::process::Command::new("blockdev"); + command.arg("--rereadpt"); + command.arg(disk_path); + + proxmox_sys::command::run_command(command, None)?; + + Ok(()) +} + +/// Initialize disk by writing a GPT partition table +pub fn inititialize_gpt_disk(disk: &Disk, uuid: Option<&str>) -> Result<(), Error> { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let uuid = uuid.unwrap_or("R"); // R .. 
random disk GUID + + let mut command = std::process::Command::new("sgdisk"); + command.arg(disk_path); + command.args(["-U", uuid]); + + proxmox_sys::command::run_command(command, None)?; + + Ok(()) +} + +/// Wipes all labels, the first 200 MiB, and the last 4096 bytes of a disk/partition. +/// If called with a partition, also sets the partition type to 0x83 'Linux filesystem'. +pub fn wipe_blockdev(disk: &Disk) -> Result<(), Error> { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let is_partition = disk.is_partition(); + + let mut to_wipe: Vec = Vec::new(); + + let partitions_map = disk.partitions()?; + for part_disk in partitions_map.values() { + let part_path = match part_disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", part_disk.syspath()), + }; + to_wipe.push(part_path.to_path_buf()); + } + + to_wipe.push(disk_path.to_path_buf()); + + info!("Wiping block device {}", disk_path.display()); + + let mut wipefs_command = std::process::Command::new("wipefs"); + wipefs_command.arg("--all").args(&to_wipe); + + let wipefs_output = proxmox_sys::command::run_command(wipefs_command, None)?; + info!("wipefs output: {wipefs_output}"); + + zero_disk_start_and_end(disk)?; + + if is_partition { + // set the partition type to 0x83 'Linux filesystem' + change_parttype(disk, "8300")?; + } + + Ok(()) +} + +pub fn zero_disk_start_and_end(disk: &Disk) -> Result<(), Error> { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let disk_size = disk.size()?; + let file = std::fs::OpenOptions::new() + .write(true) + .custom_flags(libc::O_CLOEXEC | libc::O_DSYNC) + .open(disk_path) + .with_context(|| "failed to open device {disk_path:?} for writing")?; + let write_size = disk_size.min(200 * 1024 * 1024); + let zeroes = proxmox_io::boxed::zeroed(write_size as 
usize); + file.write_all_at(&zeroes, 0) + .with_context(|| "failed to wipe start of device {disk_path:?}")?; + if disk_size > write_size { + file.write_all_at(&zeroes[0..4096], disk_size - 4096) + .with_context(|| "failed to wipe end of device {disk_path:?}")?; + } + Ok(()) +} + +pub fn change_parttype(part_disk: &Disk, part_type: &str) -> Result<(), Error> { + let part_path = match part_disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", part_disk.syspath()), + }; + if let Ok(stat) = nix::sys::stat::stat(part_path) { + let mut sgdisk_command = std::process::Command::new("sgdisk"); + let major = unsafe { libc::major(stat.st_rdev) }; + let minor = unsafe { libc::minor(stat.st_rdev) }; + let partnum_path = &format!("/sys/dev/block/{major}:{minor}/partition"); + let partnum: u32 = std::fs::read_to_string(partnum_path)?.trim_end().parse()?; + sgdisk_command.arg(format!("-t{partnum}:{part_type}")); + let part_disk_parent = match part_disk.parent() { + Some(disk) => disk, + None => bail!("disk {:?} has no node in /dev", part_disk.syspath()), + }; + let part_disk_parent_path = match part_disk_parent.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", part_disk.syspath()), + }; + sgdisk_command.arg(part_disk_parent_path); + let sgdisk_output = proxmox_sys::command::run_command(sgdisk_command, None)?; + info!("sgdisk output: {sgdisk_output}"); + } + Ok(()) +} + +/// Create a single linux partition using the whole available space +pub fn create_single_linux_partition(disk: &Disk) -> Result { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let mut command = std::process::Command::new("sgdisk"); + command.args(["-n1", "-t1:8300"]); + command.arg(disk_path); + + proxmox_sys::command::run_command(command, None)?; + + let mut partitions = disk.partitions()?; + + match partitions.remove(&1) { + Some(partition) => 
Ok(partition), + None => bail!("unable to lookup device partition"), + } +} + +#[api()] +#[derive(Debug, Copy, Clone, Serialize, Deserialize, Eq, PartialEq)] +#[serde(rename_all = "lowercase")] +/// A file system type supported by our tooling. +pub enum FileSystemType { + /// Linux Ext4 + Ext4, + /// XFS + Xfs, +} + +impl std::fmt::Display for FileSystemType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let text = match self { + FileSystemType::Ext4 => "ext4", + FileSystemType::Xfs => "xfs", + }; + write!(f, "{text}") + } +} + +impl std::str::FromStr for FileSystemType { + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + use serde::de::IntoDeserializer; + Self::deserialize(s.into_deserializer()) + } +} + +/// Create a file system on a disk or disk partition +pub fn create_file_system(disk: &Disk, fs_type: FileSystemType) -> Result<(), Error> { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let fs_type = fs_type.to_string(); + + let mut command = std::process::Command::new("mkfs"); + command.args(["-t", &fs_type]); + command.arg(disk_path); + + proxmox_sys::command::run_command(command, None)?; + + Ok(()) +} +/// Block device name completion helper +pub fn complete_disk_name(_arg: &str, _param: &HashMap) -> Vec { + let dir = + match proxmox_sys::fs::scan_subdir(libc::AT_FDCWD, "/sys/block", &BLOCKDEVICE_NAME_REGEX) { + Ok(dir) => dir, + Err(_) => return vec![], + }; + + dir.flatten() + .map(|item| item.file_name().to_str().unwrap().to_string()) + .collect() +} + +/// Block device partition name completion helper +pub fn complete_partition_name(_arg: &str, _param: &HashMap) -> Vec { + let dir = match proxmox_sys::fs::scan_subdir( + libc::AT_FDCWD, + "/sys/class/block", + &BLOCKDEVICE_DISK_AND_PARTITION_NAME_REGEX, + ) { + Ok(dir) => dir, + Err(_) => return vec![], + }; + + dir.flatten() + .map(|item| 
item.file_name().to_str().unwrap().to_string()) + .collect() +} + +/// Read the FS UUID (parse blkid output) +/// +/// Note: Calling blkid is more reliable than using the udev ID_FS_UUID property. +pub fn get_fs_uuid(disk: &Disk) -> Result { + let disk_path = match disk.device_path() { + Some(path) => path, + None => bail!("disk {:?} has no node in /dev", disk.syspath()), + }; + + let mut command = std::process::Command::new("blkid"); + command.args(["-o", "export"]); + command.arg(disk_path); + + let output = proxmox_sys::command::run_command(command, None)?; + + for line in output.lines() { + if let Some(uuid) = line.strip_prefix("UUID=") { + return Ok(uuid.to_string()); + } + } + + bail!("get_fs_uuid failed - missing UUID"); +} + +/// Mount a disk by its UUID and the mount point. +pub fn mount_by_uuid(uuid: &str, mount_point: &Path) -> Result<(), Error> { + let mut command = std::process::Command::new("mount"); + command.arg(format!("UUID={uuid}")); + command.arg(mount_point); + + proxmox_sys::command::run_command(command, None)?; + Ok(()) +} + +/// Create bind mount. +pub fn bind_mount(path: &Path, target: &Path) -> Result<(), Error> { + let mut command = std::process::Command::new("mount"); + command.arg("--bind"); + command.arg(path); + command.arg(target); + + proxmox_sys::command::run_command(command, None)?; + Ok(()) +} + +/// Unmount a disk by its mount point. 
+pub fn unmount_by_mountpoint(path: &Path) -> Result<(), Error> { + let mut command = std::process::Command::new("umount"); + command.arg(path); + + proxmox_sys::command::run_command(command, None)?; + Ok(()) +} diff --git a/proxmox-disks/src/lvm.rs b/proxmox-disks/src/lvm.rs new file mode 100644 index 00000000..1456a21c --- /dev/null +++ b/proxmox-disks/src/lvm.rs @@ -0,0 +1,60 @@ +use std::collections::HashSet; +use std::os::unix::fs::MetadataExt; +use std::sync::LazyLock; + +use anyhow::Error; +use serde_json::Value; + +use super::LsblkInfo; + +static LVM_UUIDS: LazyLock> = LazyLock::new(|| { + let mut set = HashSet::new(); + set.insert("e6d6d379-f507-44c2-a23c-238f2a3df928"); + set +}); + +/// Get set of devices used by LVM (pvs). +/// +/// The set is indexed by using the unix raw device number (dev_t is u64) +pub fn get_lvm_devices(lsblk_info: &[LsblkInfo]) -> Result, Error> { + const PVS_BIN_PATH: &str = "pvs"; + + let mut command = std::process::Command::new(PVS_BIN_PATH); + command.args([ + "--reportformat", + "json", + "--noheadings", + "--readonly", + "-o", + "pv_name", + ]); + + let output = proxmox_sys::command::run_command(command, None)?; + + let mut device_set: HashSet = HashSet::new(); + + for info in lsblk_info.iter() { + if let Some(partition_type) = &info.partition_type { + if LVM_UUIDS.contains(partition_type.as_str()) { + let meta = std::fs::metadata(&info.path)?; + device_set.insert(meta.rdev()); + } + } + } + + let output: Value = output.parse()?; + + match output["report"][0]["pv"].as_array() { + Some(list) => { + for info in list { + if let Some(pv_name) = info["pv_name"].as_str() { + let meta = std::fs::metadata(pv_name)?; + device_set.insert(meta.rdev()); + } + } + } + None => return Ok(device_set), + } + + Ok(device_set) +} diff --git a/proxmox-disks/src/parse_helpers.rs b/proxmox-disks/src/parse_helpers.rs new file mode 100644 index 00000000..563866d6 --- /dev/null +++ b/proxmox-disks/src/parse_helpers.rs @@ -0,0 +1,52 @@ +use 
anyhow::{bail, Error}; + +use nom::{ + bytes::complete::take_while1, + combinator::all_consuming, + error::{ContextError, VerboseError}, +}; + +pub(crate) type IResult> = Result<(I, O), nom::Err>; + +fn verbose_err<'a>(i: &'a str, ctx: &'static str) -> VerboseError<&'a str> { + VerboseError::add_context(i, ctx, VerboseError { errors: vec![] }) +} + +pub(crate) fn parse_error<'a>( + i: &'a str, + context: &'static str, +) -> nom::Err> { + nom::Err::Error(verbose_err(i, context)) +} + +pub(crate) fn parse_failure<'a>( + i: &'a str, + context: &'static str, +) -> nom::Err> { + nom::Err::Error(verbose_err(i, context)) +} + +/// Recognizes one or more non-whitespace characters +pub(crate) fn notspace1(i: &str) -> IResult<&str, &str> { + take_while1(|c| !(c == ' ' || c == '\t' || c == '\n'))(i) +} + +/// Parse complete input, generate verbose error message with line numbers +pub(crate) fn parse_complete<'a, F, O>(what: &str, i: &'a str, parser: F) -> Result +where + F: FnMut(&'a str) -> IResult<&'a str, O>, +{ + match all_consuming(parser)(i) { + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + bail!( + "unable to parse {} - {}", + what, + nom::error::convert_error(i, err) + ); + } + Err(err) => { + bail!("unable to parse {} - {}", what, err); + } + Ok((_, data)) => Ok(data), + } +} diff --git a/proxmox-disks/src/smart.rs b/proxmox-disks/src/smart.rs new file mode 100644 index 00000000..1d41cee2 --- /dev/null +++ b/proxmox-disks/src/smart.rs @@ -0,0 +1,227 @@ +use std::sync::LazyLock; +use std::{ + collections::{HashMap, HashSet}, + path::Path, +}; + +use ::serde::{Deserialize, Serialize}; +use anyhow::Error; + +use proxmox_schema::api; + +#[api()] +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +/// SMART status +pub enum SmartStatus { + /// Smart tests passed - everything is OK + Passed, + /// Smart tests failed - disk has problems + Failed, + /// Unknown status + Unknown, +} + +#[api()] +#[derive(Debug, Serialize, 
Deserialize)] +/// SMART Attribute +pub struct SmartAttribute { + /// Attribute name + name: String, + // FIXME: remove value with next major release (PBS 3.0) + /// duplicate of raw - kept for API stability + value: String, + /// Attribute raw value + raw: String, + // the rest of the values is available for ATA type + /// ATA Attribute ID + #[serde(skip_serializing_if = "Option::is_none")] + id: Option, + /// ATA Flags + #[serde(skip_serializing_if = "Option::is_none")] + flags: Option, + /// ATA normalized value (0..100) + #[serde(skip_serializing_if = "Option::is_none")] + normalized: Option, + /// ATA worst + #[serde(skip_serializing_if = "Option::is_none")] + worst: Option, + /// ATA threshold + #[serde(skip_serializing_if = "Option::is_none")] + threshold: Option, +} + +#[api( + properties: { + status: { + type: SmartStatus, + }, + wearout: { + description: "Wearout level.", + type: f64, + optional: true, + }, + attributes: { + description: "SMART attributes.", + type: Array, + items: { + type: SmartAttribute, + }, + }, + }, +)] +#[derive(Debug, Serialize, Deserialize)] +/// Data from smartctl +pub struct SmartData { + pub status: SmartStatus, + pub wearout: Option, + pub attributes: Vec, +} + +/// Read smartctl data for a disk (/dev/XXX). 
+pub fn get_smart_data(disk_path: &Path, health_only: bool) -> Result { + const SMARTCTL_BIN_PATH: &str = "smartctl"; + + let mut command = std::process::Command::new(SMARTCTL_BIN_PATH); + command.arg("-H"); + if !health_only { + command.args(["-A", "-j"]); + } + + command.arg(disk_path); + + let output = proxmox_sys::command::run_command( + command, + Some( + |exitcode| (exitcode & 0b0011) == 0, // only bits 0-1 are fatal errors + ), + )?; + + let output: serde_json::Value = output.parse()?; + + let mut wearout = None; + + let mut attributes = Vec::new(); + let mut wearout_candidates = HashMap::new(); + + // ATA devices + if let Some(list) = output["ata_smart_attributes"]["table"].as_array() { + for item in list { + let id = match item["id"].as_u64() { + Some(id) => id, + None => continue, // skip attributes without id + }; + + let name = match item["name"].as_str() { + Some(name) => name.to_string(), + None => continue, // skip attributes without name + }; + + let raw_value = match item["raw"]["string"].as_str() { + Some(value) => value.to_string(), + None => continue, // skip attributes without raw value + }; + + let flags = match item["flags"]["string"].as_str() { + Some(flags) => flags.to_string(), + None => continue, // skip attributes without flags + }; + + let normalized = match item["value"].as_f64() { + Some(v) => v, + None => continue, // skip attributes without normalize value + }; + + let worst = match item["worst"].as_f64() { + Some(v) => v, + None => continue, // skip attributes without worst entry + }; + + let threshold = match item["thresh"].as_f64() { + Some(v) => v, + None => continue, // skip attributes without threshold entry + }; + + if WEAROUT_FIELD_NAMES.contains(&name as &str) { + wearout_candidates.insert(name.clone(), normalized); + } + + attributes.push(SmartAttribute { + name, + value: raw_value.clone(), + raw: raw_value, + id: Some(id), + flags: Some(flags), + normalized: Some(normalized), + worst: Some(worst), + threshold: 
Some(threshold), + }); + } + } + + if !wearout_candidates.is_empty() { + for field in WEAROUT_FIELD_ORDER { + if let Some(value) = wearout_candidates.get(field as &str) { + wearout = Some(*value); + break; + } + } + } + + // NVME devices + if let Some(list) = output["nvme_smart_health_information_log"].as_object() { + for (name, value) in list { + if name == "percentage_used" { + // extract wearout from nvme text, allow for decimal values + if let Some(v) = value.as_f64() { + if v <= 100.0 { + wearout = Some(100.0 - v); + } + } + } + if let Some(value) = value.as_f64() { + attributes.push(SmartAttribute { + name: name.to_string(), + value: value.to_string(), + raw: value.to_string(), + id: None, + flags: None, + normalized: None, + worst: None, + threshold: None, + }); + } + } + } + + let status = match output["smart_status"]["passed"].as_bool() { + None => SmartStatus::Unknown, + Some(true) => SmartStatus::Passed, + Some(false) => SmartStatus::Failed, + }; + + Ok(SmartData { + status, + wearout, + attributes, + }) +} + +static WEAROUT_FIELD_ORDER: &[&str] = &[ + "Media_Wearout_Indicator", + "SSD_Life_Left", + "Wear_Leveling_Count", + "Perc_Write/Erase_Ct_BC", + "Perc_Rated_Life_Remain", + "Remaining_Lifetime_Perc", + "Percent_Lifetime_Remain", + "Lifetime_Left", + "PCT_Life_Remaining", + "Lifetime_Remaining", + "Percent_Life_Remaining", + "Percent_Lifetime_Used", + "Perc_Rated_Life_Used", +]; + +static WEAROUT_FIELD_NAMES: LazyLock> = + LazyLock::new(|| WEAROUT_FIELD_ORDER.iter().cloned().collect()); diff --git a/proxmox-disks/src/zfs.rs b/proxmox-disks/src/zfs.rs new file mode 100644 index 00000000..0babb887 --- /dev/null +++ b/proxmox-disks/src/zfs.rs @@ -0,0 +1,205 @@ +use std::collections::HashSet; +use std::os::unix::fs::MetadataExt; +use std::path::PathBuf; +use std::sync::{LazyLock, Mutex}; + +use anyhow::{bail, Error}; + +use proxmox_schema::const_regex; + +use super::*; + +static ZFS_UUIDS: LazyLock> = LazyLock::new(|| { + let mut set = HashSet::new(); + 
set.insert("6a898cc3-1dd2-11b2-99a6-080020736631"); // apple + set.insert("516e7cba-6ecf-11d6-8ff8-00022d09712b"); // bsd + set +}); + +fn get_pool_from_dataset(dataset: &str) -> &str { + if let Some(idx) = dataset.find('/') { + dataset[0..idx].as_ref() + } else { + dataset + } +} + +/// Returns kernel IO-stats for zfs pools +pub fn zfs_pool_stats(pool: &OsStr) -> Result, Error> { + let mut path = PathBuf::from("/proc/spl/kstat/zfs"); + path.push(pool); + path.push("io"); + + let text = match proxmox_sys::fs::file_read_optional_string(&path)? { + Some(text) => text, + None => { + return Ok(None); + } + }; + + let lines: Vec<&str> = text.lines().collect(); + + if lines.len() < 3 { + bail!("unable to parse {:?} - got less than 3 lines", path); + } + + // https://github.com/openzfs/zfs/blob/master/lib/libspl/include/sys/kstat.h#L578 + // nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt + // Note: w -> wait (wtime -> wait time) + // Note: r -> run (rtime -> run time) + // All times are nanoseconds + let stat: Vec = lines[2] + .split_ascii_whitespace() + .map(|s| s.parse().unwrap_or_default()) + .collect(); + + let ticks = (stat[4] + stat[7]) / 1_000_000; // convert to milisec + + let stat = BlockDevStat { + read_sectors: stat[0] >> 9, + write_sectors: stat[1] >> 9, + read_ios: stat[2], + write_ios: stat[3], + io_ticks: ticks, + }; + + Ok(Some(stat)) +} + +/// Get set of devices used by zfs (or a specific zfs pool) +/// +/// The set is indexed by using the unix raw device number (dev_t is u64) +pub fn zfs_devices(lsblk_info: &[LsblkInfo], pool: Option) -> Result, Error> { + let list = zpool_list(pool.as_ref(), true)?; + + let mut device_set = HashSet::new(); + for entry in list { + for device in entry.devices { + let meta = std::fs::metadata(device)?; + device_set.insert(meta.rdev()); + } + } + if pool.is_none() { + for info in lsblk_info.iter() { + if let Some(partition_type) = &info.partition_type { + if 
ZFS_UUIDS.contains(partition_type.as_str()) { + let meta = std::fs::metadata(&info.path)?; + device_set.insert(meta.rdev()); + } + } + } + } + + Ok(device_set) +} + +const ZFS_KSTAT_BASE_PATH: &str = "/proc/spl/kstat/zfs"; +const_regex! { + OBJSET_REGEX = r"^objset-0x[a-fA-F0-9]+$"; +} + +static ZFS_DATASET_OBJSET_MAP: LazyLock>> = + LazyLock::new(|| Mutex::new(HashMap::new())); + +// parses /proc/spl/kstat/zfs/POOL/objset-ID files +// they have the following format: +// +// 0 0 0x00 0 0000 00000000000 000000000000000000 +// name type data +// dataset_name 7 pool/dataset +// writes 4 0 +// nwritten 4 0 +// reads 4 0 +// nread 4 0 +// nunlinks 4 0 +// nunlinked 4 0 +// +// we are only interested in the dataset_name, writes, nwrites, reads and nread +fn parse_objset_stat(pool: &str, objset_id: &str) -> Result<(String, BlockDevStat), Error> { + let path = PathBuf::from(format!("{ZFS_KSTAT_BASE_PATH}/{pool}/{objset_id}")); + + let text = match proxmox_sys::fs::file_read_optional_string(path)? 
{ + Some(text) => text, + None => bail!("could not parse '{}' stat file", objset_id), + }; + + let mut dataset_name = String::new(); + let mut stat = BlockDevStat { + read_sectors: 0, + write_sectors: 0, + read_ios: 0, + write_ios: 0, + io_ticks: 0, + }; + + for (i, line) in text.lines().enumerate() { + if i < 2 { + continue; + } + + let mut parts = line.split_ascii_whitespace(); + let name = parts.next(); + parts.next(); // discard type + let value = parts.next().ok_or_else(|| format_err!("no value found"))?; + match name { + Some("dataset_name") => dataset_name = value.to_string(), + Some("writes") => stat.write_ios = value.parse().unwrap_or_default(), + Some("nwritten") => stat.write_sectors = value.parse::().unwrap_or_default() / 512, + Some("reads") => stat.read_ios = value.parse().unwrap_or_default(), + Some("nread") => stat.read_sectors = value.parse::().unwrap_or_default() / 512, + _ => {} + } + } + + Ok((dataset_name, stat)) +} + +fn get_mapping(dataset: &str) -> Option<(String, String)> { + ZFS_DATASET_OBJSET_MAP + .lock() + .unwrap() + .get(dataset) + .map(|c| c.to_owned()) +} + +/// Updates the dataset <-> objset_map +pub(crate) fn update_zfs_objset_map(pool: &str) -> Result<(), Error> { + let mut map = ZFS_DATASET_OBJSET_MAP.lock().unwrap(); + map.clear(); + let path = PathBuf::from(format!("{ZFS_KSTAT_BASE_PATH}/{pool}")); + + proxmox_sys::fs::scandir( + libc::AT_FDCWD, + &path, + &OBJSET_REGEX, + |_l2_fd, filename, _type| { + let (name, _) = parse_objset_stat(pool, filename)?; + map.insert(name, (pool.to_string(), filename.to_string())); + Ok(()) + }, + )?; + + Ok(()) +} + +/// Gets io stats for the dataset from /proc/spl/kstat/zfs/POOL/objset-ID +pub fn zfs_dataset_stats(dataset: &str) -> Result { + let mut mapping = get_mapping(dataset); + if mapping.is_none() { + let pool = get_pool_from_dataset(dataset); + update_zfs_objset_map(pool)?; + mapping = get_mapping(dataset); + } + let (pool, objset_id) = + mapping.ok_or_else(|| format_err!("could not 
find objset id for dataset"))?; + + match parse_objset_stat(&pool, &objset_id) { + Ok((_, stat)) => Ok(stat), + Err(err) => { + // on error remove dataset from map, it probably vanished or the + // mapping was incorrect + ZFS_DATASET_OBJSET_MAP.lock().unwrap().remove(dataset); + Err(err) + } + } +} diff --git a/proxmox-disks/src/zpool_list.rs b/proxmox-disks/src/zpool_list.rs new file mode 100644 index 00000000..4083629f --- /dev/null +++ b/proxmox-disks/src/zpool_list.rs @@ -0,0 +1,294 @@ +use anyhow::{bail, Error}; + +use crate::parse_helpers::{notspace1, IResult}; + +use nom::{ + bytes::complete::{take_till, take_till1, take_while1}, + character::complete::{char, digit1, line_ending, space0, space1}, + combinator::{all_consuming, map_res, opt, recognize}, + multi::many0, + sequence::{preceded, tuple}, +}; + +#[derive(Debug, PartialEq)] +pub struct ZFSPoolUsage { + pub size: u64, + pub alloc: u64, + pub free: u64, + pub dedup: f64, + pub frag: u64, +} + +#[derive(Debug, PartialEq)] +pub struct ZFSPoolInfo { + pub name: String, + pub health: String, + pub usage: Option, + pub devices: Vec, +} + +fn parse_optional_u64(i: &str) -> IResult<&str, Option> { + if let Some(rest) = i.strip_prefix('-') { + Ok((rest, None)) + } else { + let (i, value) = map_res(recognize(digit1), str::parse)(i)?; + Ok((i, Some(value))) + } +} + +fn parse_optional_f64(i: &str) -> IResult<&str, Option> { + if let Some(rest) = i.strip_prefix('-') { + Ok((rest, None)) + } else { + let (i, value) = nom::number::complete::double(i)?; + Ok((i, Some(value))) + } +} + +fn parse_pool_device(i: &str) -> IResult<&str, String> { + let (i, (device, _, _rest)) = tuple(( + preceded(space1, take_till1(|c| c == ' ' || c == '\t')), + space1, + preceded(take_till(|c| c == '\n'), char('\n')), + ))(i)?; + + Ok((i, device.to_string())) +} + +fn parse_zpool_list_header(i: &str) -> IResult<&str, ZFSPoolInfo> { + // name, size, allocated, free, checkpoint, expandsize, fragmentation, capacity, dedupratio, health, 
altroot. + + let (i, (text, size, alloc, free, _, _, frag, _, dedup, health, _altroot, _eol)) = tuple(( + take_while1(|c| char::is_alphanumeric(c) || c == '-' || c == ':' || c == '_' || c == '.'), // name + preceded(space1, parse_optional_u64), // size + preceded(space1, parse_optional_u64), // allocated + preceded(space1, parse_optional_u64), // free + preceded(space1, notspace1), // checkpoint + preceded(space1, notspace1), // expandsize + preceded(space1, parse_optional_u64), // fragmentation + preceded(space1, notspace1), // capacity + preceded(space1, parse_optional_f64), // dedup + preceded(space1, notspace1), // health + opt(preceded(space1, notspace1)), // optional altroot + line_ending, + ))(i)?; + + let status = if let (Some(size), Some(alloc), Some(free), Some(frag), Some(dedup)) = + (size, alloc, free, frag, dedup) + { + ZFSPoolInfo { + name: text.into(), + health: health.into(), + usage: Some(ZFSPoolUsage { + size, + alloc, + free, + frag, + dedup, + }), + devices: Vec::new(), + } + } else { + ZFSPoolInfo { + name: text.into(), + health: health.into(), + usage: None, + devices: Vec::new(), + } + }; + + Ok((i, status)) +} + +fn parse_zpool_list_item(i: &str) -> IResult<&str, ZFSPoolInfo> { + let (i, mut stat) = parse_zpool_list_header(i)?; + let (i, devices) = many0(parse_pool_device)(i)?; + + for device_path in devices.into_iter().filter(|n| n.starts_with("/dev/")) { + stat.devices.push(device_path); + } + + let (i, _) = many0(tuple((space0, char('\n'))))(i)?; // skip empty lines + + Ok((i, stat)) +} + +/// Parse zpool list output +/// +/// Note: This does not reveal any details on how the pool uses the devices, because +/// the zpool list output format is not really defined... 
+fn parse_zpool_list(i: &str) -> Result, Error> { + match all_consuming(many0(parse_zpool_list_item))(i) { + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + bail!( + "unable to parse zfs list output - {}", + nom::error::convert_error(i, err) + ); + } + Err(err) => { + bail!("unable to parse zfs list output - {}", err); + } + Ok((_, ce)) => Ok(ce), + } +} + +/// Run zpool list and return parsed output +/// +/// Devices are only included when run with verbose flags +/// set. Without, device lists are empty. +pub fn zpool_list(pool: Option<&String>, verbose: bool) -> Result, Error> { + // Note: zpools list verbose output can include entries for 'special', 'cache' and 'logs' + // and maybe other things. + + let mut command = std::process::Command::new("zpool"); + command.args(["list", "-H", "-p", "-P"]); + + // Note: We do not use -o to define output properties, because zpool command ignores + // that completely for special vdevs and devices + + if verbose { + command.arg("-v"); + } + + if let Some(pool) = pool { + command.arg(pool); + } + + let output = proxmox_sys::command::run_command(command, None)?; + + parse_zpool_list(&output) +} + +#[test] +fn test_zfs_parse_list() -> Result<(), Error> { + let output = ""; + + let data = parse_zpool_list(output)?; + let expect = Vec::new(); + + assert_eq!(data, expect); + + let output = "btest 427349245952 405504 427348840448 - - 0 0 1.00 ONLINE -\n"; + let data = parse_zpool_list(output)?; + let expect = vec![ZFSPoolInfo { + name: "btest".to_string(), + health: "ONLINE".to_string(), + devices: Vec::new(), + usage: Some(ZFSPoolUsage { + size: 427349245952, + alloc: 405504, + free: 427348840448, + dedup: 1.0, + frag: 0, + }), + }]; + + assert_eq!(data, expect); + + let output = "\ +rpool 535260299264 402852388864 132407910400 - - 22 75 1.00 ONLINE - + /dev/disk/by-id/ata-Crucial_CT500MX200SSD1_154210EB4078-part3 498216206336 392175546368 106040659968 - - 22 78 - ONLINE +special - - - - - - - - - + /dev/sda2 
37044092928 10676842496 26367250432 - - 63 28 - ONLINE +logs - - - - - - - - - + /dev/sda3 4831838208 1445888 4830392320 - - 0 0 - ONLINE + +"; + + let data = parse_zpool_list(output)?; + let expect = vec![ + ZFSPoolInfo { + name: String::from("rpool"), + health: String::from("ONLINE"), + devices: vec![String::from( + "/dev/disk/by-id/ata-Crucial_CT500MX200SSD1_154210EB4078-part3", + )], + usage: Some(ZFSPoolUsage { + size: 535260299264, + alloc: 402852388864, + free: 132407910400, + dedup: 1.0, + frag: 22, + }), + }, + ZFSPoolInfo { + name: String::from("special"), + health: String::from("-"), + devices: vec![String::from("/dev/sda2")], + usage: None, + }, + ZFSPoolInfo { + name: String::from("logs"), + health: String::from("-"), + devices: vec![String::from("/dev/sda3")], + usage: None, + }, + ]; + + assert_eq!(data, expect); + + let output = "\ +b-test 427349245952 761856 427348484096 - - 0 0 1.00 ONLINE - + mirror 213674622976 438272 213674184704 - - 0 0 - ONLINE + /dev/sda1 - - - - - - - - ONLINE + /dev/sda2 - - - - - - - - ONLINE + mirror 213674622976 323584 213674299392 - - 0 0 - ONLINE + /dev/sda3 - - - - - - - - ONLINE + /dev/sda4 - - - - - - - - ONLINE +logs - - - - - - - - - + /dev/sda5 213674622976 0 213674622976 - - 0 0 - ONLINE +"; + + let data = parse_zpool_list(output)?; + let expect = vec![ + ZFSPoolInfo { + name: String::from("b-test"), + health: String::from("ONLINE"), + usage: Some(ZFSPoolUsage { + size: 427349245952, + alloc: 761856, + free: 427348484096, + dedup: 1.0, + frag: 0, + }), + devices: vec![ + String::from("/dev/sda1"), + String::from("/dev/sda2"), + String::from("/dev/sda3"), + String::from("/dev/sda4"), + ], + }, + ZFSPoolInfo { + name: String::from("logs"), + health: String::from("-"), + usage: None, + devices: vec![String::from("/dev/sda5")], + }, + ]; + + assert_eq!(data, expect); + + let output = "\ +b.test 427349245952 761856 427348484096 - - 0 0 1.00 ONLINE - + mirror 213674622976 438272 213674184704 - - 0 0 - ONLINE + 
/dev/sda1 - - - - - - - - ONLINE +"; + + let data = parse_zpool_list(output)?; + let expect = vec![ZFSPoolInfo { + name: String::from("b.test"), + health: String::from("ONLINE"), + usage: Some(ZFSPoolUsage { + size: 427349245952, + alloc: 761856, + free: 427348484096, + dedup: 1.0, + frag: 0, + }), + devices: vec![String::from("/dev/sda1")], + }]; + + assert_eq!(data, expect); + + Ok(()) +} diff --git a/proxmox-disks/src/zpool_status.rs b/proxmox-disks/src/zpool_status.rs new file mode 100644 index 00000000..674dbe63 --- /dev/null +++ b/proxmox-disks/src/zpool_status.rs @@ -0,0 +1,496 @@ +use std::mem::{replace, take}; + +use anyhow::{bail, Error}; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; + +use crate::parse_helpers::{notspace1, parse_complete, parse_error, parse_failure, IResult}; + +use nom::{ + bytes::complete::{tag, take_while, take_while1}, + character::complete::{line_ending, space0, space1}, + combinator::opt, + error::VerboseError, + multi::{many0, many1}, + sequence::preceded, +}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ZFSPoolVDevState { + pub name: String, + pub lvl: u64, + #[serde(skip_serializing_if = "Option::is_none")] + pub state: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub read: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub write: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub cksum: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub msg: Option, +} + +fn expand_tab_length(input: &str) -> usize { + input.chars().map(|c| if c == '\t' { 8 } else { 1 }).sum() +} + +fn parse_zpool_status_vdev(i: &str) -> IResult<&str, ZFSPoolVDevState> { + let (n, indent) = space0(i)?; + + let indent_len = expand_tab_length(indent); + + if (indent_len & 1) != 0 { + return Err(parse_failure(n, "wrong indent length")); + } + let i = n; + + let indent_level = (indent_len as u64) / 2; + + let (i, vdev_name) = notspace1(i)?; + + if let Ok((n, _)) = 
preceded(space0::<&str, VerboseError<&str>>, line_ending)(i) { + // special device + let vdev = ZFSPoolVDevState { + name: vdev_name.to_string(), + lvl: indent_level, + state: None, + read: None, + write: None, + cksum: None, + msg: None, + }; + return Ok((n, vdev)); + } + + let (i, state) = preceded(space1, notspace1)(i)?; + if let Ok((n, _)) = preceded(space0::<&str, VerboseError<&str>>, line_ending)(i) { + // spares + let vdev = ZFSPoolVDevState { + name: vdev_name.to_string(), + lvl: indent_level, + state: Some(state.to_string()), + read: None, + write: None, + cksum: None, + msg: None, + }; + return Ok((n, vdev)); + } + + let (i, read) = preceded(space1, nom::character::complete::u64)(i)?; + let (i, write) = preceded(space1, nom::character::complete::u64)(i)?; + let (i, cksum) = preceded(space1, nom::character::complete::u64)(i)?; + let (i, msg) = opt(preceded(space1, take_while(|c| c != '\n')))(i)?; + let (i, _) = line_ending(i)?; + + let vdev = ZFSPoolVDevState { + name: vdev_name.to_string(), + lvl: indent_level, + state: Some(state.to_string()), + read: Some(read), + write: Some(write), + cksum: Some(cksum), + msg: msg.map(String::from), + }; + + Ok((i, vdev)) +} + +fn parse_zpool_status_tree(i: &str) -> IResult<&str, Vec> { + // skip header + let (i, _) = tag("NAME")(i)?; + let (i, _) = space1(i)?; + let (i, _) = tag("STATE")(i)?; + let (i, _) = space1(i)?; + let (i, _) = tag("READ")(i)?; + let (i, _) = space1(i)?; + let (i, _) = tag("WRITE")(i)?; + let (i, _) = space1(i)?; + let (i, _) = tag("CKSUM")(i)?; + let (i, _) = line_ending(i)?; + + // parse vdev list + many1(parse_zpool_status_vdev)(i) +} + +fn space_indented_line(indent: usize) -> impl Fn(&str) -> IResult<&str, &str> { + move |i| { + let mut len = 0; + let mut n = i; + loop { + if n.starts_with('\t') { + len += 8; + } else if n.starts_with(' ') { + len += 1; + } else { + break; + } + n = &n[1..]; + if len >= indent { + break; + } + } + if len != indent { + return Err(parse_error(i, "not 
correctly indented")); + } + + take_while1(|c| c != '\n')(n) + } +} + +fn parse_zpool_status_field(i: &str) -> IResult<&str, (String, String)> { + let (i, prefix) = take_while1(|c| c != ':')(i)?; + let (i, _) = tag(":")(i)?; + let (i, mut value) = take_while(|c| c != '\n')(i)?; + if value.starts_with(' ') { + value = &value[1..]; + } + + let (mut i, _) = line_ending(i)?; + + let field = prefix.trim().to_string(); + + let prefix_len = expand_tab_length(prefix); + + let indent: usize = prefix_len + 2; + + let mut parse_continuation = opt(space_indented_line(indent)); + + let mut value = value.to_string(); + + if field == "config" { + let (n, _) = line_ending(i)?; + i = n; + } + + loop { + let (n, cont) = parse_continuation(i)?; + + if let Some(cont) = cont { + let (n, _) = line_ending(n)?; + i = n; + if !value.is_empty() { + value.push('\n'); + } + value.push_str(cont); + } else { + if field == "config" { + let (n, _) = line_ending(i)?; + value.push('\n'); + i = n; + } + break; + } + } + + Ok((i, (field, value))) +} + +pub fn parse_zpool_status_config_tree(i: &str) -> Result, Error> { + parse_complete("zfs status config tree", i, parse_zpool_status_tree) +} + +fn parse_zpool_status(input: &str) -> Result, Error> { + parse_complete("zfs status output", input, many0(parse_zpool_status_field)) +} + +pub fn vdev_list_to_tree(vdev_list: &[ZFSPoolVDevState]) -> Result { + indented_list_to_tree(vdev_list, |vdev| { + let node = serde_json::to_value(vdev).unwrap(); + (node, vdev.lvl) + }) +} + +fn indented_list_to_tree<'a, T, F, I>(items: I, to_node: F) -> Result +where + T: 'a, + I: IntoIterator, + F: Fn(&T) -> (Value, u64), +{ + struct StackItem { + node: Map, + level: u64, + children_of_parent: Vec, + } + + let mut stack = Vec::::new(); + // hold current node and the children of the current parent (as that's where we insert) + let mut cur = StackItem { + node: Map::::new(), + level: 0, + children_of_parent: Vec::new(), + }; + + for item in items { + let (node, node_level) 
= to_node(item); + let vdev_level = 1 + node_level; + let mut node = match node { + Value::Object(map) => map, + _ => bail!("to_node returned wrong type"), + }; + + node.insert("leaf".to_string(), Value::Bool(true)); + + // if required, go back up (possibly multiple levels): + while vdev_level < cur.level { + cur.children_of_parent.push(Value::Object(cur.node)); + let mut parent = stack.pop().unwrap(); + parent + .node + .insert("children".to_string(), Value::Array(cur.children_of_parent)); + parent.node.insert("leaf".to_string(), Value::Bool(false)); + cur = parent; + + if vdev_level > cur.level { + // when we encounter mismatching levels like "0, 2, 1" instead of "0, 1, 2, 1" + bail!("broken indentation between levels"); + } + } + + if vdev_level > cur.level { + // indented further, push our current state and start a new "map" + stack.push(StackItem { + node: replace(&mut cur.node, node), + level: replace(&mut cur.level, vdev_level), + children_of_parent: take(&mut cur.children_of_parent), + }); + } else { + // same indentation level, add to children of the previous level: + cur.children_of_parent + .push(Value::Object(replace(&mut cur.node, node))); + } + } + + while !stack.is_empty() { + cur.children_of_parent.push(Value::Object(cur.node)); + let mut parent = stack.pop().unwrap(); + parent + .node + .insert("children".to_string(), Value::Array(cur.children_of_parent)); + parent.node.insert("leaf".to_string(), Value::Bool(false)); + cur = parent; + } + + Ok(Value::Object(cur.node)) +} + +#[test] +fn test_vdev_list_to_tree() { + const DEFAULT: ZFSPoolVDevState = ZFSPoolVDevState { + name: String::new(), + lvl: 0, + state: None, + read: None, + write: None, + cksum: None, + msg: None, + }; + + #[rustfmt::skip] + let input = vec![ + //ZFSPoolVDevState { name: "root".to_string(), lvl: 0, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev1".to_string(), lvl: 1, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev1-disk1".to_string(), lvl: 2, ..DEFAULT }, + ZFSPoolVDevState { 
name: "vdev1-disk2".to_string(), lvl: 2, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev2".to_string(), lvl: 1, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev2-g1".to_string(), lvl: 2, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev2-g1-d1".to_string(), lvl: 3, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev2-g1-d2".to_string(), lvl: 3, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev2-g2".to_string(), lvl: 2, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev3".to_string(), lvl: 1, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev4".to_string(), lvl: 1, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev4-g1".to_string(), lvl: 2, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev4-g1-d1".to_string(), lvl: 3, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev4-g1-d1-x1".to_string(), lvl: 4, ..DEFAULT }, + ZFSPoolVDevState { name: "vdev4-g2".to_string(), lvl: 2, ..DEFAULT }, // up by 2 + ]; + + const EXPECTED: &str = "{\ + \"children\":[{\ + \"children\":[{\ + \"leaf\":true,\ + \"lvl\":2,\"name\":\"vdev1-disk1\"\ + },{\ + \"leaf\":true,\ + \"lvl\":2,\"name\":\"vdev1-disk2\"\ + }],\ + \"leaf\":false,\ + \"lvl\":1,\"name\":\"vdev1\"\ + },{\ + \"children\":[{\ + \"children\":[{\ + \"leaf\":true,\ + \"lvl\":3,\"name\":\"vdev2-g1-d1\"\ + },{\ + \"leaf\":true,\ + \"lvl\":3,\"name\":\"vdev2-g1-d2\"\ + }],\ + \"leaf\":false,\ + \"lvl\":2,\"name\":\"vdev2-g1\"\ + },{\ + \"leaf\":true,\ + \"lvl\":2,\"name\":\"vdev2-g2\"\ + }],\ + \"leaf\":false,\ + \"lvl\":1,\"name\":\"vdev2\"\ + },{\ + \"leaf\":true,\ + \"lvl\":1,\"name\":\"vdev3\"\ + },{\ + \"children\":[{\ + \"children\":[{\ + \"children\":[{\ + \"leaf\":true,\ + \"lvl\":4,\"name\":\"vdev4-g1-d1-x1\"\ + }],\ + \"leaf\":false,\ + \"lvl\":3,\"name\":\"vdev4-g1-d1\"\ + }],\ + \"leaf\":false,\ + \"lvl\":2,\"name\":\"vdev4-g1\"\ + },{\ + \"leaf\":true,\ + \"lvl\":2,\"name\":\"vdev4-g2\"\ + }],\ + \"leaf\":false,\ + \"lvl\":1,\"name\":\"vdev4\"\ + }],\ + \"leaf\":false\ + }"; + let expected: Value = + serde_json::from_str(EXPECTED).expect("failed to parse 
expected json value"); + + let tree = vdev_list_to_tree(&input).expect("failed to turn valid vdev list into a tree"); + assert_eq!(tree, expected); +} + +pub fn zpool_status(pool: &str) -> Result, Error> { + let mut command = std::process::Command::new("zpool"); + command.args(["status", "-p", "-P", pool]); + + let output = proxmox_sys::command::run_command(command, None)?; + + parse_zpool_status(&output) +} + +#[cfg(test)] +fn test_parse(output: &str) -> Result<(), Error> { + let mut found_config = false; + + for (k, v) in parse_zpool_status(output)? { + println!("<{k}> => '{v}'"); + if k == "config" { + let vdev_list = parse_zpool_status_config_tree(&v)?; + let _tree = vdev_list_to_tree(&vdev_list); + found_config = true; + } + } + if !found_config { + bail!("got zpool status without config key"); + } + + Ok(()) +} + +#[test] +fn test_zpool_status_parser() -> Result<(), Error> { + let output = r###" pool: tank + state: DEGRADED +status: One or more devices could not be opened. Sufficient replicas exist for + the pool to continue functioning in a degraded state. +action: Attach the missing device and online it using 'zpool online'. 
+ see: http://www.sun.com/msg/ZFS-8000-2Q + scrub: none requested +config: + + NAME STATE READ WRITE CKSUM + tank DEGRADED 0 0 0 + mirror-0 DEGRADED 0 0 0 + c1t0d0 ONLINE 0 0 0 + c1t2d0 ONLINE 0 0 0 + c1t1d0 UNAVAIL 0 0 0 cannot open + mirror-1 DEGRADED 0 0 0 + tank1 DEGRADED 0 0 0 + tank2 DEGRADED 0 0 0 + +errors: No known data errors +"###; + + test_parse(output) +} + +#[test] +fn test_zpool_status_parser2() -> Result<(), Error> { + // Note: this input create TABS + let output = r###" pool: btest + state: ONLINE + scan: none requested +config: + + NAME STATE READ WRITE CKSUM + btest ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + /dev/sda1 ONLINE 0 0 0 + /dev/sda2 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + /dev/sda3 ONLINE 0 0 0 + /dev/sda4 ONLINE 0 0 0 + logs + /dev/sda5 ONLINE 0 0 0 + +errors: No known data errors +"###; + test_parse(output) +} + +#[test] +fn test_zpool_status_parser3() -> Result<(), Error> { + let output = r###" pool: bt-est + state: ONLINE + scan: none requested +config: + + NAME STATE READ WRITE CKSUM + bt-est ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + /dev/sda1 ONLINE 0 0 0 + /dev/sda2 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + /dev/sda3 ONLINE 0 0 0 + /dev/sda4 ONLINE 0 0 0 + logs + /dev/sda5 ONLINE 0 0 0 + +errors: No known data errors +"###; + + test_parse(output) +} + +#[test] +fn test_zpool_status_parser_spares() -> Result<(), Error> { + let output = r###" pool: tank + state: ONLINE + scan: none requested +config: + + NAME STATE READ WRITE CKSUM + tank ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + /dev/sda1 ONLINE 0 0 0 + /dev/sda2 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + /dev/sda3 ONLINE 0 0 0 + /dev/sda4 ONLINE 0 0 0 + logs + /dev/sda5 ONLINE 0 0 0 + spares + /dev/sdb AVAIL + /dev/sdc AVAIL + +errors: No known data errors +"###; + + test_parse(output) +} -- 2.47.3