From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id D44451FF141 for ; Fri, 13 Feb 2026 10:47:44 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8E37F33C26; Fri, 13 Feb 2026 10:47:19 +0100 (CET) From: Kefu Chai To: pve-devel@lists.proxmox.com Subject: [PATCH pve-cluster 12/14 v2] pmxcfs-rs: add pmxcfs main daemon binary Date: Fri, 13 Feb 2026 17:33:49 +0800 Message-ID: <20260213094119.2379288-13-k.chai@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260213094119.2379288-1-k.chai@proxmox.com> References: <20260213094119.2379288-1-k.chai@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1770975806781 X-SPAM-LEVEL: Spam detection results: 0 AWL -1.321 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLACK 3 Contains an URL listed in the URIBL blacklist [types.rs] URIBL_CSS_A 0.1 Contains URL's A record listed in the Spamhaus CSS blocklist [185.73.182.252] X-MailFrom: k.chai@proxmox.com X-Mailman-Rule-Hits: max-size X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; news-moderation; no-subject; digests; suspicious-header Message-ID-Hash: FAAXYU5AHHSEYF276FGD363SJXP672L6 X-Message-ID-Hash: FAAXYU5AHHSEYF276FGD363SJXP672L6 X-Mailman-Approved-At: Fri, 13 Feb 2026 10:47:06 +0100 X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Main daemon binary integrating all crates with: - FUSE filesystem with broadcast-first architecture - 8 plugin system files (clusterlog, versions, nodeip, etc.) - Configuration initialization using Config::shared() - Status initialization with status::init_with_config_and_rrd() - Enhanced corosync config import logic - 5 FUSE integration tests (basic, cluster, integration, locks, symlink) Signed-off-by: Kefu Chai --- src/pmxcfs-rs/Cargo.toml | 11 +- src/pmxcfs-rs/pmxcfs/Cargo.toml | 84 + src/pmxcfs-rs/pmxcfs/README.md | 174 ++ .../pmxcfs/src/cluster_config_service.rs | 317 ++++ src/pmxcfs-rs/pmxcfs/src/daemon.rs | 314 ++++ src/pmxcfs-rs/pmxcfs/src/file_lock.rs | 105 ++ src/pmxcfs-rs/pmxcfs/src/fuse/README.md | 199 ++ src/pmxcfs-rs/pmxcfs/src/fuse/filesystem.rs | 1644 +++++++++++++++++ src/pmxcfs-rs/pmxcfs/src/fuse/mod.rs | 4 + src/pmxcfs-rs/pmxcfs/src/ipc/mod.rs | 16 + src/pmxcfs-rs/pmxcfs/src/ipc/request.rs | 314 ++++ src/pmxcfs-rs/pmxcfs/src/ipc/service.rs | 684 +++++++ src/pmxcfs-rs/pmxcfs/src/lib.rs | 13 + src/pmxcfs-rs/pmxcfs/src/logging.rs | 44 + src/pmxcfs-rs/pmxcfs/src/main.rs | 711 +++++++ src/pmxcfs-rs/pmxcfs/src/memdb_callbacks.rs | 663 +++++++ src/pmxcfs-rs/pmxcfs/src/plugins/README.md | 203 ++ .../pmxcfs/src/plugins/clusterlog.rs | 293 +++ src/pmxcfs-rs/pmxcfs/src/plugins/debug.rs | 145 ++ src/pmxcfs-rs/pmxcfs/src/plugins/members.rs | 198 ++ src/pmxcfs-rs/pmxcfs/src/plugins/mod.rs | 30 + src/pmxcfs-rs/pmxcfs/src/plugins/registry.rs | 305 +++ src/pmxcfs-rs/pmxcfs/src/plugins/rrd.rs | 97 + src/pmxcfs-rs/pmxcfs/src/plugins/types.rs | 112 ++ src/pmxcfs-rs/pmxcfs/src/plugins/version.rs | 178 ++ src/pmxcfs-rs/pmxcfs/src/plugins/vmlist.rs | 120 ++ src/pmxcfs-rs/pmxcfs/src/quorum_service.rs | 207 +++ src/pmxcfs-rs/pmxcfs/src/restart_flag.rs | 60 + src/pmxcfs-rs/pmxcfs/src/status_callbacks.rs | 352 ++++ src/pmxcfs-rs/pmxcfs/tests/common/mod.rs | 221 +++ src/pmxcfs-rs/pmxcfs/tests/fuse_basic_test.rs | 216 +++ .../pmxcfs/tests/fuse_cluster_test.rs | 220 +++ .../pmxcfs/tests/fuse_integration_test.rs | 414 +++++ src/pmxcfs-rs/pmxcfs/tests/fuse_locks_test.rs | 377 ++++ .../pmxcfs/tests/local_integration.rs | 277 +++ src/pmxcfs-rs/pmxcfs/tests/quorum_behavior.rs | 274 +++ .../pmxcfs/tests/single_node_functional.rs | 361 ++++ .../pmxcfs/tests/symlink_quorum_test.rs | 145 ++ 38 files changed, 10100 insertions(+), 2 deletions(-) create mode 100644 src/pmxcfs-rs/pmxcfs/Cargo.toml create mode 100644 src/pmxcfs-rs/pmxcfs/README.md create mode 100644 src/pmxcfs-rs/pmxcfs/src/cluster_config_service.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/daemon.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/file_lock.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/fuse/README.md create mode 100644 src/pmxcfs-rs/pmxcfs/src/fuse/filesystem.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/fuse/mod.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/ipc/mod.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/ipc/request.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/ipc/service.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/lib.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/logging.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/main.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/memdb_callbacks.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/README.md create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/clusterlog.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/debug.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/members.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/mod.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/registry.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/rrd.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/types.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/version.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/plugins/vmlist.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/quorum_service.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/restart_flag.rs create mode 100644 src/pmxcfs-rs/pmxcfs/src/status_callbacks.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/common/mod.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/fuse_basic_test.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/fuse_cluster_test.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/fuse_integration_test.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/fuse_locks_test.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/local_integration.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/quorum_behavior.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/single_node_functional.rs create mode 100644 src/pmxcfs-rs/pmxcfs/tests/symlink_quorum_test.rs diff --git a/src/pmxcfs-rs/Cargo.toml b/src/pmxcfs-rs/Cargo.toml index 31bade5f4..a5b67b699 100644 --- a/src/pmxcfs-rs/Cargo.toml +++ b/src/pmxcfs-rs/Cargo.toml @@ -11,6 +11,7 @@ members = [ "pmxcfs-services", # Service framework for automatic retry and lifecycle management "pmxcfs-ipc", # libqb-compatible IPC server "pmxcfs-dfsm", # Distributed Finite State Machine + "pmxcfs", # Main daemon binary ] resolver = "2" @@ -41,6 +42,7 @@ rust-corosync = "0.1" # Core async runtime tokio = { version = "1.35", features = ["full"] } tokio-util = "0.7" +futures = "0.3" # Error handling anyhow = "1.0" @@ -48,29 +50,34 @@ thiserror = "1.0" # Logging and tracing tracing = "0.1" -tracing-subscriber = "0.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-journald = "0.3" # Async trait support async-trait = "0.1" # Serialization serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" bincode = "1.3" bytemuck = { version = "1.14", features = ["derive"] } # Network and cluster bytes = "1.5" sha2 = "0.10" +base64 = "0.21" # Concurrency primitives parking_lot = "0.12" # System integration libc = "0.2" -nix = { version = "0.29", features = ["socket", "poll"] } +nix = { version = "0.27", features = ["fs", "process", "signal", "user", "socket"] } +users = "0.11" # Utilities num_enum = "0.7" +chrono = "0.4" # Development dependencies tempfile = "3.8" diff --git a/src/pmxcfs-rs/pmxcfs/Cargo.toml b/src/pmxcfs-rs/pmxcfs/Cargo.toml new file mode 100644 index 000000000..83fb8edad --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/Cargo.toml @@ -0,0 +1,84 @@ +[package] +name = "pmxcfs" +description = "Proxmox Cluster File System - Rust implementation" +homepage = "https://www.proxmox.com" + +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true + +[lints] +workspace = true + +[lib] +name = "pmxcfs_rs" +path = "src/lib.rs" + +[[bin]] +name = "pmxcfs" +path = "src/main.rs" + +[dependencies] +# Workspace members +pmxcfs-config.workspace = true +pmxcfs-api-types.workspace = true +pmxcfs-memdb.workspace = true +pmxcfs-dfsm.workspace = true +pmxcfs-rrd.workspace = true +pmxcfs-status.workspace = true +pmxcfs-ipc.workspace = true +pmxcfs-services.workspace = true + +# Core async runtime +tokio.workspace = true +tokio-util.workspace = true +async-trait.workspace = true + +# Error handling +anyhow.workspace = true +thiserror.workspace = true + +# Logging and tracing +tracing.workspace = true +tracing-subscriber.workspace = true +tracing-journald.workspace = true + +# Serialization +serde.workspace = true +serde_json.workspace = true +bincode.workspace = true + +# Command-line parsing +clap = { version = "4.4", features = ["derive"] } + +# FUSE filesystem (using local fork with rename support) +proxmox-fuse = { path = "../../../../proxmox-fuse-rs" } + +# Network and cluster +bytes.workspace = true +sha2.workspace = true +bytemuck.workspace = true +base64.workspace = true + +# System integration +libc.workspace = true +nix.workspace = true +users.workspace = true + +# Corosync/CPG bindings +rust-corosync.workspace = true + +# Concurrency primitives +parking_lot.workspace = true + +# Utilities +chrono.workspace = true +futures.workspace = true +num_enum.workspace = true + +[dev-dependencies] +tempfile.workspace = true +pmxcfs-test-utils.workspace = true +filetime = "0.2" diff --git a/src/pmxcfs-rs/pmxcfs/README.md b/src/pmxcfs-rs/pmxcfs/README.md new file mode 100644 index 000000000..eb457d3ba --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/README.md @@ -0,0 +1,174 @@ +# pmxcfs Rust Implementation + +This directory contains the Rust reimplementation of pmxcfs (Proxmox Cluster File System). + +## Architecture Overview + +pmxcfs is a FUSE-based cluster filesystem that provides: +- **Cluster-wide configuration storage** via replicated database (pmxcfs-memdb) +- **State synchronization** across nodes via Corosync CPG (pmxcfs-dfsm) +- **Virtual files** for runtime status (plugins: .version, .members, .vmlist, .rrd) +- **Quorum enforcement** for write protection +- **IPC server** for management tools (pvecm, pvenode) + +### Component Architecture + +### FUSE Plugin System + +Virtual files that appear in `/etc/pve` but don't exist in the database: + +| Plugin | File | Purpose | C Equivalent | +|--------|------|---------|--------------| +| `version.rs` | `.version` | Cluster version info | `cfs-plug-func.c` (cfs_plug_version_read) | +| `members.rs` | `.members` | Cluster member list | `cfs-plug-func.c` (cfs_plug_members_read) | +| `vmlist.rs` | `.vmlist` | VM/CT registry | `cfs-plug-func.c` (cfs_plug_vmlist_read) | +| `rrd.rs` | `.rrd` | RRD dump (all metrics) | `cfs-plug-func.c` (cfs_plug_rrd_read) | +| `clusterlog.rs` | `.clusterlog` | Cluster log viewer | `cfs-plug-func.c` (cfs_plug_clusterlog_read) | +| `debug.rs` | `.debug` | Runtime debug control | `cfs-plug-func.c` (cfs_plug_debug) | + +#### Plugin Trait + +Plugins are registered in `plugins/registry.rs` and integrated into the FUSE filesystem. + +### C File Mapping + +| C Source | Rust Equivalent | Description | +|----------|-----------------|-------------| +| `pmxcfs.c` | `main.rs`, `daemon.rs` | Main entry point, daemon lifecycle | +| `cfs-plug.c` | `fuse/filesystem.rs` | FUSE operations dispatcher | +| `cfs-plug-memdb.c` | `fuse/filesystem.rs` | MemDb integration | +| `cfs-plug-func.c` | `plugins/*.rs` | Virtual file plugins | +| `server.c` | `ipc_service.rs` + pmxcfs-ipc | IPC server | +| `loop.c` | pmxcfs-services | Service management | + +## Key Differences from C Implementation + +### Command-line Options + +Both implementations support the core options with identical behavior: +- `-d` / `--debug` - Turn on debug messages +- `-f` / `--foreground` - Do not daemonize server +- `-l` / `--local` - Force local mode (ignore corosync.conf, force quorum) + +The Rust implementation adds these additional options for flexibility and testing: +- `--test-dir ` - Test directory (sets all paths to subdirectories for isolated testing) +- `--mount ` - Custom mount point (default: /etc/pve) +- `--db ` - Custom database path (default: /var/lib/pve-cluster/config.db) +- `--rundir ` - Custom runtime directory (default: /run/pmxcfs) +- `--cluster-name ` - Cluster name / CPG group name for Corosync isolation (default: "pmxcfs") + +The Rust version is fully backward-compatible with C version command-line usage. The additional options are for advanced use cases (testing, multi-instance deployments) and don't affect standard deployment scenarios. + +### Logging + +**C Implementation**: Uses libqb's qb_log with traditional syslog format + +**Rust Implementation**: Uses tracing + tracing-subscriber with structured output integrated with systemd journald + +Log messages may appear in different format, but journald integration provides same searchability as syslog. Log levels work equivalently (debug, info, warn, error). + +## Plugin System Details + +### Virtual File Plugins + +Each plugin provides a read-only (or read-write) virtual file accessible through the FUSE mount: + +#### `.version` - Version Information + +**Path:** `/etc/pve/.version` +**Format:** `{start_time}:{vmlist_version}:{path_versions...}` +**Purpose:** Allows tools to detect configuration changes +**Implementation:** `plugins/version.rs` + +Example output: +Each number is a version counter that increments on changes. + +#### `.members` - Cluster Members + +**Path:** `/etc/pve/.members` +**Format:** INI-style with member info +**Purpose:** Lists active cluster nodes +**Implementation:** `plugins/members.rs` + +Example output: +Format: `{nodeid}\t{name}\t{online}\t{ip}` + +#### `.vmlist` - VM/CT Registry + +**Path:** `/etc/pve/.vmlist` +**Format:** INI-style with VM info +**Purpose:** Cluster-wide VM/CT registry +**Implementation:** `plugins/vmlist.rs` + +Example output: +Format: `{vmid}\t{node}\t{version}` + +#### `.rrd` - RRD Metrics Dump + +**Path:** `/etc/pve/.rrd` +**Format:** Custom RRD dump format +**Purpose:** Exports all RRD metrics for graph generation +**Implementation:** `plugins/rrd.rs` + +Example output: + +#### `.clusterlog` - Cluster Log + +**Path:** `/etc/pve/.clusterlog` +**Format:** Plain text log entries +**Purpose:** Aggregated cluster-wide log +**Implementation:** `plugins/clusterlog.rs` + +Example output: + +#### `.debug` - Debug Control + +**Path:** `/etc/pve/.debug` +**Format:** Text commands +**Purpose:** Runtime debug level control +**Implementation:** `plugins/debug.rs` + +Write "1" to enable debug logging, "0" to disable. + +### Plugin Registration + +Plugins are registered in `plugins/registry.rs`: + +### FUSE Integration + +The FUSE filesystem checks plugins before MemDb: + +## Crate Structure + +The Rust implementation is organized as a workspace with 9 crates: + +| Crate | Purpose | Lines | C Equivalent | +|-------|---------|-------|--------------| +| **pmxcfs** | Main daemon binary | ~3500 | pmxcfs.c + plugins | +| **pmxcfs-api-types** | Shared types | ~400 | cfs-utils.h | +| **pmxcfs-config** | Configuration | ~75 | (inline in C) | +| **pmxcfs-memdb** | In-memory database | ~2500 | memdb.c + database.c | +| **pmxcfs-dfsm** | State machine | ~3000 | dfsm.c + dcdb.c | +| **pmxcfs-rrd** | RRD persistence | ~800 | status.c (embedded) | +| **pmxcfs-status** | Status tracking | ~900 | status.c | +| **pmxcfs-ipc** | IPC server | ~2000 | server.c | +| **pmxcfs-services** | Service framework | ~500 | loop.c | + +Total: **~14,000 lines** vs C implementation **~15,000 lines** + +## Migration Notes + +The Rust implementation can coexist with C nodes in the same cluster: +- **Wire protocol**: 100% compatible (DFSM, IPC, RRD) +- **Database format**: SQLite schema identical +- **Corosync integration**: Uses same CPG groups +- **File format**: All config files compatible + +## References + +### Documentation +- [Implementation Plan](../../pmxcfs-rust-rewrite-plan.rst) +- Individual crate README.md files for detailed docs + +### C Implementation +- `src/pmxcfs/` - Original C implementation diff --git a/src/pmxcfs-rs/pmxcfs/src/cluster_config_service.rs b/src/pmxcfs-rs/pmxcfs/src/cluster_config_service.rs new file mode 100644 index 000000000..309db2dca --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/cluster_config_service.rs @@ -0,0 +1,317 @@ +//! Cluster Configuration Service +//! +//! This service monitors Corosync cluster configuration changes via the CMAP API. +//! It tracks nodelist changes and configuration version updates, matching the C +//! implementation's service_confdb functionality. + +use async_trait::async_trait; +use pmxcfs_services::{Service, ServiceError}; +use rust_corosync::{self as corosync, CsError, cmap}; +use std::sync::Arc; +use tracing::{debug, error, info, warn}; + +use pmxcfs_status::Status; + +/// Cluster configuration service (matching C's service_confdb) +/// +/// Monitors Corosync CMAP for: +/// - Nodelist changes (`nodelist.node.*`) +/// - Configuration version changes (`totem.config_version`) +/// +/// Updates cluster info when configuration changes are detected. +pub struct ClusterConfigService { + /// CMAP handle (None when not initialized) + cmap_handle: parking_lot::RwLock>, + /// Nodelist track handle + nodelist_track_handle: parking_lot::RwLock>, + /// Config version track handle + version_track_handle: parking_lot::RwLock>, + /// Status instance for cluster info updates + status: Arc, + /// Flag indicating configuration changes detected + changes_detected: parking_lot::RwLock, +} + +impl ClusterConfigService { + /// Create a new cluster configuration service + pub fn new(status: Arc) -> Self { + Self { + cmap_handle: parking_lot::RwLock::new(None), + nodelist_track_handle: parking_lot::RwLock::new(None), + version_track_handle: parking_lot::RwLock::new(None), + status, + changes_detected: parking_lot::RwLock::new(false), + } + } + + /// Read cluster configuration from CMAP + fn read_cluster_config(&self, handle: &cmap::Handle) -> Result<(), anyhow::Error> { + // Read config version + let config_version = match cmap::get(*handle, &"totem.config_version".to_string()) { + Ok(cmap::Data::UInt64(v)) => v, + Ok(cmap::Data::UInt32(v)) => v as u64, + Ok(cmap::Data::UInt16(v)) => v as u64, + Ok(cmap::Data::UInt8(v)) => v as u64, + Ok(_) => { + warn!("Unexpected data type for totem.config_version"); + 0 + } + Err(e) => { + warn!("Failed to read totem.config_version: {:?}", e); + 0 + } + }; + + // Read cluster name + let cluster_name = match cmap::get(*handle, &"totem.cluster_name".to_string()) { + Ok(cmap::Data::String(s)) => s, + Ok(_) => { + error!("totem.cluster_name has unexpected type"); + return Err(anyhow::anyhow!("Invalid cluster_name type")); + } + Err(e) => { + error!("Failed to read totem.cluster_name: {:?}", e); + return Err(anyhow::anyhow!("Failed to read cluster_name")); + } + }; + + info!( + "Cluster configuration: name='{}', version={}", + cluster_name, config_version + ); + + // Read cluster nodes + self.read_cluster_nodes(handle, &cluster_name, config_version)?; + + Ok(()) + } + + /// Read cluster nodes from CMAP nodelist + fn read_cluster_nodes( + &self, + handle: &cmap::Handle, + cluster_name: &str, + config_version: u64, + ) -> Result<(), anyhow::Error> { + let mut nodes = Vec::new(); + + // Iterate through nodelist (nodelist.node.0, nodelist.node.1, etc.) + for node_idx in 0..256 { + let nodeid_key = format!("nodelist.node.{node_idx}.nodeid"); + let name_key = format!("nodelist.node.{node_idx}.name"); + let ring0_key = format!("nodelist.node.{node_idx}.ring0_addr"); + + // Try to read node ID - if it doesn't exist, we've reached the end + let nodeid = match cmap::get(*handle, &nodeid_key) { + Ok(cmap::Data::UInt32(id)) => id, + Ok(cmap::Data::UInt8(id)) => id as u32, + Ok(cmap::Data::UInt16(id)) => id as u32, + Err(CsError::CsErrNotExist) => break, // No more nodes + Err(e) => { + debug!("Error reading {}: {:?}", nodeid_key, e); + continue; + } + Ok(_) => { + warn!("Unexpected type for {}", nodeid_key); + continue; + } + }; + + let name = match cmap::get(*handle, &name_key) { + Ok(cmap::Data::String(s)) => s, + _ => { + debug!("No name for node {}", nodeid); + format!("node{nodeid}") + } + }; + + let ip = match cmap::get(*handle, &ring0_key) { + Ok(cmap::Data::String(s)) => s, + _ => String::new(), + }; + + debug!( + "Found cluster node: id={}, name={}, ip={}", + nodeid, name, ip + ); + nodes.push((nodeid, name, ip)); + } + + info!("Found {} cluster nodes", nodes.len()); + + // Update cluster info in Status + self.status + .update_cluster_info(cluster_name.to_string(), config_version, nodes)?; + + Ok(()) + } +} + +/// CMAP track callback (matches C's track_callback) +/// +/// This function is called by Corosync whenever a tracked CMAP key changes. +/// We use user_data to pass a pointer to the ClusterConfigService. +fn track_callback( + _handle: &cmap::Handle, + _track_handle: &cmap::TrackHandle, + _event: cmap::TrackType, + key_name: &String, // Note: rust-corosync API uses &String not &str + _new_value: &cmap::Data, + _old_value: &cmap::Data, + user_data: u64, +) { + debug!("CMAP track callback: key_name={}", key_name); + + if user_data == 0 { + error!("BUG: CMAP track callback called with null user_data"); + return; + } + + // Safety: user_data contains a valid pointer to ClusterConfigService + // The pointer remains valid because ServiceManager holds the service + unsafe { + let service_ptr = user_data as *const ClusterConfigService; + let service = &*service_ptr; + *service.changes_detected.write() = true; + } +} + +#[async_trait] +impl Service for ClusterConfigService { + fn name(&self) -> &str { + "cluster-config" + } + + async fn initialize(&mut self) -> pmxcfs_services::Result { + info!("Initializing cluster configuration service"); + + // Initialize CMAP connection + let handle = cmap::initialize(cmap::Map::Icmap).map_err(|e| { + ServiceError::InitializationFailed(format!("cmap_initialize failed: {e:?}")) + })?; + + // Store self pointer as user_data for callbacks + let self_ptr = self as *const Self as u64; + + // Create callback struct + let callback = cmap::NotifyCallback { + notify_fn: Some(track_callback), + }; + + // Set up nodelist tracking (matches C's CMAP_TRACK_PREFIX | CMAP_TRACK_ADD | ...) + let nodelist_track = cmap::track_add( + handle, + &"nodelist.node.".to_string(), + cmap::TrackType::PREFIX + | cmap::TrackType::ADD + | cmap::TrackType::DELETE + | cmap::TrackType::MODIFY, + &callback, + self_ptr, + ) + .map_err(|e| { + cmap::finalize(handle).ok(); + ServiceError::InitializationFailed(format!("cmap_track_add (nodelist) failed: {e:?}")) + })?; + + // Set up config version tracking + let version_track = cmap::track_add( + handle, + &"totem.config_version".to_string(), + cmap::TrackType::ADD | cmap::TrackType::DELETE | cmap::TrackType::MODIFY, + &callback, + self_ptr, + ) + .map_err(|e| { + cmap::track_delete(handle, nodelist_track).ok(); + cmap::finalize(handle).ok(); + ServiceError::InitializationFailed(format!( + "cmap_track_add (config_version) failed: {e:?}" + )) + })?; + + // Get file descriptor for event monitoring + let fd = cmap::fd_get(handle).map_err(|e| { + cmap::track_delete(handle, version_track).ok(); + cmap::track_delete(handle, nodelist_track).ok(); + cmap::finalize(handle).ok(); + ServiceError::InitializationFailed(format!("cmap_fd_get failed: {e:?}")) + })?; + + // Read initial configuration + if let Err(e) = self.read_cluster_config(&handle) { + warn!("Failed to read initial cluster configuration: {}", e); + // Don't fail initialization - we'll try again on next change + } + + // Store handles + *self.cmap_handle.write() = Some(handle); + *self.nodelist_track_handle.write() = Some(nodelist_track); + *self.version_track_handle.write() = Some(version_track); + + info!( + "Cluster configuration service initialized successfully with fd {}", + fd + ); + Ok(fd) + } + + async fn dispatch(&mut self) -> pmxcfs_services::Result { + let handle = *self.cmap_handle.read().as_ref().ok_or_else(|| { + ServiceError::DispatchFailed("CMAP handle not initialized".to_string()) + })?; + + // Dispatch CMAP events (matches C's cmap_dispatch with CS_DISPATCH_ALL) + match cmap::dispatch(handle, corosync::DispatchFlags::All) { + Ok(_) => { + // Check if changes were detected (matches C implementation) + if *self.changes_detected.read() { + *self.changes_detected.write() = false; + + // Re-read cluster configuration + if let Err(e) = self.read_cluster_config(&handle) { + warn!("Failed to update cluster configuration: {}", e); + } + } + Ok(true) + } + Err(CsError::CsErrTryAgain) => { + // TRY_AGAIN is expected, continue normally + Ok(true) + } + Err(CsError::CsErrLibrary) | Err(CsError::CsErrBadHandle) => { + // Connection lost, need to reinitialize + warn!("CMAP connection lost, requesting reinitialization"); + Ok(false) + } + Err(e) => { + error!("CMAP dispatch failed: {:?}", e); + Err(ServiceError::DispatchFailed(format!( + "cmap_dispatch failed: {e:?}" + ))) + } + } + } + + async fn finalize(&mut self) -> pmxcfs_services::Result<()> { + info!("Finalizing cluster configuration service"); + + if let Some(handle) = self.cmap_handle.write().take() { + // Remove track handles + if let Some(version_track) = self.version_track_handle.write().take() { + cmap::track_delete(handle, version_track).ok(); + } + if let Some(nodelist_track) = self.nodelist_track_handle.write().take() { + cmap::track_delete(handle, nodelist_track).ok(); + } + + // Finalize CMAP connection + if let Err(e) = cmap::finalize(handle) { + warn!("Error finalizing CMAP: {:?}", e); + } + } + + info!("Cluster configuration service finalized"); + Ok(()) + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/daemon.rs b/src/pmxcfs-rs/pmxcfs/src/daemon.rs new file mode 100644 index 000000000..2327bfd23 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/daemon.rs @@ -0,0 +1,314 @@ +//! Daemon builder with integrated PID file management +//! +//! This module provides a builder-based API for daemonization that combines +//! process forking, parent-child signaling, and PID file management into a +//! cohesive, easy-to-use abstraction. +//! +//! Inspired by the daemonize crate but tailored for pmxcfs needs with async support. + +use anyhow::{Context, Result}; +use nix::unistd::{ForkResult, fork, pipe}; +use pmxcfs_api_types::PmxcfsError; +use std::fs::{self, File}; +use std::os::unix::fs::PermissionsExt; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::path::PathBuf; + +/// RAII guard for PID file - automatically removes file on drop +pub struct PidFileGuard { + path: PathBuf, +} + +impl Drop for PidFileGuard { + fn drop(&mut self) { + if let Err(e) = fs::remove_file(&self.path) { + tracing::warn!( + "Failed to remove PID file at {}: {}", + self.path.display(), + e + ); + } else { + tracing::debug!("Removed PID file at {}", self.path.display()); + } + } +} + +/// Represents the daemon process after daemonization +pub enum DaemonProcess { + /// Parent process - should exit after receiving this + Parent, + /// Child process - contains RAII guard for PID file cleanup + Child(PidFileGuard), +} + +/// Builder for daemon configuration with integrated PID file management +/// +/// Provides a fluent API for configuring daemonization behavior including +/// PID file location, group ownership, and parent-child signaling. +pub struct Daemon { + pid_file: Option, + group: Option, +} + +impl Daemon { + /// Create a new daemon builder with default settings + pub fn new() -> Self { + Self { + pid_file: None, + group: None, + } + } + + /// Set the PID file path + /// + /// The PID file will be created with 0o644 permissions and owned by root:group. + pub fn pid_file>(mut self, path: P) -> Self { + self.pid_file = Some(path.into()); + self + } + + /// Set the group ID for PID file ownership + pub fn group(mut self, gid: u32) -> Self { + self.group = Some(gid); + self + } + + /// Start the daemonization process (foreground mode) + /// + /// Returns a guard that manages PID file lifecycle. + /// The PID file is written immediately and cleaned up when the guard is dropped. + pub fn start_foreground(self) -> Result { + let pid_file_path = self + .pid_file + .ok_or_else(|| PmxcfsError::System("PID file path must be specified".into()))?; + + let gid = self.group.unwrap_or(0); + + // Write PID file with current process ID + write_pid_file(&pid_file_path, std::process::id(), gid)?; + + tracing::info!("Running in foreground mode with PID {}", std::process::id()); + + Ok(PidFileGuard { + path: pid_file_path, + }) + } + + /// Start the daemonization process (daemon mode) + /// + /// Forks the process and returns either: + /// - `DaemonProcess::Parent` - The parent should exit after cleanup + /// - `DaemonProcess::Child(guard)` - The child should continue with daemon operations + /// + /// This uses a pipe-based signaling mechanism where the parent waits for the + /// child to signal readiness before writing the PID file and exiting. + pub fn start_daemon(self) -> Result { + let pid_file_path = self + .pid_file + .ok_or_else(|| PmxcfsError::System("PID file path must be specified".into()))?; + + let gid = self.group.unwrap_or(0); + + // Create pipe for parent-child signaling + let (read_fd, write_fd) = pipe().context("Failed to create pipe for daemonization")?; + + match unsafe { fork() } { + Ok(ForkResult::Parent { child }) => { + // Parent: wait for child to signal readiness + unsafe { libc::close(write_fd) }; + + let mut buffer = [0u8; 1]; + let bytes_read = + unsafe { libc::read(read_fd, buffer.as_mut_ptr() as *mut libc::c_void, 1) }; + let errno = std::io::Error::last_os_error(); + unsafe { libc::close(read_fd) }; + + if bytes_read == -1 { + return Err( + PmxcfsError::System(format!("Failed to read from child: {errno}")).into(), + ); + } else if bytes_read != 1 || buffer[0] != b'1' { + return Err( + PmxcfsError::System("Child failed to send ready signal".into()).into(), + ); + } + + // Child is ready - write PID file with child's PID + let child_pid = child.as_raw() as u32; + write_pid_file(&pid_file_path, child_pid, gid)?; + + tracing::info!("Child process {} signaled ready, parent exiting", child_pid); + + Ok(DaemonProcess::Parent) + } + Ok(ForkResult::Child) => { + // Child: become daemon and return signal handle + unsafe { libc::close(read_fd) }; + + // Create new session + unsafe { + if libc::setsid() == -1 { + return Err( + PmxcfsError::System("Failed to create new session".into()).into() + ); + } + } + + // Change to root directory + std::env::set_current_dir("/")?; + + // Redirect standard streams to /dev/null + let devnull = File::open("/dev/null")?; + unsafe { + libc::dup2(devnull.as_raw_fd(), 0); + libc::dup2(devnull.as_raw_fd(), 1); + libc::dup2(devnull.as_raw_fd(), 2); + } + + // Return child variant - we don't use the write_fd in this simplified version + // Note: This method is not actually used - use start_daemon_with_signal instead + unsafe { libc::close(write_fd) }; + Ok(DaemonProcess::Child(PidFileGuard { + path: pid_file_path, + })) + } + Err(e) => Err(PmxcfsError::System(format!("Failed to fork: {e}")).into()), + } + } + + /// Start daemonization with deferred signaling + /// + /// Returns (DaemonProcess, Option) where SignalHandle + /// must be used to signal the parent when ready. + pub fn start_daemon_with_signal(self) -> Result<(DaemonProcess, Option)> { + let pid_file_path = self + .pid_file + .clone() + .ok_or_else(|| PmxcfsError::System("PID file path must be specified".into()))?; + + let gid = self.group.unwrap_or(0); + + // Create pipe for parent-child signaling + let (read_fd, write_fd) = pipe().context("Failed to create pipe for daemonization")?; + + match unsafe { fork() } { + Ok(ForkResult::Parent { child }) => { + // Parent: wait for child to signal readiness + unsafe { libc::close(write_fd) }; + + let mut buffer = [0u8; 1]; + let bytes_read = + unsafe { libc::read(read_fd, buffer.as_mut_ptr() as *mut libc::c_void, 1) }; + let errno = std::io::Error::last_os_error(); + unsafe { libc::close(read_fd) }; + + if bytes_read == -1 { + return Err( + PmxcfsError::System(format!("Failed to read from child: {errno}")).into(), + ); + } else if bytes_read != 1 || buffer[0] != b'1' { + return Err( + PmxcfsError::System("Child failed to send ready signal".into()).into(), + ); + } + + // Child is ready - write PID file with child's PID + let child_pid = child.as_raw() as u32; + write_pid_file(&pid_file_path, child_pid, gid)?; + + tracing::info!("Child process {} signaled ready, parent exiting", child_pid); + + Ok((DaemonProcess::Parent, None)) + } + Ok(ForkResult::Child) => { + // Child: become daemon and return signal handle + unsafe { libc::close(read_fd) }; + + // Create new session + unsafe { + if libc::setsid() == -1 { + return Err( + PmxcfsError::System("Failed to create new session".into()).into() + ); + } + } + + // Change to root directory + std::env::set_current_dir("/")?; + + // Redirect standard streams to /dev/null + let devnull = File::open("/dev/null")?; + unsafe { + libc::dup2(devnull.as_raw_fd(), 0); + libc::dup2(devnull.as_raw_fd(), 1); + libc::dup2(devnull.as_raw_fd(), 2); + } + + let signal_handle = SignalHandle { write_fd }; + let guard = PidFileGuard { + path: pid_file_path, + }; + + Ok((DaemonProcess::Child(guard), Some(signal_handle))) + } + Err(e) => Err(PmxcfsError::System(format!("Failed to fork: {e}")).into()), + } + } +} + +impl Default for Daemon { + fn default() -> Self { + Self::new() + } +} + +/// Handle for signaling parent process readiness +/// +/// The child process must call `signal_ready()` to inform the parent +/// that all initialization is complete and it's safe to write the PID file. +pub struct SignalHandle { + write_fd: RawFd, +} + +impl SignalHandle { + /// Signal parent that child is ready + /// + /// This must be called after all initialization is complete. + /// The parent will write the PID file and exit after receiving this signal. + pub fn signal_ready(self) -> Result<()> { + unsafe { + let result = libc::write(self.write_fd, b"1".as_ptr() as *const libc::c_void, 1); + libc::close(self.write_fd); + + if result != 1 { + return Err(PmxcfsError::System("Failed to signal parent process".into()).into()); + } + } + tracing::debug!("Signaled parent process - child ready"); + Ok(()) + } +} + +/// Write PID file with specified process ID +fn write_pid_file(path: &PathBuf, pid: u32, gid: u32) -> Result<()> { + let content = format!("{pid}\n"); + + fs::write(path, content) + .with_context(|| format!("Failed to write PID file to {}", path.display()))?; + + // Set permissions (0o644 = rw-r--r--) + let metadata = fs::metadata(path)?; + let mut perms = metadata.permissions(); + perms.set_mode(0o644); + fs::set_permissions(path, perms)?; + + // Set ownership (root:gid) + let path_cstr = std::ffi::CString::new(path.to_string_lossy().as_ref()).unwrap(); + unsafe { + libc::chown(path_cstr.as_ptr(), 0, gid as libc::gid_t); + } + + tracing::info!("Created PID file at {} with PID {}", path.display(), pid); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/src/file_lock.rs b/src/pmxcfs-rs/pmxcfs/src/file_lock.rs new file mode 100644 index 000000000..2180e67b7 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/file_lock.rs @@ -0,0 +1,105 @@ +//! File locking utilities +//! +//! This module provides file-based locking to ensure only one pmxcfs instance +//! runs at a time. It uses the flock(2) system call with exclusive locks. + +use anyhow::{Context, Result}; +use pmxcfs_api_types::PmxcfsError; +use std::fs::File; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::AsRawFd; +use std::path::PathBuf; +use tracing::{info, warn}; + +/// RAII wrapper for a file lock +/// +/// The lock is automatically released when the FileLock is dropped. +pub struct FileLock(File); + +impl FileLock { + const MAX_RETRIES: u32 = 10; + const RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(1); + + /// Acquire an exclusive file lock with retries (async) + /// + /// This function attempts to acquire an exclusive, non-blocking lock on the + /// specified file. It will retry up to 10 times with 1-second delays between + /// attempts, matching the C implementation's behavior. + /// + /// The blocking operations (file I/O and sleep) are executed on a blocking + /// thread pool to avoid blocking the async runtime. + /// + /// # Arguments + /// + /// * `lockfile_path` - Path to the lock file + /// + /// # Returns + /// + /// Returns a `FileLock` which automatically releases the lock when dropped. + /// + /// # Errors + /// + /// Returns an error if: + /// - The lock file cannot be created + /// - The lock cannot be acquired after 10 retry attempts + pub async fn acquire(lockfile_path: PathBuf) -> Result { + // Open/create the lock file on blocking thread pool + let file = tokio::task::spawn_blocking({ + let lockfile_path = lockfile_path.clone(); + move || { + File::options() + .create(true) + .read(true) + .append(true) + .mode(0o600) + .open(&lockfile_path) + .with_context(|| { + format!("Unable to create lock file at {}", lockfile_path.display()) + }) + } + }) + .await + .context("Failed to spawn blocking task for file creation")??; + + // Try to acquire lock with retries (matching C implementation) + for attempt in 0..=Self::MAX_RETRIES { + if Self::try_lock(&file).await? { + info!(path = %lockfile_path.display(), "Acquired pmxcfs lock"); + return Ok(FileLock(file)); + } + + if attempt == Self::MAX_RETRIES { + return Err(PmxcfsError::System("Unable to acquire pmxcfs lock".into()).into()); + } + + if attempt == 0 { + warn!("Unable to acquire pmxcfs lock - retrying"); + } + + tokio::time::sleep(Self::RETRY_DELAY).await; + } + + unreachable!("Loop should have returned or errored") + } + + /// Attempt to acquire the lock (non-blocking) + async fn try_lock(file: &File) -> Result { + let result = tokio::task::spawn_blocking({ + let fd = file.as_raw_fd(); + move || unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) } + }) + .await + .context("Failed to spawn blocking task for flock")?; + + Ok(result == 0) + } +} + +impl Drop for FileLock { + fn drop(&mut self) { + // Safety: We own the file descriptor + unsafe { + libc::flock(self.0.as_raw_fd(), libc::LOCK_UN); + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/fuse/README.md b/src/pmxcfs-rs/pmxcfs/src/fuse/README.md new file mode 100644 index 000000000..832207964 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/fuse/README.md @@ -0,0 +1,199 @@ +# PMXCFS FUSE Filesystem + +## Overview + +PMXCFS provides a FUSE-based cluster filesystem mounted at `/etc/pve`. This filesystem exposes cluster configuration, VM/container configurations, and dynamic status information. + +## Filesystem Structure + +``` +/etc/pve/ +├── local -> nodes/{nodename}/ # Symlink plugin +├── qemu-server -> nodes/{nodename}/qemu-server/ # Symlink plugin +├── lxc -> nodes/{nodename}/lxc/ # Symlink plugin +├── openvz -> nodes/{nodename}/openvz/ # Symlink plugin (legacy) +│ +├── .version # Plugin file +├── .members # Plugin file +├── .vmlist # Plugin file +├── .rrd # Plugin file +├── .clusterlog # Plugin file +├── .debug # Plugin file +│ +├── nodes/ +│ ├── {node1}/ +│ │ ├── qemu-server/ # VM configs +│ │ │ └── {vmid}.conf +│ │ ├── lxc/ # CT configs +│ │ │ └── {ctid}.conf +│ │ ├── openvz/ # Legacy (OpenVZ) +│ │ └── priv/ # Node-specific private data +│ └── {node2}/ +│ └── ... +│ +├── corosync.conf # Cluster configuration +├── corosync.conf.new # Staging for new config +├── storage.cfg # Storage configuration +├── user.cfg # User database +├── domains.cfg # Authentication domains +├── datacenter.cfg # Datacenter settings +├── vzdump.cron # Backup schedule +├── vzdump.conf # Backup configuration +├── jobs.cfg # Job definitions +│ +├── ha/ # High Availability +│ ├── crm_commands +│ ├── manager_status +│ ├── resources.cfg +│ ├── groups.cfg +│ ├── rules.cfg +│ └── fence.cfg +│ +├── sdn/ # Software Defined Networking +│ ├── vnets.cfg +│ ├── zones.cfg +│ ├── controllers.cfg +│ ├── subnets.cfg +│ └── ipams.cfg +│ +├── firewall/ +│ └── cluster.fw # Cluster firewall rules +│ +├── replication.cfg # Replication configuration +├── ceph.conf # Ceph configuration +│ +├── notifications.cfg # Notification settings +│ +└── priv/ # Cluster-wide private data + ├── shadow.cfg # Password hashes + ├── tfa.cfg # Two-factor auth + ├── token.cfg # API tokens + ├── notifications.cfg # Private notification config + └── acme/ + └── plugins.cfg # ACME plugin configs +``` + +## File Categories + +### Plugin Files (Dynamic Content) + +Files beginning with `.` are plugin files that generate content dynamically: +- `.version` - Cluster version and status +- `.members` - Cluster membership +- `.vmlist` - VM/container list +- `.rrd` - RRD metrics dump +- `.clusterlog` - Cluster log entries +- `.debug` - Debug mode toggle + +See `../plugins/README.md` for detailed format specifications. + +### Symlink Plugins + +Convenience symlinks to node-specific directories: +- `local/` - Points to current node's directory +- `qemu-server/` - Points to current node's VM configs +- `lxc/` - Points to current node's container configs +- `openvz/` - Points to current node's OpenVZ configs (legacy) + +### Configuration Files (40 tracked files) + +The following files are tracked for version changes and synchronized across the cluster: + +**Core Configuration**: +- `corosync.conf` - Corosync cluster configuration +- `corosync.conf.new` - Staged configuration before activation +- `storage.cfg` - Storage pool definitions +- `user.cfg` - User accounts and permissions +- `domains.cfg` - Authentication realm configuration +- `datacenter.cfg` - Datacenter-wide settings + +**Backup Configuration**: +- `vzdump.cron` - Backup schedule +- `vzdump.conf` - Backup job settings +- `jobs.cfg` - Recurring job definitions + +**High Availability** (6 files): +- `ha/crm_commands` - HA command queue +- `ha/manager_status` - HA manager status +- `ha/resources.cfg` - HA resource definitions +- `ha/groups.cfg` - HA service groups +- `ha/rules.cfg` - HA placement rules +- `ha/fence.cfg` - Fencing configuration + +**Software Defined Networking** (5 files): +- `sdn/vnets.cfg` - Virtual networks +- `sdn/zones.cfg` - Network zones +- `sdn/controllers.cfg` - SDN controllers +- `sdn/subnets.cfg` - Subnet definitions +- `sdn/ipams.cfg` - IP address management + +**Notification** (2 files): +- `notifications.cfg` - Public notification settings +- `priv/notifications.cfg` - Private notification credentials + +**Security** (5 files): +- `priv/shadow.cfg` - Password hashes +- `priv/tfa.cfg` - Two-factor authentication +- `priv/token.cfg` - API tokens +- `priv/acme/plugins.cfg` - ACME DNS plugins +- `firewall/cluster.fw` - Cluster-wide firewall rules + +**Other**: +- `replication.cfg` - Storage replication jobs +- `ceph.conf` - Ceph cluster configuration + +### Node-Specific Directories + +Each node has a directory under `nodes/{nodename}/` containing: +- `qemu-server/*.conf` - QEMU/KVM VM configurations +- `lxc/*.conf` - LXC container configurations +- `openvz/*.conf` - OpenVZ container configurations (legacy) +- `priv/` - Node-specific private data (not replicated) + +## FUSE Operations + +### Supported Operations + +All standard FUSE operations are supported: + +**Metadata Operations**: +- `getattr` - Get file/directory attributes +- `readdir` - List directory contents +- `statfs` - Get filesystem statistics + +**Read Operations**: +- `read` - Read file contents +- `readlink` - Read symlink target + +**Write Operations**: +- `write` - Write file contents +- `create` - Create new file +- `unlink` - Delete file +- `mkdir` - Create directory +- `rmdir` - Delete directory +- `rename` - Rename/move file +- `truncate` - Truncate file to size +- `utimens` - Update timestamps + +**Permission Operations**: +- `chmod` - Change file mode +- `chown` - Change file ownership + +### Permission Handling + +- **Regular paths**: Standard Unix permissions apply +- **Private paths** (`priv/` directories): Restricted to root only +- **Plugin files**: Read-only for most users, special handling for `.debug` + +### File Size Limits + +- Maximum file size: 1 MiB (1024 × 1024 bytes) +- Maximum filesystem size: 128 MiB +- Maximum inodes: 256,000 + +## Implementation + +The FUSE filesystem is implemented in `filesystem.rs` and integrates with: +- **MemDB**: Backend storage (SQLite + in-memory tree) +- **Plugin System**: Dynamic file generation +- **Cluster Sync**: Changes are propagated via DFSM protocol diff --git a/src/pmxcfs-rs/pmxcfs/src/fuse/filesystem.rs b/src/pmxcfs-rs/pmxcfs/src/fuse/filesystem.rs new file mode 100644 index 000000000..dc7beaeb5 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/fuse/filesystem.rs @@ -0,0 +1,1644 @@ +use anyhow::{Error, bail}; +use futures::stream::TryStreamExt; +use libc::{EACCES, EINVAL, EIO, EISDIR, ENOENT}; +use proxmox_fuse::requests::{self, FuseRequest}; +use proxmox_fuse::{EntryParam, Fuse, ReplyBufState, Request}; +use std::ffi::{OsStr, OsString}; +use std::io; +use std::mem; +use std::path::Path; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::plugins::{Plugin, PluginRegistry}; +use pmxcfs_config::Config; +use pmxcfs_dfsm::{Dfsm, DfsmBroadcast, FuseMessage}; +use pmxcfs_memdb::{MemDb, ROOT_INODE, TreeEntry}; +use pmxcfs_status::Status; + +const TTL: f64 = 1.0; + +/// FUSE filesystem context for pmxcfs +pub struct PmxcfsFilesystem { + memdb: MemDb, + dfsm: Option>>, + plugins: Arc, + status: Arc, + uid: u32, + gid: u32, +} + +impl PmxcfsFilesystem { + const PLUGIN_INODE_OFFSET: u64 = 1000000; + const FUSE_GENERATION: u64 = 1; + const NLINK_FILE: u32 = 1; + const NLINK_DIR: u32 = 2; + + pub fn new( + memdb: MemDb, + config: Arc, + dfsm: Option>>, + plugins: Arc, + status: Arc, + ) -> Self { + Self { + memdb, + gid: config.www_data_gid(), + dfsm, + plugins, + status, + uid: 0, // root + } + } + + /// Convert FUSE nodeid to internal inode + /// + /// FUSE protocol uses nodeid 1 for root, but internally we use ROOT_INODE (0). + /// Regular file inodes need to be offset by -1 to match internal numbering. + /// Plugin inodes are in a separate range (>= PLUGIN_INODE_OFFSET) and unchanged. + /// + /// Mapping: + /// - FUSE nodeid 1 → internal inode 0 (ROOT_INODE) + /// - FUSE nodeid N (where N > 1 and N < PLUGIN_INODE_OFFSET) → internal inode N-1 + /// - Plugin inodes (>= PLUGIN_INODE_OFFSET) are unchanged + #[inline] + fn fuse_to_inode(&self, fuse_nodeid: u64) -> u64 { + if fuse_nodeid >= Self::PLUGIN_INODE_OFFSET { + // Plugin inodes are unchanged + fuse_nodeid + } else { + // Regular inodes: FUSE nodeid N → internal inode N-1 + // This maps FUSE root (1) to internal ROOT_INODE (0) + fuse_nodeid - 1 + } + } + + /// Convert internal inode to FUSE nodeid + /// + /// Internally we use ROOT_INODE (0) for root, but FUSE protocol uses nodeid 1. + /// Regular file inodes need to be offset by +1 to match FUSE numbering. + /// Plugin inodes (>= PLUGIN_INODE_OFFSET) are unchanged. + /// + /// Mapping: + /// - Internal inode 0 (ROOT_INODE) → FUSE nodeid 1 + /// - Internal inode N (where N > 0 and N < PLUGIN_INODE_OFFSET) → FUSE nodeid N+1 + /// - Plugin inodes (>= PLUGIN_INODE_OFFSET) are unchanged + #[inline] + fn inode_to_fuse(&self, inode: u64) -> u64 { + if inode >= Self::PLUGIN_INODE_OFFSET { + // Plugin inodes are unchanged + inode + } else { + // Regular inodes: internal inode N → FUSE nodeid N+1 + // This maps internal ROOT_INODE (0) to FUSE root (1) + inode + 1 + } + } + + /// Check if a path is private (should have restricted permissions) + /// Matches C version's path_is_private() logic: + /// - Paths starting with "priv" or "priv/" are private + /// - Paths matching "nodes/*/priv" or "nodes/*/priv/*" are private + fn is_private_path(&self, path: &str) -> bool { + // Strip leading slashes + let path = path.trim_start_matches('/'); + + // Check if path starts with "priv" or "priv/" + if path.starts_with("priv") && (path.len() == 4 || path.as_bytes()[4] == b'/') { + return true; + } + + // Check for "nodes/*/priv" or "nodes/*/priv/*" pattern + if let Some(after_nodes) = path.strip_prefix("nodes/") { + // Find the next '/' to skip the node name + if let Some(slash_pos) = after_nodes.find('/') { + let after_nodename = &after_nodes[slash_pos..]; + + // Check if it starts with "/priv" and ends or continues with '/' + if after_nodename.starts_with("/priv") { + let priv_end = slash_pos + 5; // position after "/priv" + if after_nodes.len() == priv_end || after_nodes.as_bytes()[priv_end] == b'/' { + return true; + } + } + } + } + + false + } + + /// Get a TreeEntry by inode (helper for FUSE operations) + fn get_entry_by_inode(&self, inode: u64) -> Option { + self.memdb.get_entry_by_inode(inode) + } + + /// Get a TreeEntry by path + fn get_entry_by_path(&self, path: &str) -> Option { + self.memdb.lookup_path(path) + } + + /// Get the full path for an inode by traversing up the tree + fn get_path_for_inode(&self, inode: u64) -> String { + if inode == ROOT_INODE { + return "/".to_string(); + } + + let mut path_components = Vec::new(); + let mut current_inode = inode; + + // Traverse up the tree + while current_inode != ROOT_INODE { + if let Some(entry) = self.memdb.get_entry_by_inode(current_inode) { + path_components.push(entry.name.clone()); + current_inode = entry.parent; + } else { + // Entry not found, return root + return "/".to_string(); + } + } + + // Reverse to get correct order (we built from leaf to root) + path_components.reverse(); + + if path_components.is_empty() { + "/".to_string() + } else { + format!("/{}", path_components.join("/")) + } + } + + fn join_path(&self, parent_path: &str, name: &str) -> io::Result { + let mut path = std::path::PathBuf::from(parent_path); + path.push(name); + path.to_str() + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "Path contains invalid UTF-8 characters", + ) + }) + .map(|s| s.to_string()) + } + + /// Convert a TreeEntry to libc::stat using current quorum state + /// Applies permission adjustments based on whether the path is private + /// + /// Matches C implementation (cfs-plug-memdb.c:95-116, pmxcfs.c:130-138): + /// 1. Start with quorum-dependent base permissions (0777/0555 dirs, 0666/0444 files) + /// 2. Apply AND masking: private=0777700, dirs/symlinks=0777755, files=0777750 + fn entry_to_stat(&self, entry: &TreeEntry, path: &str) -> libc::stat { + // Use current quorum state + let quorate = self.status.is_quorate(); + self.entry_to_stat_with_quorum(entry, path, quorate) + } + + /// Convert a TreeEntry to libc::stat with explicit quorum state + /// Applies permission adjustments based on whether the path is private + /// + /// Matches C implementation (cfs-plug-memdb.c:95-116, pmxcfs.c:130-138): + /// 1. Start with quorum-dependent base permissions (0777/0555 dirs, 0666/0444 files) + /// 2. Apply AND masking: private=0777700, dirs/symlinks=0777755, files=0777750 + fn entry_to_stat_with_quorum(&self, entry: &TreeEntry, path: &str, quorate: bool) -> libc::stat { + let mtime_secs = entry.mtime as i64; + let mut stat: libc::stat = unsafe { mem::zeroed() }; + + // Convert internal inode to FUSE nodeid for st_ino field + let fuse_nodeid = self.inode_to_fuse(entry.inode); + + if entry.is_dir() { + stat.st_ino = fuse_nodeid; + // Quorum-dependent directory permissions (C: 0777 when quorate, 0555 when not) + stat.st_mode = libc::S_IFDIR | if quorate { 0o777 } else { 0o555 }; + stat.st_nlink = Self::NLINK_DIR as u64; + stat.st_uid = self.uid; + stat.st_gid = self.gid; + stat.st_size = 4096; + stat.st_blksize = 4096; + stat.st_blocks = 8; + stat.st_atime = mtime_secs; + stat.st_atime_nsec = 0; + stat.st_mtime = mtime_secs; + stat.st_mtime_nsec = 0; + stat.st_ctime = mtime_secs; + stat.st_ctime_nsec = 0; + } else { + stat.st_ino = fuse_nodeid; + // Quorum-dependent file permissions (C: 0666 when quorate, 0444 when not) + stat.st_mode = libc::S_IFREG | if quorate { 0o666 } else { 0o444 }; + stat.st_nlink = Self::NLINK_FILE as u64; + stat.st_uid = self.uid; + stat.st_gid = self.gid; + stat.st_size = entry.size as i64; + stat.st_blksize = 4096; + stat.st_blocks = ((entry.size as i64 + 4095) / 4096) * 8; + stat.st_atime = mtime_secs; + stat.st_atime_nsec = 0; + stat.st_mtime = mtime_secs; + stat.st_mtime_nsec = 0; + stat.st_ctime = mtime_secs; + stat.st_ctime_nsec = 0; + } + + // Apply permission adjustments based on path privacy (matching C implementation) + // See pmxcfs.c cfs_fuse_getattr() lines 130-138 + // Uses AND masking to restrict permissions while preserving file type bits + if self.is_private_path(path) { + // Private paths: mask to rwx------ (0o700) + // C: stbuf->st_mode &= 0777700 + stat.st_mode &= 0o777700; + } else { + // Non-private paths: different masks for dirs vs files + if (stat.st_mode & libc::S_IFMT) == libc::S_IFDIR + || (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK + { + // Directories and symlinks: mask to rwxr-xr-x (0o755) + // C: stbuf->st_mode &= 0777755 + stat.st_mode &= 0o777755; + } else { + // Regular files: mask to rwxr-x--- (0o750) + // C: stbuf->st_mode &= 0777750 + stat.st_mode &= 0o777750; + } + } + + stat + } + + /// Check if a plugin supports write operations + /// + /// Tests if the plugin has a custom write implementation by checking + /// if write() returns the default "Write not supported" error + fn plugin_supports_write(plugin: &Arc) -> bool { + // Try writing empty data - if it returns the default error, no write support + match plugin.write(&[]) { + Err(e) => { + let msg = e.to_string(); + !msg.contains("Write not supported") + } + Ok(_) => true, // Write succeeded, so it's supported + } + } + + /// Get stat for a plugin file + fn plugin_to_stat(&self, inode: u64, plugin: &Arc) -> libc::stat { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as i64; + let data = plugin.read().unwrap_or_default(); + + let mut stat: libc::stat = unsafe { mem::zeroed() }; + stat.st_ino = inode; + + // Set file type and mode based on plugin type + if plugin.is_symlink() { + // Quorum-aware permissions for symlinks (matching C's cfs-plug-link.c:68-72) + // - When quorate: 0o777 (writable by all) + // - When not quorate: 0o555 (read-only for all) + let mode = if self.status.is_quorate() { + 0o777 + } else { + 0o555 + }; + stat.st_mode = libc::S_IFLNK | mode; + } else { + // Regular file plugin + let mut mode = plugin.mode(); + + // Strip write bits if plugin doesn't support writing + // Matches C implementation (cfs-plug-func.c:216-218) + if !Self::plugin_supports_write(plugin) { + mode &= !0o222; // Remove write bits (owner, group, other) + } + + stat.st_mode = libc::S_IFREG | mode; + } + + stat.st_nlink = Self::NLINK_FILE as u64; + stat.st_uid = self.uid; + stat.st_gid = self.gid; + stat.st_size = data.len() as i64; + stat.st_blksize = 4096; + stat.st_blocks = ((data.len() as i64 + 4095) / 4096) * 8; + stat.st_atime = now; + stat.st_atime_nsec = 0; + stat.st_mtime = now; + stat.st_mtime_nsec = 0; + stat.st_ctime = now; + stat.st_ctime_nsec = 0; + + stat + } + + /// Handle lookup operation + async fn handle_lookup(&self, parent_fuse: u64, name: &OsStr) -> io::Result { + tracing::debug!( + "lookup(parent={parent_fuse}, name={})", + name.to_string_lossy() + ); + + // Convert FUSE nodeid to internal inode + let parent = self.fuse_to_inode(parent_fuse); + + let name_str = name.to_string_lossy(); + + // Check if this is a plugin file in the root directory + if parent == ROOT_INODE { + let plugin_names = self.plugins.list(); + if let Some(plugin_idx) = plugin_names.iter().position(|p| p == name_str.as_ref()) { + // Found a plugin file + if let Some(plugin) = self.plugins.get(&name_str) { + let plugin_inode = Self::PLUGIN_INODE_OFFSET + plugin_idx as u64; + let stat = self.plugin_to_stat(plugin_inode, &plugin); + + return Ok(EntryParam { + inode: plugin_inode, // Plugin inodes already in FUSE space + generation: Self::FUSE_GENERATION, + attr: stat, + attr_timeout: TTL, + entry_timeout: TTL, + }); + } + } + } + + // Get parent entry + let parent_entry = if parent == ROOT_INODE { + // Root directory + self.get_entry_by_inode(ROOT_INODE) + .ok_or_else(|| io::Error::from_raw_os_error(ENOENT))? + } else { + self.get_entry_by_inode(parent) + .ok_or_else(|| io::Error::from_raw_os_error(ENOENT))? + }; + + // Construct the path + let parent_path = self.get_path_for_inode(parent_entry.inode); + let full_path = self.join_path(&parent_path, &name_str)?; + + // Look up the entry + if let Ok(exists) = self.memdb.exists(&full_path) + && exists + { + // Get the entry to find its inode + if let Some(entry) = self.get_entry_by_path(&full_path) { + let stat = self.entry_to_stat(&entry, &full_path); + // Convert internal inode to FUSE nodeid + let fuse_nodeid = self.inode_to_fuse(entry.inode); + return Ok(EntryParam { + inode: fuse_nodeid, + generation: Self::FUSE_GENERATION, + attr: stat, + attr_timeout: TTL, + entry_timeout: TTL, + }); + } + } + + Err(io::Error::from_raw_os_error(ENOENT)) + } + + /// Handle getattr operation + fn handle_getattr(&self, ino_fuse: u64) -> io::Result { + tracing::debug!("getattr(ino={})", ino_fuse); + + // Check if this is a plugin file (inode >= PLUGIN_INODE_OFFSET) + if ino_fuse >= Self::PLUGIN_INODE_OFFSET { + let plugin_idx = (ino_fuse - Self::PLUGIN_INODE_OFFSET) as usize; + let plugin_names = self.plugins.list(); + if plugin_idx < plugin_names.len() { + let plugin_name = &plugin_names[plugin_idx]; + if let Some(plugin) = self.plugins.get(plugin_name) { + return Ok(self.plugin_to_stat(ino_fuse, &plugin)); + } + } + } + + // Convert FUSE nodeid to internal inode + let ino = self.fuse_to_inode(ino_fuse); + + if let Some(entry) = self.get_entry_by_inode(ino) { + let path = self.get_path_for_inode(ino); + Ok(self.entry_to_stat(&entry, &path)) + } else { + Err(io::Error::from_raw_os_error(ENOENT)) + } + } + + /// Handle readdir operation + fn handle_readdir(&self, request: &mut requests::Readdir) -> Result<(), Error> { + let ino_fuse = request.inode; + tracing::debug!("readdir(ino={}, offset={})", ino_fuse, request.offset); + + // Convert FUSE nodeid to internal inode + let ino = self.fuse_to_inode(ino_fuse); + let offset = request.offset; + + // Get the directory path + let path = self.get_path_for_inode(ino); + + // Read directory entries from memdb + let entries = self + .memdb + .readdir(&path) + .map_err(|_| io::Error::from_raw_os_error(ENOENT))?; + + // Build complete list of entries + let mut all_entries: Vec<(u64, libc::stat, String)> = Vec::new(); + + // Add . and .. entries + // C implementation (cfs-plug-memdb.c:172) always passes quorate=0 for readdir stats + // This ensures directory listings show non-quorate permissions (read-only view) + if let Some(dir_entry) = self.get_entry_by_inode(ino) { + let dir_stat = self.entry_to_stat_with_quorum(&dir_entry, &path, false); + all_entries.push((ino_fuse, dir_stat, ".".to_string())); + all_entries.push((ino_fuse, dir_stat, "..".to_string())); + } + + // For root directory, add plugin files + if ino == ROOT_INODE { + let plugin_names = self.plugins.list(); + for (idx, plugin_name) in plugin_names.iter().enumerate() { + let plugin_inode = Self::PLUGIN_INODE_OFFSET + idx as u64; + if let Some(plugin) = self.plugins.get(plugin_name) { + let stat = self.plugin_to_stat(plugin_inode, &plugin); + all_entries.push((plugin_inode, stat, plugin_name.clone())); + } + } + } + + // Add actual entries from memdb + // C implementation (cfs-plug-memdb.c:172) always passes quorate=0 for readdir stats + for entry in &entries { + let entry_path = match self.join_path(&path, &entry.name) { + Ok(p) => p, + Err(e) => { + tracing::warn!("Skipping entry with invalid UTF-8 path: {}", e); + continue; + } + }; + let stat = self.entry_to_stat_with_quorum(entry, &entry_path, false); + // Convert internal inode to FUSE nodeid for directory entry + let fuse_nodeid = self.inode_to_fuse(entry.inode); + all_entries.push((fuse_nodeid, stat, entry.name.clone())); + } + + // Return entries starting from offset + let mut next = offset as isize; + for (_inode, stat, name) in all_entries.iter().skip(offset as usize) { + next += 1; + match request.add_entry(OsStr::new(name), stat, next)? { + ReplyBufState::Ok => (), + ReplyBufState::Full => return Ok(()), + } + } + + Ok(()) + } + + /// Handle read operation + fn handle_read(&self, ino_fuse: u64, offset: u64, size: usize) -> io::Result> { + tracing::debug!("read(ino={}, offset={}, size={})", ino_fuse, offset, size); + + // Check if this is a plugin file (inode >= PLUGIN_INODE_OFFSET) + if ino_fuse >= Self::PLUGIN_INODE_OFFSET { + let plugin_idx = (ino_fuse - Self::PLUGIN_INODE_OFFSET) as usize; + let plugin_names = self.plugins.list(); + if plugin_idx < plugin_names.len() { + let plugin_name = &plugin_names[plugin_idx]; + if let Some(plugin) = self.plugins.get(plugin_name) { + let data = plugin + .read() + .map_err(|_| io::Error::from_raw_os_error(EIO))?; + + let offset = offset as usize; + if offset >= data.len() { + return Ok(Vec::new()); + } else { + let end = std::cmp::min(offset + size, data.len()); + return Ok(data[offset..end].to_vec()); + } + } + } + } + + // Convert FUSE nodeid to internal inode + let ino = self.fuse_to_inode(ino_fuse); + + let path = self.get_path_for_inode(ino); + + // Check if this is a directory + if ino == ROOT_INODE { + // Root directory itself - can't read + return Err(io::Error::from_raw_os_error(EISDIR)); + } + + // Read from memdb + self.memdb + .read(&path, offset, size) + .map_err(|_| io::Error::from_raw_os_error(ENOENT)) + } + + /// Handle write operation + async fn handle_write(&self, ino_fuse: u64, offset: u64, data: &[u8]) -> io::Result { + tracing::debug!( + "write(ino={}, offset={}, size={})", + ino_fuse, + offset, + data.len() + ); + + // Check if this is a plugin file (inode >= PLUGIN_INODE_OFFSET) + if ino_fuse >= Self::PLUGIN_INODE_OFFSET { + let plugin_idx = (ino_fuse - Self::PLUGIN_INODE_OFFSET) as usize; + let plugin_names = self.plugins.list(); + + if plugin_idx < plugin_names.len() { + let plugin_name = &plugin_names[plugin_idx]; + if let Some(plugin) = self.plugins.get(plugin_name) { + // Validate offset (C only allows offset 0) + if offset != 0 { + tracing::warn!("Plugin write rejected: offset {} != 0", offset); + return Err(io::Error::from_raw_os_error(libc::EIO)); + } + + // Call plugin write + tracing::debug!("Writing {} bytes to plugin '{}'", data.len(), plugin_name); + plugin.write(data).map(|_| data.len()).map_err(|e| { + tracing::error!("Plugin write failed: {}", e); + io::Error::from_raw_os_error(libc::EIO) + })?; + + return Ok(data.len()); + } + } + + // Plugin not found or invalid index + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + + // Regular memdb file write + // Convert FUSE nodeid to internal inode + let ino = self.fuse_to_inode(ino_fuse); + + let path = self.get_path_for_inode(ino); + + // C-style broadcast-first: send message and wait for result + // C implementation (cfs-plug-memdb.c:262-265) sends just the write chunk + // with original offset, not the full file contents + if let Some(dfsm) = &self.dfsm { + // Send write message with just the data chunk and original offset + // The DFSM delivery will apply the write to all nodes + let result = dfsm + .send_message_sync( + FuseMessage::Write { + path: path.clone(), + offset, + data: data.to_vec(), + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("Write failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + + Ok(data.len()) + } else { + // No cluster - write locally + let mtime = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as u32; + + // FUSE write() should never truncate - truncation is handled separately + // via setattr (for explicit truncate) or open with O_TRUNC flag. + // Offset writes must preserve content beyond the write range (POSIX semantics). + self.memdb + .write(&path, offset, 0, mtime, data, false) + .map_err(|_| io::Error::from_raw_os_error(EACCES)) + } + } + + /// Handle mkdir operation + async fn handle_mkdir(&self, parent_fuse: u64, name: &OsStr, mode: u32) -> io::Result { + tracing::debug!( + "mkdir(parent={}, name={})", + parent_fuse, + name.to_string_lossy() + ); + + // Convert FUSE nodeid to internal inode + let parent = self.fuse_to_inode(parent_fuse); + + let parent_path = self.get_path_for_inode(parent); + let name_str = name.to_string_lossy(); + let full_path = self.join_path(&parent_path, &name_str)?; + + // C-style broadcast-first: send message and wait for result + if let Some(dfsm) = &self.dfsm { + let result = dfsm + .send_message_sync( + FuseMessage::Mkdir { + path: full_path.clone(), + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("Mkdir failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - create locally + let mtime = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as u32; + + self.memdb + .create(&full_path, mode | libc::S_IFDIR, 0, mtime) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + // Look up the newly created entry (created via delivery callback) + let entry = self + .memdb + .lookup_path(&full_path) + .ok_or_else(|| io::Error::from_raw_os_error(EIO))?; + + let stat = self.entry_to_stat(&entry, &full_path); + // Convert internal inode to FUSE nodeid + let fuse_nodeid = self.inode_to_fuse(entry.inode); + Ok(EntryParam { + inode: fuse_nodeid, + generation: Self::FUSE_GENERATION, + attr: stat, + attr_timeout: TTL, + entry_timeout: TTL, + }) + } + + /// Handle rmdir operation + async fn handle_rmdir(&self, parent_fuse: u64, name: &OsStr) -> io::Result<()> { + tracing::debug!( + "rmdir(parent={}, name={})", + parent_fuse, + name.to_string_lossy() + ); + + // Convert FUSE nodeid to internal inode + let parent = self.fuse_to_inode(parent_fuse); + + let parent_path = self.get_path_for_inode(parent); + let name_str = name.to_string_lossy(); + let full_path = self.join_path(&parent_path, &name_str)?; + + // C-style broadcast-first: send message and wait for result + if let Some(dfsm) = &self.dfsm { + let result = dfsm + .send_message_sync( + FuseMessage::Delete { + path: full_path.clone(), + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("Rmdir failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - delete locally + self.memdb + .delete(&full_path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + Ok(()) + } + + /// Handle create operation + async fn handle_create(&self, parent_fuse: u64, name: &OsStr, mode: u32) -> io::Result { + tracing::debug!( + "create(parent={}, name={})", + parent_fuse, + name.to_string_lossy() + ); + + // Convert FUSE nodeid to internal inode + let parent = self.fuse_to_inode(parent_fuse); + + let parent_path = self.get_path_for_inode(parent); + let name_str = name.to_string_lossy(); + let full_path = self.join_path(&parent_path, &name_str)?; + + // C-style broadcast-first: send message and wait for result + if let Some(dfsm) = &self.dfsm { + // Direct await - clean and idiomatic async code + let result = dfsm + .send_message_sync( + FuseMessage::Create { + path: full_path.clone(), + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + // Check result from deliver callback + if result.result < 0 { + tracing::warn!("Create failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - create locally + let mtime = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as u32; + + self.memdb + .create(&full_path, mode | libc::S_IFREG, 0, mtime) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + // Look up the newly created entry (created via delivery callback) + let entry = self + .memdb + .lookup_path(&full_path) + .ok_or_else(|| io::Error::from_raw_os_error(EIO))?; + + let stat = self.entry_to_stat(&entry, &full_path); + // Convert internal inode to FUSE nodeid + let fuse_nodeid = self.inode_to_fuse(entry.inode); + Ok(EntryParam { + inode: fuse_nodeid, + generation: Self::FUSE_GENERATION, + attr: stat, + attr_timeout: TTL, + entry_timeout: TTL, + }) + } + + /// Handle unlink operation + async fn handle_unlink(&self, parent_fuse: u64, name: &OsStr) -> io::Result<()> { + tracing::debug!( + "unlink(parent={}, name={})", + parent_fuse, + name.to_string_lossy() + ); + + // Convert FUSE nodeid to internal inode + let parent = self.fuse_to_inode(parent_fuse); + + let name_str = name.to_string_lossy(); + + // Don't allow unlinking plugin files (in root directory) + if parent == ROOT_INODE { + let plugin_names = self.plugins.list(); + if plugin_names.iter().any(|p| p == name_str.as_ref()) { + return Err(io::Error::from_raw_os_error(EACCES)); + } + } + + let parent_path = self.get_path_for_inode(parent); + let full_path = self.join_path(&parent_path, &name_str)?; + + // Check if trying to unlink a directory (should use rmdir instead) + if let Some(entry) = self.memdb.lookup_path(&full_path) + && entry.is_dir() + { + return Err(io::Error::from_raw_os_error(libc::EISDIR)); + } + + // C-style broadcast-first: send message and wait for result + if let Some(dfsm) = &self.dfsm { + let result = dfsm + .send_message_sync( + FuseMessage::Delete { path: full_path }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("Unlink failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - delete locally + self.memdb + .delete(&full_path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + Ok(()) + } + + /// Handle rename operation + async fn handle_rename( + &self, + parent_fuse: u64, + name: &OsStr, + new_parent_fuse: u64, + new_name: &OsStr, + ) -> io::Result<()> { + tracing::debug!( + "rename(parent={}, name={}, new_parent={}, new_name={})", + parent_fuse, + name.to_string_lossy(), + new_parent_fuse, + new_name.to_string_lossy() + ); + + // Convert FUSE nodeids to internal inodes + let parent = self.fuse_to_inode(parent_fuse); + let new_parent = self.fuse_to_inode(new_parent_fuse); + + let parent_path = self.get_path_for_inode(parent); + let name_str = name.to_string_lossy(); + let old_path = self.join_path(&parent_path, &name_str)?; + + let new_parent_path = self.get_path_for_inode(new_parent); + let new_name_str = new_name.to_string_lossy(); + let new_path = self.join_path(&new_parent_path, &new_name_str)?; + + // C-style broadcast-first: send message and wait for result + if let Some(dfsm) = &self.dfsm { + let result = dfsm + .send_message_sync( + FuseMessage::Rename { + from: old_path.clone(), + to: new_path.clone(), + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("Rename failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - rename locally + self.memdb + .rename(&old_path, &new_path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + Ok(()) + } + + /// Handle setattr operation + /// + /// Supports: + /// - Truncate (size parameter) + /// - Mtime updates (mtime parameter) - used for lock renewal/release + /// - Mode changes (mode parameter) - validation only, no actual changes + /// - Ownership changes (uid/gid parameters) - validation only, no actual changes + /// + /// C implementation (cfs-plug-memdb.c:393-436) ALWAYS sends DCDB_MESSAGE_CFS_MTIME + /// via DFSM when mtime is updated (line 420-422), in addition to unlock messages + /// + /// chmod/chown (pmxcfs.c:180-214): These operations don't actually change anything, + /// they just validate that the requested changes are allowed (returns -EPERM if not). + async fn handle_setattr( + &self, + ino_fuse: u64, + size: Option, + mtime: Option, + mode: Option, + uid: Option, + gid: Option, + ) -> io::Result { + tracing::debug!( + "setattr(ino={}, size={:?}, mtime={:?})", + ino_fuse, + size, + mtime + ); + + // Convert FUSE nodeid to internal inode + let ino = self.fuse_to_inode(ino_fuse); + let path = self.get_path_for_inode(ino); + + // Handle chmod operation (validation only - C: pmxcfs.c:180-197) + // chmod validates that requested mode is allowed but doesn't actually change anything + if let Some(new_mode) = mode { + let is_private = self.is_private_path(&path); + let mode_bits = new_mode & 0o777; // Extract permission bits only + + // C implementation allows only specific modes: + // - 0600 (rw-------) for private paths + // - 0640 (rw-r-----) for non-private paths + let allowed = if is_private { + mode_bits == 0o600 + } else { + mode_bits == 0o640 + }; + + if !allowed { + tracing::debug!( + "chmod rejected: mode={:o}, path={}, is_private={}", + mode_bits, + path, + is_private + ); + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } + + tracing::debug!( + "chmod validated: mode={:o}, path={}, is_private={}", + mode_bits, + path, + is_private + ); + } + + // Handle chown operation (validation only - C: pmxcfs.c:198-214) + // chown validates that requested ownership is allowed but doesn't actually change anything + if uid.is_some() || gid.is_some() { + // C implementation allows only: + // - uid: 0 (root) or -1 (no change) + // - gid: www-data GID or -1 (no change) + let uid_allowed = match uid { + None => true, + Some(u) => u == 0 || u == u32::MAX, // -1 as u32 = u32::MAX + }; + + let gid_allowed = match gid { + None => true, + Some(g) => g == self.gid || g == u32::MAX, // -1 as u32 = u32::MAX + }; + + if !uid_allowed || !gid_allowed { + tracing::debug!( + "chown rejected: uid={:?}, gid={:?}, allowed_gid={}, path={}", + uid, + gid, + self.gid, + path + ); + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } + + tracing::debug!( + "chown validated: uid={:?}, gid={:?}, allowed_gid={}, path={}", + uid, + gid, + self.gid, + path + ); + } + + // Handle truncate operation + if let Some(new_size) = size { + let current_mtime = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as u32; + + // Truncate: clear the file then write empty data to set size + self.memdb + .write(&path, 0, 0, current_mtime, &vec![0u8; new_size as usize], true) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + + // Handle mtime update (lock renewal/release) + // C implementation (cfs-plug-memdb.c:415-422) ALWAYS sends DCDB_MESSAGE_CFS_MTIME + // via DFSM when mtime is updated, in addition to unlock messages + if let Some(new_mtime) = mtime { + // Check if this is a lock directory + if pmxcfs_memdb::is_lock_path(&path) { + if let Some(entry) = self.memdb.get_entry_by_inode(ino) + && entry.is_dir() + { + // mtime=0 on lock directory = unlock request (C: cfs-plug-memdb.c:411-418) + if new_mtime == 0 { + tracing::debug!("Unlock request for lock directory: {}", path); + let csum = entry.compute_checksum(); + + // If DFSM is available and synced, only send the message - don't delete locally + // The leader will check if expired and send Unlock message if needed + // If DFSM is not available or not synced, delete locally if expired (C: cfs-plug-memdb.c:425-427) + if self.dfsm.as_ref().is_none_or(|d| !d.is_synced()) { + if self.memdb.lock_expired(&path, &csum) { + tracing::info!( + "DFSM not synced - deleting expired lock locally: {}", + path + ); + self.memdb + .delete(&path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + } else { + // Broadcast unlock request to cluster (C: cfs-plug-memdb.c:417) + tracing::debug!("DFSM synced - sending unlock request to cluster"); + self.dfsm.broadcast(FuseMessage::UnlockRequest { path: path.clone() }); + } + } + } + } + + // C implementation ALWAYS sends MTIME message (lines 420-422), regardless of + // whether it's an unlock request or not. This broadcasts the mtime update to + // all cluster nodes for synchronization. + if let Some(dfsm) = &self.dfsm { + tracing::debug!("Sending MTIME message via DFSM: path={}, mtime={}", path, new_mtime); + let result = dfsm + .send_message_sync( + FuseMessage::Mtime { + path: path.clone(), + mtime: new_mtime, + }, + std::time::Duration::from_secs(10), + ) + .await + .map_err(|e| { + tracing::error!("DFSM send_message_sync failed for MTIME: {}", e); + io::Error::from_raw_os_error(EIO) + })?; + + if result.result < 0 { + tracing::warn!("MTIME failed with errno: {}", -result.result); + return Err(io::Error::from_raw_os_error(-result.result as i32)); + } + } else { + // No cluster - update locally + self.memdb + .set_mtime(&path, 0, new_mtime) + .map_err(|_| io::Error::from_raw_os_error(EACCES))?; + } + } + + // Return current attributes + if let Some(entry) = self.memdb.get_entry_by_inode(ino) { + Ok(self.entry_to_stat(&entry, &path)) + } else { + Err(io::Error::from_raw_os_error(ENOENT)) + } + } + + /// Handle readlink operation - read symbolic link target + fn handle_readlink(&self, ino_fuse: u64) -> io::Result { + tracing::debug!("readlink(ino={})", ino_fuse); + + // Check if this is a plugin (only plugins can be symlinks in pmxcfs) + if ino_fuse >= Self::PLUGIN_INODE_OFFSET { + let plugin_idx = (ino_fuse - Self::PLUGIN_INODE_OFFSET) as usize; + let plugin_names = self.plugins.list(); + if plugin_idx < plugin_names.len() { + let plugin_name = &plugin_names[plugin_idx]; + if let Some(plugin) = self.plugins.get(plugin_name) { + // Read the link target from the plugin + let data = plugin + .read() + .map_err(|_| io::Error::from_raw_os_error(EIO))?; + + // Convert bytes to OsString + let target = std::str::from_utf8(&data) + .map_err(|_| io::Error::from_raw_os_error(EIO))?; + + return Ok(OsString::from(target)); + } + } + } + + // Not a plugin or plugin not found + Err(io::Error::from_raw_os_error(EINVAL)) + } + + /// Handle statfs operation - return filesystem statistics + /// + /// Matches C implementation (memdb.c:1275-1307) + /// Returns fixed filesystem stats based on memdb state + fn handle_statfs(&self) -> io::Result { + tracing::debug!("statfs()"); + + const BLOCKSIZE: u64 = 4096; + + // Get statistics from memdb + let (blocks, bfree, bavail, files, ffree) = self.memdb.statfs(); + + let mut stbuf: libc::statvfs = unsafe { mem::zeroed() }; + + // Block size and counts + stbuf.f_bsize = BLOCKSIZE; // Filesystem block size + stbuf.f_frsize = BLOCKSIZE; // Fragment size (same as block size) + stbuf.f_blocks = blocks; // Total blocks in filesystem + stbuf.f_bfree = bfree; // Free blocks + stbuf.f_bavail = bavail; // Free blocks available to unprivileged user + + // Inode counts + stbuf.f_files = files; // Total file nodes in filesystem + stbuf.f_ffree = ffree; // Free file nodes in filesystem + stbuf.f_favail = ffree; // Free file nodes available to unprivileged user + + // Other fields + stbuf.f_fsid = 0; // Filesystem ID + stbuf.f_flag = 0; // Mount flags + stbuf.f_namemax = 255; // Maximum filename length + + Ok(stbuf) + } +} + +/// Create and mount FUSE filesystem +pub async fn mount_fuse( + mount_path: &Path, + memdb: MemDb, + config: Arc, + dfsm: Option>>, + plugins: Arc, + status: Arc, +) -> Result<(), Error> { + let fs = Arc::new(PmxcfsFilesystem::new(memdb, config, dfsm, plugins, status)); + + let mut fuse = Fuse::builder("pmxcfs")? + .debug() + .options("default_permissions")? // Enable kernel permission checking + .options("allow_other")? // Allow non-root access + .enable_readdir() + .enable_readlink() + .enable_mkdir() + .enable_create() + .enable_write() + .enable_unlink() + .enable_rmdir() + .enable_rename() + .enable_setattr() + .enable_read() + .enable_statfs() + .build()? + .mount(mount_path)?; + + tracing::info!("FUSE filesystem mounted at {}", mount_path.display()); + + // Process FUSE requests + while let Some(request) = fuse.try_next().await? { + let fs = Arc::clone(&fs); + match request { + Request::Lookup(request) => { + match fs.handle_lookup(request.parent, &request.file_name).await { + Ok(entry) => request.reply(&entry)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Getattr(request) => match fs.handle_getattr(request.inode) { + Ok(stat) => request.reply(&stat, TTL)?, + Err(err) => request.io_fail(err)?, + }, + Request::Readlink(request) => match fs.handle_readlink(request.inode) { + Ok(target) => request.reply(&target)?, + Err(err) => request.io_fail(err)?, + }, + Request::Readdir(mut request) => match fs.handle_readdir(&mut request) { + Ok(()) => request.reply()?, + Err(err) => { + if let Some(io_err) = err.downcast_ref::() { + let errno = io_err.raw_os_error().unwrap_or(EIO); + request.fail(errno)?; + } else { + request.io_fail(io::Error::from_raw_os_error(EIO))?; + } + } + }, + Request::Read(request) => { + match fs.handle_read(request.inode, request.offset, request.size) { + Ok(data) => request.reply(&data)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Write(request) => { + match fs.handle_write(request.inode, request.offset, request.data()).await { + Ok(written) => request.reply(written)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Mkdir(request) => { + match fs.handle_mkdir(request.parent, &request.dir_name, request.mode).await { + Ok(entry) => request.reply(&entry)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Rmdir(request) => match fs.handle_rmdir(request.parent, &request.dir_name).await { + Ok(()) => request.reply()?, + Err(err) => request.io_fail(err)?, + }, + Request::Rename(request) => { + match fs.handle_rename( + request.parent, + &request.name, + request.new_parent, + &request.new_name, + ).await { + Ok(()) => request.reply()?, + Err(err) => request.io_fail(err)?, + } + } + Request::Create(request) => { + match fs.handle_create(request.parent, &request.file_name, request.mode).await { + Ok(entry) => request.reply(&entry, 0)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Mknod(request) => { + // Treat mknod same as create + match fs.handle_create(request.parent, &request.file_name, request.mode).await { + Ok(entry) => request.reply(&entry)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Unlink(request) => { + match fs.handle_unlink(request.parent, &request.file_name).await { + Ok(()) => request.reply()?, + Err(err) => request.io_fail(err)?, + } + } + Request::Setattr(request) => { + // Extract mtime if being set + let mtime = request.mtime().map(|set_time| match set_time { + proxmox_fuse::requests::SetTime::Time(duration) => duration.as_secs() as u32, + proxmox_fuse::requests::SetTime::Now => SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() + as u32, + }); + + // Extract mode, uid, gid for chmod/chown validation (M1, M2) + let mode = request.mode(); + let uid = request.uid(); + let gid = request.gid(); + + match fs.handle_setattr(request.inode, request.size(), mtime, mode, uid, gid).await { + Ok(stat) => request.reply(&stat, TTL)?, + Err(err) => request.io_fail(err)?, + } + } + Request::Open(request) => { + // Plugin files don't support truncation, but can be opened for write + if request.inode >= PmxcfsFilesystem::PLUGIN_INODE_OFFSET { + // Check if plugin is being opened for writing + let is_write = (request.flags & (libc::O_WRONLY | libc::O_RDWR)) != 0; + + if is_write { + // Verify plugin is writable + let plugin_idx = + (request.inode - PmxcfsFilesystem::PLUGIN_INODE_OFFSET) as usize; + let plugin_names = fs.plugins.list(); + + if plugin_idx < plugin_names.len() { + let plugin_name = &plugin_names[plugin_idx]; + if let Some(plugin) = fs.plugins.get(plugin_name) { + // Check if plugin supports write (mode has write bit for owner) + let mode = plugin.mode(); + if (mode & 0o200) == 0 { + // Plugin is read-only + request.io_fail(io::Error::from_raw_os_error(libc::EACCES))?; + continue; + } + } + } + } + + // Verify plugin exists (getattr) + match fs.handle_getattr(request.inode) { + Ok(_) => request.reply(0)?, + Err(err) => request.io_fail(err)?, + } + } else { + // Regular files: handle truncation + if (request.flags & libc::O_TRUNC) != 0 { + match fs.handle_setattr(request.inode, Some(0), None, None, None, None).await { + Ok(_) => request.reply(0)?, + Err(err) => request.io_fail(err)?, + } + } else { + match fs.handle_getattr(request.inode) { + Ok(_) => request.reply(0)?, + Err(err) => request.io_fail(err)?, + } + } + } + } + Request::Release(request) => { + request.reply()?; + } + Request::Forget(_request) => { + // Forget is a notification, no reply needed + } + Request::Statfs(request) => { + match fs.handle_statfs() { + Ok(stbuf) => request.reply(&stbuf)?, + Err(err) => request.io_fail(err)?, + } + } + other => { + tracing::warn!("Unsupported FUSE request: {:?}", other); + bail!("Unsupported FUSE request"); + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + /// Helper to create a minimal PmxcfsFilesystem for testing + fn create_test_filesystem() -> (PmxcfsFilesystem, TempDir) { + let tmp_dir = TempDir::new().unwrap(); + let db_path = tmp_dir.path().join("test.db"); + + let memdb = MemDb::open(&db_path, true).unwrap(); + let config = pmxcfs_test_utils::create_test_config(false); + let plugins = crate::plugins::init_plugins_for_test("testnode"); + let status = Arc::new(Status::new(config.clone(), None)); + + let fs = PmxcfsFilesystem::new(memdb, config, None, plugins, status); + (fs, tmp_dir) + } + + // ===== Inode Mapping Tests ===== + + #[test] + fn test_fuse_to_inode_mapping() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Root: FUSE nodeid 1 → internal inode 0 + assert_eq!(fs.fuse_to_inode(1), 0); + + // Regular inodes: N → N-1 + assert_eq!(fs.fuse_to_inode(2), 1); + assert_eq!(fs.fuse_to_inode(10), 9); + assert_eq!(fs.fuse_to_inode(100), 99); + + // Plugin inodes (>= PLUGIN_INODE_OFFSET) unchanged + assert_eq!(fs.fuse_to_inode(1000000), 1000000); + assert_eq!(fs.fuse_to_inode(1000001), 1000001); + } + + #[test] + fn test_inode_to_fuse_mapping() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Root: internal inode 0 → FUSE nodeid 1 + assert_eq!(fs.inode_to_fuse(0), 1); + + // Regular inodes: N → N+1 + assert_eq!(fs.inode_to_fuse(1), 2); + assert_eq!(fs.inode_to_fuse(9), 10); + assert_eq!(fs.inode_to_fuse(99), 100); + + // Plugin inodes (>= PLUGIN_INODE_OFFSET) unchanged + assert_eq!(fs.inode_to_fuse(1000000), 1000000); + assert_eq!(fs.inode_to_fuse(1000001), 1000001); + } + + #[test] + fn test_inode_mapping_roundtrip() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Test roundtrip for regular inodes + for inode in 0..1000 { + let fuse = fs.inode_to_fuse(inode); + let back = fs.fuse_to_inode(fuse); + assert_eq!(inode, back, "Roundtrip failed for inode {inode}"); + } + + // Test roundtrip for plugin inodes + for offset in 0..100 { + let inode = 1000000 + offset; + let fuse = fs.inode_to_fuse(inode); + let back = fs.fuse_to_inode(fuse); + assert_eq!(inode, back, "Roundtrip failed for plugin inode {inode}"); + } + } + + // ===== Path Privacy Tests ===== + + #[test] + fn test_is_private_path_priv_root() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Exact "priv" at root + assert!(fs.is_private_path("priv")); + assert!(fs.is_private_path("/priv")); + assert!(fs.is_private_path("///priv")); + + // "priv/" at root + assert!(fs.is_private_path("priv/")); + assert!(fs.is_private_path("/priv/")); + assert!(fs.is_private_path("priv/file.txt")); + assert!(fs.is_private_path("/priv/subdir/file")); + } + + #[test] + fn test_is_private_path_nodes() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Node-specific priv directories + assert!(fs.is_private_path("nodes/node1/priv")); + assert!(fs.is_private_path("/nodes/node1/priv")); + assert!(fs.is_private_path("nodes/node1/priv/")); + assert!(fs.is_private_path("nodes/node1/priv/config")); + assert!(fs.is_private_path("/nodes/node1/priv/subdir/file")); + + // Multiple levels + assert!(fs.is_private_path("nodes/test-node/priv/deep/path/file.txt")); + } + + #[test] + fn test_is_private_path_non_private() { + let (fs, _tmpdir) = create_test_filesystem(); + + // "priv" as substring but not matching pattern + assert!(!fs.is_private_path("private")); + assert!(!fs.is_private_path("privileged")); + assert!(!fs.is_private_path("some/private/path")); + + // Regular paths + assert!(!fs.is_private_path("")); + assert!(!fs.is_private_path("/")); + assert!(!fs.is_private_path("nodes")); + assert!(!fs.is_private_path("nodes/node1")); + assert!(!fs.is_private_path("nodes/node1/qemu-server")); + assert!(!fs.is_private_path("corosync.conf")); + + // "priv" in middle of path component + assert!(!fs.is_private_path("nodes/privileged")); + assert!(!fs.is_private_path("nodes/node1/private")); + } + + #[test] + fn test_is_private_path_edge_cases() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Empty path + assert!(!fs.is_private_path("")); + + // Only slashes + assert!(!fs.is_private_path("/")); + assert!(!fs.is_private_path("//")); + assert!(!fs.is_private_path("///")); + + // "priv" with trailing characters (not slash) + assert!(!fs.is_private_path("priv123")); + assert!(!fs.is_private_path("priv.txt")); + + // Case sensitivity + assert!(!fs.is_private_path("Priv")); + assert!(!fs.is_private_path("PRIV")); + assert!(!fs.is_private_path("nodes/node1/Priv")); + } + + // ===== Error Path Tests ===== + + #[tokio::test] + async fn test_lookup_nonexistent() { + use std::ffi::OsStr; + let (fs, _tmpdir) = create_test_filesystem(); + + // Try to lookup a file that doesn't exist + let result = fs.handle_lookup(1, OsStr::new("nonexistent.txt")).await; + + assert!(result.is_err(), "Lookup of nonexistent file should fail"); + if let Err(e) = result { + assert_eq!(e.raw_os_error(), Some(libc::ENOENT)); + } + } + + #[test] + fn test_getattr_nonexistent_inode() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Try to get attributes for an inode that doesn't exist + let result = fs.handle_getattr(999999); + + assert!(result.is_err(), "Getattr on nonexistent inode should fail"); + if let Err(e) = result { + assert_eq!(e.raw_os_error(), Some(libc::ENOENT)); + } + } + + #[test] + fn test_read_directory_as_file() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Try to read the root directory as if it were a file + let result = fs.handle_read(1, 0, 100); + + assert!(result.is_err(), "Reading directory as file should fail"); + if let Err(e) = result { + assert_eq!(e.raw_os_error(), Some(libc::EISDIR)); + } + } + + #[tokio::test] + async fn test_write_to_nonexistent_file() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Try to write to a file that doesn't exist (should fail with EACCES) + let result = fs.handle_write(999999, 0, b"data").await; + + assert!(result.is_err(), "Writing to nonexistent file should fail"); + if let Err(e) = result { + assert_eq!(e.raw_os_error(), Some(libc::EACCES)); + } + } + + #[tokio::test] + async fn test_unlink_directory_fails() { + use std::ffi::OsStr; + let (fs, _tmpdir) = create_test_filesystem(); + + // Create a directory first by writing a file + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() as u32; + let _ = fs.memdb.write("/testdir/file.txt", 0, 0, now, b"test", false); + + // Look up testdir to verify it exists as a directory + if let Some(entry) = fs.memdb.lookup_path("/testdir") { + assert!(entry.is_dir(), "testdir should be a directory"); + + // Try to unlink the directory (should fail) + let result = fs.handle_unlink(1, OsStr::new("testdir")).await; + + assert!(result.is_err(), "Unlinking directory should fail"); + // Note: May return EACCES if directory doesn't exist in internal lookup, + // or EISDIR if found as directory + if let Err(e) = result { + let err_code = e.raw_os_error(); + assert!( + err_code == Some(libc::EISDIR) || err_code == Some(libc::EACCES), + "Expected EISDIR or EACCES, got {err_code:?}" + ); + } + } + } + + // ===== Plugin-related Tests ===== + + #[test] + fn test_plugin_inode_range() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Plugin inodes should be >= PLUGIN_INODE_OFFSET (1000000) + let plugin_inode = 1000000; + + // Verify that plugin inodes don't overlap with regular inodes + assert!(plugin_inode >= 1000000); + assert_ne!(fs.fuse_to_inode(plugin_inode), plugin_inode - 1); + assert_eq!(fs.fuse_to_inode(plugin_inode), plugin_inode); + } + + #[test] + fn test_file_type_preservation_in_permissions() { + let (fs, _tmpdir) = create_test_filesystem(); + + // Create a file + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() as u32; + let _ = fs.memdb.write("/test.txt", 0, 0, now, b"test", false); + + if let Ok(stat) = fs.handle_getattr(fs.inode_to_fuse(1)) { + // Verify that file type bits are preserved (S_IFREG) + assert_eq!(stat.st_mode & libc::S_IFMT, libc::S_IFREG); + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/fuse/mod.rs b/src/pmxcfs-rs/pmxcfs/src/fuse/mod.rs new file mode 100644 index 000000000..1157127cd --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/fuse/mod.rs @@ -0,0 +1,4 @@ +mod filesystem; + +pub use filesystem::PmxcfsFilesystem; +pub use filesystem::mount_fuse; diff --git a/src/pmxcfs-rs/pmxcfs/src/ipc/mod.rs b/src/pmxcfs-rs/pmxcfs/src/ipc/mod.rs new file mode 100644 index 000000000..2fe08e753 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/ipc/mod.rs @@ -0,0 +1,16 @@ +//! IPC (Inter-Process Communication) subsystem +//! +//! This module handles libqb-compatible IPC communication between pmxcfs +//! and client applications (e.g., pvestatd, pvesh, etc.). +//! +//! The IPC subsystem consists of: +//! - Operation codes (CfsIpcOp) defining available IPC operations +//! - Request types (IpcRequest) representing parsed client requests +//! - Service handler (IpcHandler) implementing the request processing logic + +mod request; +mod service; + +// Re-export public types +pub use request::{CfsIpcOp, IpcRequest}; +pub use service::IpcHandler; diff --git a/src/pmxcfs-rs/pmxcfs/src/ipc/request.rs b/src/pmxcfs-rs/pmxcfs/src/ipc/request.rs new file mode 100644 index 000000000..4b590dcea --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/ipc/request.rs @@ -0,0 +1,314 @@ +//! IPC request types and parsing +//! +//! This module defines the IPC operation codes and request message types +//! used for communication between pmxcfs and client applications via libqb IPC. + +/// IPC operation codes (must match C version for compatibility) +#[derive(Debug, Clone, Copy, PartialEq, Eq, num_enum::TryFromPrimitive)] +#[repr(i32)] +pub enum CfsIpcOp { + GetFsVersion = 1, + GetClusterInfo = 2, + GetGuestList = 3, + SetStatus = 4, + GetStatus = 5, + GetConfig = 6, + LogClusterMsg = 7, + GetClusterLog = 8, + GetRrdDump = 10, + GetGuestConfigProperty = 11, + VerifyToken = 12, + GetGuestConfigProperties = 13, +} + +/// IPC request message +/// +/// Represents deserialized IPC requests sent from clients via libqb IPC. +/// Each variant corresponds to an IPC operation code and contains the +/// deserialized request parameters. +#[derive(Debug, Clone, PartialEq)] +pub enum IpcRequest { + /// GET_FS_VERSION (op 1): Get filesystem version info + GetFsVersion, + + /// GET_CLUSTER_INFO (op 2): Get cluster member list + GetClusterInfo, + + /// GET_GUEST_LIST (op 3): Get VM/CT list + GetGuestList, + + /// SET_STATUS (op 4): Update node status + SetStatus { name: String, data: Vec }, + + /// GET_STATUS (op 5): Get node status + /// C format: name (256 bytes) + nodename (256 bytes) + GetStatus { + name: String, + node_name: String, + }, + + /// GET_CONFIG (op 6): Read configuration file + GetConfig { path: String }, + + /// LOG_CLUSTER_MSG (op 7): Write to cluster log + LogClusterMsg { + priority: u8, + ident: String, + tag: String, + message: String, + }, + + /// GET_CLUSTER_LOG (op 8): Read cluster log + /// C struct has max_entries + 3 reserved u32s + user string + GetClusterLog { max_entries: usize, user: String }, + + /// GET_RRD_DUMP (op 10): Get RRD data dump + GetRrdDump, + + /// GET_GUEST_CONFIG_PROPERTY (op 11): Get guest config property + GetGuestConfigProperty { vmid: u32, property: String }, + + /// VERIFY_TOKEN (op 12): Verify authentication token + VerifyToken { token: String }, + + /// GET_GUEST_CONFIG_PROPERTIES (op 13): Get multiple guest config properties + GetGuestConfigProperties { vmid: u32, properties: Vec }, +} + +impl IpcRequest { + /// Deserialize an IPC request from message ID and data + pub fn deserialize(msg_id: i32, data: &[u8]) -> anyhow::Result { + let op = CfsIpcOp::try_from(msg_id) + .map_err(|_| anyhow::anyhow!("Unknown IPC operation code: {msg_id}"))?; + + match op { + CfsIpcOp::GetFsVersion => Ok(IpcRequest::GetFsVersion), + + CfsIpcOp::GetClusterInfo => Ok(IpcRequest::GetClusterInfo), + + CfsIpcOp::GetGuestList => Ok(IpcRequest::GetGuestList), + + CfsIpcOp::SetStatus => { + // SET_STATUS: name (256 bytes) + data (rest) + if data.len() < 256 { + anyhow::bail!("SET_STATUS data too short"); + } + + let name = std::ffi::CStr::from_bytes_until_nul(&data[..256]) + .map_err(|_| anyhow::anyhow!("Invalid name in SET_STATUS"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in SET_STATUS name"))? + .to_string(); + + let status_data = data[256..].to_vec(); + + Ok(IpcRequest::SetStatus { + name, + data: status_data, + }) + } + + CfsIpcOp::GetStatus => { + // GET_STATUS: name (256 bytes) + nodename (256 bytes) + // Matches C struct cfs_status_get_request_header_t (server.c:64-67) + if data.len() < 512 { + anyhow::bail!("GET_STATUS data too short"); + } + + let name = std::ffi::CStr::from_bytes_until_nul(&data[..256]) + .map_err(|_| anyhow::anyhow!("Invalid name in GET_STATUS"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in GET_STATUS name"))? + .to_string(); + + let node_name = std::ffi::CStr::from_bytes_until_nul(&data[256..512]) + .map_err(|_| anyhow::anyhow!("Invalid node name in GET_STATUS"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in GET_STATUS node name"))? + .to_string(); + + Ok(IpcRequest::GetStatus { name, node_name }) + } + + CfsIpcOp::GetConfig => { + // GET_CONFIG: path (null-terminated string) + let path = std::ffi::CStr::from_bytes_until_nul(data) + .map_err(|_| anyhow::anyhow!("Invalid path in GET_CONFIG"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in GET_CONFIG path"))? + .to_string(); + + Ok(IpcRequest::GetConfig { path }) + } + + CfsIpcOp::LogClusterMsg => { + // LOG_CLUSTER_MSG: priority + ident_len + tag_len + strings + // C struct (server.c:69-75): + // uint8_t priority; + // uint8_t ident_len; // Length INCLUDING null terminator + // uint8_t tag_len; // Length INCLUDING null terminator + // char data[]; // ident\0 + tag\0 + message\0 + if data.len() < 3 { + anyhow::bail!("LOG_CLUSTER_MSG data too short"); + } + + let priority = data[0]; + let ident_len = data[1] as usize; + let tag_len = data[2] as usize; + + // Validate lengths (must include null terminator, so >= 1) + if ident_len < 1 || tag_len < 1 { + anyhow::bail!("LOG_CLUSTER_MSG: ident_len or tag_len is 0"); + } + + // Calculate message length (C: datasize - ident_len - tag_len) + let msg_start = 3 + ident_len + tag_len; + if data.len() < msg_start + 1 { + anyhow::bail!("LOG_CLUSTER_MSG data too short for message"); + } + + // Parse ident (null-terminated C string) + // C validates: msg[ident_len - 1] == 0 + let ident_data = &data[3..3 + ident_len]; + if ident_data[ident_len - 1] != 0 { + anyhow::bail!("LOG_CLUSTER_MSG: ident not null-terminated"); + } + let ident = std::ffi::CStr::from_bytes_with_nul(ident_data) + .map_err(|_| anyhow::anyhow!("Invalid ident in LOG_CLUSTER_MSG"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in ident"))? + .to_string(); + + // Parse tag (null-terminated C string) + // C validates: msg[ident_len + tag_len - 1] == 0 + let tag_data = &data[3 + ident_len..3 + ident_len + tag_len]; + if tag_data[tag_len - 1] != 0 { + anyhow::bail!("LOG_CLUSTER_MSG: tag not null-terminated"); + } + let tag = std::ffi::CStr::from_bytes_with_nul(tag_data) + .map_err(|_| anyhow::anyhow!("Invalid tag in LOG_CLUSTER_MSG"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in tag"))? + .to_string(); + + // Parse message (rest of data, null-terminated) + // C validates: data[request_size] == 0 (but this is a bug - accesses past buffer) + // We'll be more lenient and just read until end or first null + let msg_data = &data[msg_start..]; + let message = std::ffi::CStr::from_bytes_until_nul(msg_data) + .map_err(|_| anyhow::anyhow!("Invalid message in LOG_CLUSTER_MSG"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in message"))? + .to_string(); + + Ok(IpcRequest::LogClusterMsg { + priority, + ident, + tag, + message, + }) + } + + CfsIpcOp::GetClusterLog => { + // GET_CLUSTER_LOG: C struct (server.c:77-83): + // uint32_t max_entries; + // uint32_t res1, res2, res3; // reserved, unused + // char user[]; // null-terminated user string for filtering + // Total header: 16 bytes, followed by user string + const HEADER_SIZE: usize = 16; // 4 u32 fields + + if data.len() <= HEADER_SIZE { + // C returns EINVAL if userlen <= 0 + anyhow::bail!("GET_CLUSTER_LOG: missing user string"); + } + + let max_entries = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + // Default to 50 if max_entries is 0 (matches C: rh->max_entries ? rh->max_entries : 50) + let max_entries = if max_entries == 0 { 50 } else { max_entries }; + + // Parse user string (null-terminated) + let user = std::ffi::CStr::from_bytes_until_nul(&data[HEADER_SIZE..]) + .map_err(|_| anyhow::anyhow!("Invalid user string in GET_CLUSTER_LOG"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in user string"))? + .to_string(); + + Ok(IpcRequest::GetClusterLog { max_entries, user }) + } + + CfsIpcOp::GetRrdDump => Ok(IpcRequest::GetRrdDump), + + CfsIpcOp::GetGuestConfigProperty => { + // GET_GUEST_CONFIG_PROPERTY: vmid (u32) + property (null-terminated) + if data.len() < 4 { + anyhow::bail!("GET_GUEST_CONFIG_PROPERTY data too short"); + } + + let vmid = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + + let property = std::ffi::CStr::from_bytes_until_nul(&data[4..]) + .map_err(|_| anyhow::anyhow!("Invalid property in GET_GUEST_CONFIG_PROPERTY"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in property"))? + .to_string(); + + Ok(IpcRequest::GetGuestConfigProperty { vmid, property }) + } + + CfsIpcOp::VerifyToken => { + // VERIFY_TOKEN: token (null-terminated string) + let token = std::ffi::CStr::from_bytes_until_nul(data) + .map_err(|_| anyhow::anyhow!("Invalid token in VERIFY_TOKEN"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Invalid UTF-8 in token"))? + .to_string(); + + Ok(IpcRequest::VerifyToken { token }) + } + + CfsIpcOp::GetGuestConfigProperties => { + // GET_GUEST_CONFIG_PROPERTIES: vmid (u32) + num_props (u8) + property list + if data.len() < 5 { + anyhow::bail!("GET_GUEST_CONFIG_PROPERTIES data too short"); + } + + let vmid = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + let num_props = data[4] as usize; + + if num_props == 0 { + anyhow::bail!("GET_GUEST_CONFIG_PROPERTIES requires at least one property"); + } + + let mut properties = Vec::with_capacity(num_props); + let mut remaining = &data[5..]; + + for i in 0..num_props { + if remaining.is_empty() { + anyhow::bail!("Property {i} is missing"); + } + + let property = std::ffi::CStr::from_bytes_until_nul(remaining) + .map_err(|_| anyhow::anyhow!("Property {i} not null-terminated"))? + .to_str() + .map_err(|_| anyhow::anyhow!("Property {i} is not valid UTF-8"))?; + + // Validate property name starts with lowercase letter + if property.is_empty() || !property.chars().next().unwrap().is_ascii_lowercase() + { + anyhow::bail!("Property {i} does not start with [a-z]"); + } + + properties.push(property.to_string()); + remaining = &remaining[property.len() + 1..]; // +1 for null terminator + } + + // Verify no leftover data + if !remaining.is_empty() { + anyhow::bail!("Leftover data after parsing {num_props} properties"); + } + + Ok(IpcRequest::GetGuestConfigProperties { vmid, properties }) + } + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/ipc/service.rs b/src/pmxcfs-rs/pmxcfs/src/ipc/service.rs new file mode 100644 index 000000000..57eaaa1eb --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/ipc/service.rs @@ -0,0 +1,684 @@ +//! IPC Service implementation +//! +//! This module implements the IPC service handler that processes requests +//! from client applications via libqb-compatible IPC. + +use super::IpcRequest; +use async_trait::async_trait; +use pmxcfs_config::Config; +use pmxcfs_ipc::{Handler, Permissions, Request, Response}; +use pmxcfs_memdb::MemDb; +use pmxcfs_status as status; +use std::io::Error as IoError; +use std::sync::Arc; + +/// IPC handler for pmxcfs protocol operations +pub struct IpcHandler { + memdb: MemDb, + status: Arc, + config: Arc, + www_data_gid: u32, +} + +impl IpcHandler { + /// Create a new IPC handler + pub fn new( + memdb: MemDb, + status: Arc, + config: Arc, + www_data_gid: u32, + ) -> Self { + Self { + memdb, + status, + config, + www_data_gid, + } + } +} + +impl IpcHandler { + /// Handle an IPC request and return (error_code, response_data) + async fn handle_request(&self, request: IpcRequest, is_read_only: bool) -> (i32, Vec) { + let result = match request { + IpcRequest::GetFsVersion => self.handle_get_fs_version(), + IpcRequest::GetClusterInfo => self.handle_get_cluster_info(), + IpcRequest::GetGuestList => self.handle_get_guest_list(), + IpcRequest::GetConfig { path } => self.handle_get_config(&path, is_read_only), + IpcRequest::GetStatus { name, node_name } => { + self.handle_get_status(&name, &node_name) + } + IpcRequest::SetStatus { name, data } => { + if is_read_only { + Err(IoError::from_raw_os_error(libc::EPERM)) + } else { + self.handle_set_status(&name, &data).await + } + } + IpcRequest::LogClusterMsg { + priority, + ident, + tag, + message, + } => { + if is_read_only { + Err(IoError::from_raw_os_error(libc::EPERM)) + } else { + self.handle_log_cluster_msg(priority, &ident, &tag, &message) + } + } + IpcRequest::GetClusterLog { max_entries, user } => { + self.handle_get_cluster_log(max_entries, &user) + } + IpcRequest::GetRrdDump => self.handle_get_rrd_dump(), + IpcRequest::GetGuestConfigProperty { vmid, property } => { + self.handle_get_guest_config_property(vmid, &property) + } + IpcRequest::VerifyToken { token } => self.handle_verify_token(&token), + IpcRequest::GetGuestConfigProperties { vmid, properties } => { + self.handle_get_guest_config_properties(vmid, &properties) + } + }; + + match result { + Ok(response_data) => (0, response_data), + Err(e) => { + let error_code = if let Some(os_error) = e.raw_os_error() { + -os_error + } else { + -libc::EIO + }; + tracing::debug!("Request error: {}", e); + (error_code, Vec::new()) + } + } + } + + /// GET_FS_VERSION: Return filesystem version information + fn handle_get_fs_version(&self) -> Result, IoError> { + let version = serde_json::json!({ + "version": 1, + "protocol": 1, + "cluster": self.status.is_quorate(), + }); + Ok(version.to_string().into_bytes()) + } + + /// GET_CLUSTER_INFO: Return cluster member list + fn handle_get_cluster_info(&self) -> Result, IoError> { + let members = self.status.get_members(); + let member_list: Vec = members + .iter() + .map(|m| { + serde_json::json!({ + "nodeid": m.node_id, + "name": format!("node{}", m.node_id), + "ip": "127.0.0.1", + "online": true, + }) + }) + .collect(); + + let info = serde_json::json!({ + "nodelist": member_list, + "quorate": self.status.is_quorate(), + }); + Ok(info.to_string().into_bytes()) + } + + /// GET_GUEST_LIST: Return VM/CT list + fn handle_get_guest_list(&self) -> Result, IoError> { + let vmlist_data = self.status.get_vmlist(); + + // Convert VM list to JSON format matching C implementation + let mut ids = serde_json::Map::new(); + for (vmid, vm_entry) in vmlist_data { + ids.insert( + vmid.to_string(), + serde_json::json!({ + "node": vm_entry.node, + "type": vm_entry.vmtype.to_string(), + "version": vm_entry.version, + }), + ); + } + + let vmlist = serde_json::json!({ + "version": 1, + "ids": ids, + }); + + Ok(vmlist.to_string().into_bytes()) + } + + /// GET_CONFIG: Read configuration file + fn handle_get_config(&self, path: &str, is_read_only: bool) -> Result, IoError> { + // Check if read-only client is trying to access private path + if is_read_only && path.starts_with("priv/") { + return Err(IoError::from_raw_os_error(libc::EPERM)); + } + + // Read from memdb + match self.memdb.read(path, 0, 1024 * 1024) { + Ok(data) => Ok(data), + Err(_) => Err(IoError::from_raw_os_error(libc::ENOENT)), + } + } + + /// GET_STATUS: Get node status + /// + /// Matches C implementation: cfs_create_status_msg(outbuf, nodename, name) + /// where nodename is the node to query and name is the specific status key. + /// + /// C implementation (server.c:233, status.c:1640-1668): + /// - If name is empty: return ENOENT + /// - Local node: look up bare `name` key in node_status (cfs_status.kvhash) + /// - Remote node: resolve nodename→nodeid, look up in kvstore (clnode->kvhash) + fn handle_get_status(&self, name: &str, nodename: &str) -> Result, IoError> { + if name.is_empty() { + return Err(IoError::from_raw_os_error(libc::ENOENT)); + } + + let is_local = nodename.is_empty() || nodename == self.config.nodename(); + + if is_local { + // Local node: look up bare key in node_status (matches C: cfs_status.kvhash[key]) + if let Some(ns) = self.status.get_node_status(name) { + return Ok(ns.data); + } + } else { + // Remote node: resolve nodename→nodeid, look up in kvstore + // (matches C: clnode->kvhash[key] via clinfo->nodes_byname) + if let Some(info) = self.status.get_cluster_info() { + if let Some(&nodeid) = info.nodes_by_name.get(nodename) { + if let Some(data) = self.status.get_node_kv(nodeid, name) { + return Ok(data); + } + } + } + } + + Err(IoError::from_raw_os_error(libc::ENOENT)) + } + + /// SET_STATUS: Update node status + async fn handle_set_status(&self, name: &str, status_data: &[u8]) -> Result, IoError> { + self.status + .set_node_status(name.to_string(), status_data.to_vec()) + .await + .map_err(|_| IoError::from_raw_os_error(libc::EIO))?; + + Ok(Vec::new()) + } + + /// LOG_CLUSTER_MSG: Write to cluster log + fn handle_log_cluster_msg( + &self, + priority: u8, + ident: &str, + tag: &str, + message: &str, + ) -> Result, IoError> { + // Get node name from config (matches C implementation's cfs.nodename) + let node = self.config.nodename().to_string(); + + // Add log entry to cluster log + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map_err(|_| IoError::from_raw_os_error(libc::EIO))? + .as_secs(); + + let entry = status::ClusterLogEntry { + uid: 0, // Will be assigned by cluster log + timestamp, + priority, + tag: tag.to_string(), + pid: std::process::id(), + node, + ident: ident.to_string(), + message: message.to_string(), + }; + + self.status.add_log_entry(entry); + + Ok(Vec::new()) + } + + /// GET_CLUSTER_LOG: Read cluster log + /// + /// The `user` parameter is used for filtering log entries by user. + /// Matches C implementation: cfs_cluster_log_dump(outbuf, user, max) + /// Returns JSON format: {"data": [{entry1}, {entry2}, ...]} + fn handle_get_cluster_log(&self, max_entries: usize, user: &str) -> Result, IoError> { + let entries = self.status.get_log_entries_filtered(max_entries, user); + + // Format as JSON object with "data" array (matches C implementation) + let json_entries: Vec = entries + .iter() + .map(|entry| { + serde_json::json!({ + "uid": entry.uid, + "time": entry.timestamp, + "pri": entry.priority, + "tag": entry.tag, + "pid": entry.pid, + "node": entry.node, + "user": entry.ident, + "msg": entry.message, + }) + }) + .collect(); + + let response = serde_json::json!({ + "data": json_entries + }); + + Ok(response.to_string().into_bytes()) + } + + /// GET_RRD_DUMP: Get RRD data dump in C-compatible text format + fn handle_get_rrd_dump(&self) -> Result, IoError> { + let rrd_dump = self.status.get_rrd_dump(); + Ok(rrd_dump.into_bytes()) + } + + /// GET_GUEST_CONFIG_PROPERTY: Get guest config property + fn handle_get_guest_config_property( + &self, + vmid: u32, + property: &str, + ) -> Result, IoError> { + // Delegate to multi-property handler with single property + self.handle_get_guest_config_properties_impl(&[property], vmid) + } + + /// VERIFY_TOKEN: Verify authentication token + /// + /// Matches C implementation (server.c:399-448): + /// - Empty token → EINVAL + /// - Token containing newline → EINVAL + /// - Exact line match (no trimming), splitting on '\n' only + fn handle_verify_token(&self, token: &str) -> Result, IoError> { + // Reject empty tokens + if token.is_empty() { + return Err(IoError::from_raw_os_error(libc::EINVAL)); + } + + // Reject tokens containing newlines (would break line-based matching) + if token.contains('\n') { + return Err(IoError::from_raw_os_error(libc::EINVAL)); + } + + // Read token.cfg from database + match self.memdb.read("priv/token.cfg", 0, 1024 * 1024) { + Ok(token_data) => { + // Check if token exists in file (one token per line) + // C splits on '\n' only (not '\r\n') and does exact match (no trim) + let token_str = String::from_utf8_lossy(&token_data); + for line in token_str.split('\n') { + if line == token { + return Ok(Vec::new()); // Success + } + } + Err(IoError::from_raw_os_error(libc::ENOENT)) + } + Err(_) => Err(IoError::from_raw_os_error(libc::ENOENT)), + } + } + + /// GET_GUEST_CONFIG_PROPERTIES: Get multiple guest config properties + fn handle_get_guest_config_properties( + &self, + vmid: u32, + properties: &[String], + ) -> Result, IoError> { + // Convert Vec to &[&str] for the impl function + let property_refs: Vec<&str> = properties.iter().map(|s| s.as_str()).collect(); + self.handle_get_guest_config_properties_impl(&property_refs, vmid) + } + + /// Core implementation for getting guest config properties + fn handle_get_guest_config_properties_impl( + &self, + properties: &[&str], + vmid: u32, + ) -> Result, IoError> { + // Validate vmid range + if vmid > 0 && vmid < 100 { + tracing::debug!("vmid out of range: {}", vmid); + return Err(IoError::from_raw_os_error(libc::EINVAL)); + } + + // Build response as a map: vmid -> {property -> value} + let mut response_map: serde_json::Map = serde_json::Map::new(); + + if vmid >= 100 { + // Get specific VM + let vmlist = self.status.get_vmlist(); + + if !vmlist.contains_key(&vmid) { + return Err(IoError::from_raw_os_error(libc::ENOENT)); + } + + let vm_entry = vmlist.get(&vmid).unwrap(); + + // Get config path for this VM + let config_path = format!( + "nodes/{}/{}/{}.conf", + &vm_entry.node, + vm_entry.vmtype.config_dir(), + vmid + ); + + // Read config from memdb + match self.memdb.read(&config_path, 0, 1024 * 1024) { + Ok(config_data) => { + let config_str = String::from_utf8_lossy(&config_data); + let values = extract_properties(&config_str, properties); + + if !values.is_empty() { + response_map + .insert(vmid.to_string(), serde_json::to_value(&values).unwrap()); + } + } + Err(e) => { + tracing::debug!("Failed to read config for VM {}: {}", vmid, e); + return Err(IoError::from_raw_os_error(libc::EIO)); + } + } + } else { + // vmid == 0: Get properties from all VMs + let vmlist = self.status.get_vmlist(); + + for (vm_id, vm_entry) in vmlist.iter() { + let config_path = format!( + "nodes/{}/{}/{}.conf", + &vm_entry.node, + vm_entry.vmtype.config_dir(), + vm_id + ); + + // Read config from memdb + if let Ok(config_data) = self.memdb.read(&config_path, 0, 1024 * 1024) { + let config_str = String::from_utf8_lossy(&config_data); + let values = extract_properties(&config_str, properties); + + if !values.is_empty() { + response_map + .insert(vm_id.to_string(), serde_json::to_value(&values).unwrap()); + } + } + } + } + + // Serialize to JSON with pretty printing (matches C output format) + let json_str = serde_json::to_string_pretty(&response_map).map_err(|e| { + tracing::error!("Failed to serialize JSON: {}", e); + IoError::from_raw_os_error(libc::EIO) + })?; + + Ok(json_str.into_bytes()) + } +} + +/// Extract property values from a VM config file +/// +/// Parses config file line-by-line looking for "property: value" patterns. +/// Matches the C implementation's parsing behavior from status.c:767-796. +/// +/// Format: `^([a-z][a-z_]*\d*):\s*(.+?)\s*$` +/// - Property name must start with lowercase letter +/// - Followed by colon and optional whitespace +/// - Value is trimmed of leading/trailing whitespace +/// - Stops at snapshot sections (lines starting with '[') +/// +/// Returns a map of property names to their values. +fn extract_properties( + config: &str, + properties: &[&str], +) -> std::collections::HashMap { + let mut values = std::collections::HashMap::new(); + + // Parse config line by line + for line in config.lines() { + // Stop at snapshot or pending section markers (matches C implementation) + if line.starts_with('[') { + break; + } + + // Skip empty lines + if line.is_empty() { + continue; + } + + // Find colon separator (required in VM config format) + let Some(colon_pos) = line.find(':') else { + continue; + }; + + // Extract key (property name) + let key = &line[..colon_pos]; + + // Property must start with lowercase letter (matches C regex check) + if key.is_empty() || !key.chars().next().unwrap().is_ascii_lowercase() { + continue; + } + + // Extract value after colon + let value = &line[colon_pos + 1..]; + + // Trim leading and trailing whitespace from value (matches C implementation) + let value = value.trim(); + + // Skip if value is empty after trimming + if value.is_empty() { + continue; + } + + // Check if this is one of the requested properties + if properties.contains(&key) { + values.insert(key.to_string(), value.to_string()); + } + } + + values +} + +#[async_trait] +impl Handler for IpcHandler { + fn authenticate(&self, uid: u32, gid: u32) -> Option { + // Root with gid 0 gets read-write access + // Matches C: (uid == 0 && gid == 0) branch in server.c:111 + if uid == 0 && gid == 0 { + tracing::debug!( + "IPC authentication: uid={}, gid={} - granted ReadWrite (root)", + uid, + gid + ); + return Some(Permissions::ReadWrite); + } + + // www-data group gets read-only access (regardless of uid) + // Matches C: (gid == cfs.gid) branch in server.c:111 + if gid == self.www_data_gid { + tracing::debug!( + "IPC authentication: uid={}, gid={} - granted ReadOnly (www-data group)", + uid, + gid + ); + return Some(Permissions::ReadOnly); + } + + // Reject all other connections with security logging + tracing::warn!( + "IPC authentication failed: uid={}, gid={} - access denied (not root or www-data group)", + uid, + gid + ); + None + } + + async fn handle(&self, request: Request) -> Response { + // Deserialize IPC request from message ID and data + let ipc_request = match IpcRequest::deserialize(request.msg_id, &request.data) { + Ok(req) => req, + Err(e) => { + tracing::warn!( + "Failed to deserialize IPC request (msg_id={}): {}", + request.msg_id, + e + ); + return Response::err(-libc::EINVAL); + } + }; + + let (error_code, data) = self.handle_request(ipc_request, request.is_read_only).await; + + Response { error_code, data } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_properties() { + let config = r#" +# VM Configuration +memory: 2048 +cores: 4 +sockets: 1 +cpu: host +boot: order=scsi0;net0 +name: test-vm +onboot: 1 +"#; + + let properties = vec!["memory", "cores", "name", "nonexistent"]; + let result = extract_properties(config, &properties); + + assert_eq!(result.get("memory"), Some(&"2048".to_string())); + assert_eq!(result.get("cores"), Some(&"4".to_string())); + assert_eq!(result.get("name"), Some(&"test-vm".to_string())); + assert_eq!(result.get("nonexistent"), None); + } + + #[test] + fn test_extract_properties_empty_config() { + let config = ""; + let properties = vec!["memory"]; + let result = extract_properties(config, &properties); + assert!(result.is_empty()); + } + + #[test] + fn test_extract_properties_stops_at_snapshot() { + let config = r#" +memory: 2048 +cores: 4 +[snapshot] +memory: 4096 +name: snapshot-value +"#; + let properties = vec!["memory", "cores", "name"]; + let result = extract_properties(config, &properties); + + // Should stop at [snapshot] marker + assert_eq!(result.get("memory"), Some(&"2048".to_string())); + assert_eq!(result.get("cores"), Some(&"4".to_string())); + assert_eq!(result.get("name"), None); // After [snapshot], should not be parsed + } + + #[test] + fn test_extract_properties_with_special_chars() { + let config = r#" +name: test"vm +description: Line1\nLine2 +path: /path/to\file +"#; + + let properties = vec!["name", "description", "path"]; + let result = extract_properties(config, &properties); + + assert_eq!(result.get("name"), Some(&r#"test"vm"#.to_string())); + assert_eq!( + result.get("description"), + Some(&r#"Line1\nLine2"#.to_string()) + ); + assert_eq!(result.get("path"), Some(&r#"/path/to\file"#.to_string())); + } + + #[test] + fn test_extract_properties_whitespace_handling() { + let config = r#" +memory: 2048 +cores:4 +name: test-vm +"#; + + let properties = vec!["memory", "cores", "name"]; + let result = extract_properties(config, &properties); + + // Values should be trimmed of leading/trailing whitespace + assert_eq!(result.get("memory"), Some(&"2048".to_string())); + assert_eq!(result.get("cores"), Some(&"4".to_string())); + assert_eq!(result.get("name"), Some(&"test-vm".to_string())); + } + + #[test] + fn test_extract_properties_invalid_format() { + let config = r#" +Memory: 2048 +CORES: 4 +_private: value +123: value +name value +"#; + + let properties = vec!["Memory", "CORES", "_private", "123", "name"]; + let result = extract_properties(config, &properties); + + // None should match because: + // - "Memory" starts with uppercase + // - "CORES" starts with uppercase + // - "_private" starts with underscore + // - "123" starts with digit + // - "name value" has no colon + assert!(result.is_empty()); + } + + #[test] + fn test_json_serialization_with_serde() { + // Verify that serde_json properly handles escaping + let mut values = std::collections::HashMap::new(); + values.insert("name".to_string(), r#"test"vm"#.to_string()); + values.insert("description".to_string(), "Line1\nLine2".to_string()); + + let json = serde_json::to_string(&values).unwrap(); + + // serde_json should properly escape quotes and newlines + assert!(json.contains(r#"\"test\\\"vm\""#) || json.contains(r#""test\"vm""#)); + assert!(json.contains(r#"\n"#)); + } + + #[test] + fn test_json_pretty_format() { + // Verify pretty printing works + let mut response_map = serde_json::Map::new(); + let mut vm_props = std::collections::HashMap::new(); + vm_props.insert("memory".to_string(), "2048".to_string()); + vm_props.insert("cores".to_string(), "4".to_string()); + + response_map.insert("100".to_string(), serde_json::to_value(&vm_props).unwrap()); + + let json_str = serde_json::to_string_pretty(&response_map).unwrap(); + + // Pretty format should have newlines + assert!(json_str.contains('\n')); + // Should contain the VM ID and properties + assert!(json_str.contains("100")); + assert!(json_str.contains("memory")); + assert!(json_str.contains("2048")); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/lib.rs b/src/pmxcfs-rs/pmxcfs/src/lib.rs new file mode 100644 index 000000000..06b77a38b --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/lib.rs @@ -0,0 +1,13 @@ +// Library exports for testing and potential library usage + +pub mod cluster_config_service; // Cluster configuration monitoring via CMAP (matching C's confdb.c) +pub mod daemon; // Unified daemon builder with integrated PID file management +pub mod file_lock; // File locking utilities +pub mod fuse; +pub mod ipc; // IPC subsystem (request handling and service) +pub mod logging; // Runtime-adjustable logging (for .debug plugin) +pub mod memdb_callbacks; // DFSM callbacks for memdb (glue between dfsm and memdb) +pub mod plugins; +pub mod quorum_service; // Quorum tracking service (matching C's quorum.c) +pub mod restart_flag; // Restart flag management +pub mod status_callbacks; // DFSM callbacks for status kvstore (glue between dfsm and status) diff --git a/src/pmxcfs-rs/pmxcfs/src/logging.rs b/src/pmxcfs-rs/pmxcfs/src/logging.rs new file mode 100644 index 000000000..637aebb2b --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/logging.rs @@ -0,0 +1,44 @@ +/// Runtime-adjustable logging infrastructure +/// +/// This module provides the ability to change tracing filter levels at runtime, +/// matching the C implementation's behavior where the .debug plugin can dynamically +/// enable/disable debug logging. +use anyhow::Result; +use parking_lot::Mutex; +use std::sync::OnceLock; +use tracing_subscriber::{EnvFilter, reload}; + +/// Type alias for the reload handle +type ReloadHandle = reload::Handle; + +/// Global reload handle for runtime log level adjustment +static LOG_RELOAD_HANDLE: OnceLock> = OnceLock::new(); + +/// Initialize the reload handle (called once during logging setup) +pub fn set_reload_handle(handle: ReloadHandle) -> Result<()> { + LOG_RELOAD_HANDLE + .set(Mutex::new(handle)) + .map_err(|_| anyhow::anyhow!("Failed to set log reload handle - already initialized")) +} + +/// Set debug level at runtime (called by .debug plugin) +/// +/// This changes the tracing filter to either "debug" (level > 0) or "info" (level == 0), +/// matching the C implementation where writing to .debug affects cfs_debug() output. +pub fn set_debug_level(level: u8) -> Result<()> { + let filter = if level > 0 { + EnvFilter::new("debug") + } else { + EnvFilter::new("info") + }; + + if let Some(handle) = LOG_RELOAD_HANDLE.get() { + handle + .lock() + .reload(filter) + .map_err(|e| anyhow::anyhow!("Failed to reload log filter: {e}"))?; + Ok(()) + } else { + Err(anyhow::anyhow!("Log reload handle not initialized")) + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/main.rs b/src/pmxcfs-rs/pmxcfs/src/main.rs new file mode 100644 index 000000000..106af97ba --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/main.rs @@ -0,0 +1,711 @@ +use anyhow::{Context, Result}; +use clap::Parser; +use std::fs; +use std::sync::Arc; +use tracing::{debug, error, info}; +use tracing_subscriber::{EnvFilter, layer::SubscriberExt, reload, util::SubscriberInitExt}; + +use pmxcfs_rs::{ + cluster_config_service::ClusterConfigService, + daemon::{Daemon, DaemonProcess}, + file_lock::FileLock, + fuse, + ipc::IpcHandler, + memdb_callbacks::MemDbCallbacks, + plugins, + quorum_service::QuorumService, + restart_flag::RestartFlag, + status_callbacks::StatusCallbacks, +}; + +use pmxcfs_api_types::PmxcfsError; +use pmxcfs_config::Config; +use pmxcfs_dfsm::{ + Callbacks, ClusterDatabaseService, Dfsm, FuseMessage, KvStoreMessage, StatusSyncService, +}; +use pmxcfs_memdb::MemDb; +use pmxcfs_services::ServiceManager; +use pmxcfs_status as status; + +// Default paths matching the C version +const DEFAULT_MOUNT_DIR: &str = "/etc/pve"; +const DEFAULT_DB_PATH: &str = "/var/lib/pve-cluster/config.db"; +const DEFAULT_VARLIB_DIR: &str = "/var/lib/pve-cluster"; +const DEFAULT_RUN_DIR: &str = "/run/pmxcfs"; + +/// Type alias for the cluster services tuple +type ClusterServices = ( + Arc>, + Arc>, + Arc, +); + +/// Proxmox Cluster File System - Rust implementation +/// +/// This FUSE filesystem uses corosync and sqlite3 to provide a +/// cluster-wide, consistent view of config and other files. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Turn on debug messages + #[arg(short = 'd', long = "debug")] + debug: bool, + + /// Do not daemonize server + #[arg(short = 'f', long = "foreground")] + foreground: bool, + + /// Force local mode (ignore corosync.conf, force quorum) + #[arg(short = 'l', long = "local")] + local: bool, + + /// Test directory (sets all paths to subdirectories for isolated testing) + #[arg(long = "test-dir")] + test_dir: Option, + + /// Custom mount point + #[arg(long = "mount", default_value = DEFAULT_MOUNT_DIR)] + mount: std::path::PathBuf, + + /// Custom database path + #[arg(long = "db", default_value = DEFAULT_DB_PATH)] + db: std::path::PathBuf, + + /// Custom runtime directory + #[arg(long = "rundir", default_value = DEFAULT_RUN_DIR)] + rundir: std::path::PathBuf, + + /// Cluster name (CPG group name for Corosync isolation) + /// Must match C implementation's DCDB_CPG_GROUP_NAME + #[arg(long = "cluster-name", default_value = "pve_dcdb_v1")] + cluster_name: String, +} + +/// Configuration for all filesystem paths used by pmxcfs +#[derive(Debug, Clone)] +struct PathConfig { + dbfilename: std::path::PathBuf, + lockfile: std::path::PathBuf, + restart_flag_file: std::path::PathBuf, + pid_file: std::path::PathBuf, + mount_dir: std::path::PathBuf, + varlib_dir: std::path::PathBuf, + run_dir: std::path::PathBuf, + pve2_socket_path: std::path::PathBuf, // IPC server socket (libqb-compatible) + corosync_conf_path: std::path::PathBuf, + rrd_dir: std::path::PathBuf, +} + +impl PathConfig { + /// Create PathConfig from command line arguments + fn from_args(args: &Args) -> Self { + if let Some(ref test_dir) = args.test_dir { + // Test mode: all paths under test directory + Self { + dbfilename: test_dir.join("db/config.db"), + lockfile: test_dir.join("db/.pmxcfs.lockfile"), + restart_flag_file: test_dir.join("run/cfs-restart-flag"), + pid_file: test_dir.join("run/pmxcfs.pid"), + mount_dir: test_dir.join("pve"), + varlib_dir: test_dir.join("db"), + run_dir: test_dir.join("run"), + pve2_socket_path: test_dir.join("run/pve2"), + corosync_conf_path: test_dir.join("etc/corosync/corosync.conf"), + rrd_dir: test_dir.join("rrd"), + } + } else { + // Production mode: use provided args (which have defaults from clap) + let varlib_dir = args + .db + .parent() + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| std::path::PathBuf::from(DEFAULT_VARLIB_DIR)); + + Self { + dbfilename: args.db.clone(), + lockfile: varlib_dir.join(".pmxcfs.lockfile"), + restart_flag_file: args.rundir.join("cfs-restart-flag"), + pid_file: args.rundir.join("pmxcfs.pid"), + mount_dir: args.mount.clone(), + varlib_dir, + run_dir: args.rundir.clone(), + pve2_socket_path: std::path::PathBuf::from(DEFAULT_PVE2_SOCKET), + corosync_conf_path: std::path::PathBuf::from(HOST_CLUSTER_CONF_FN), + rrd_dir: std::path::PathBuf::from(DEFAULT_RRD_DIR), + } + } + } +} + +const HOST_CLUSTER_CONF_FN: &str = "/etc/corosync/corosync.conf"; + +const DEFAULT_RRD_DIR: &str = "/var/lib/rrdcached/db"; +const DEFAULT_PVE2_SOCKET: &str = "/var/run/pve2"; + +#[tokio::main] +async fn main() -> Result<()> { + // Parse command line arguments + let args = Args::parse(); + + // Initialize logging + init_logging(args.debug)?; + + // Create path configuration + let paths = PathConfig::from_args(&args); + + info!("Starting pmxcfs (Rust version)"); + debug!("Debug mode: {}", args.debug); + debug!("Foreground mode: {}", args.foreground); + debug!("Local mode: {}", args.local); + + // Log test mode if enabled + if args.test_dir.is_some() { + info!("TEST MODE: Using isolated test directory"); + info!(" Mount: {}", paths.mount_dir.display()); + info!(" Database: {}", paths.dbfilename.display()); + info!(" QB-IPC Socket: {}", paths.pve2_socket_path.display()); + info!(" Run dir: {}", paths.run_dir.display()); + info!(" RRD dir: {}", paths.rrd_dir.display()); + } + + // Get node name (equivalent to uname in C version) + let nodename = get_nodename()?; + info!("Node name: {}", nodename); + + // Resolve node IP + let node_ip = resolve_node_ip(&nodename)?; + info!("Resolved node '{}' to IP '{}'", nodename, node_ip); + + // Get www-data group ID + let www_data_gid = get_www_data_gid()?; + debug!("www-data group ID: {}", www_data_gid); + + // Create configuration + let config = Config::shared( + nodename, + node_ip, + www_data_gid, + args.debug, + args.local, + args.cluster_name.clone(), + ); + + // Set umask (027 = rwxr-x---) + unsafe { + libc::umask(0o027); + } + + // Create required directories + let is_test_mode = args.test_dir.is_some(); + create_directories(www_data_gid, &paths, is_test_mode)?; + + // Acquire lock + let _lock = FileLock::acquire(paths.lockfile.clone()).await?; + + // Initialize status subsystem with config and RRD directory + // This allows get_local_nodename() to work properly by accessing config.nodename() + let status = status::init_with_config_and_rrd(config.clone(), &paths.rrd_dir).await; + + // Check if database exists + let db_exists = paths.dbfilename.exists(); + + // Open or create database + let memdb = MemDb::open(&paths.dbfilename, !db_exists)?; + + // Check for corosync.conf in database + let mut has_corosync_conf = memdb.exists("/corosync.conf")?; + + // Import corosync.conf if it exists on disk but not in database and not in local mode + // This handles both new databases and existing databases that need the config imported + if !has_corosync_conf && !args.local { + // Try test-mode path first, then fall back to production path + // This matches C behavior and handles test environments where only some nodes + // have the test path set up (others use the shared /etc/corosync via volume) + let import_path = if paths.corosync_conf_path.exists() { + &paths.corosync_conf_path + } else { + std::path::Path::new(HOST_CLUSTER_CONF_FN) + }; + + if import_path.exists() { + import_corosync_conf(&memdb, import_path)?; + // Refresh the check after import + has_corosync_conf = memdb.exists("/corosync.conf")?; + } + } + + // Initialize cluster services if needed (matching C's pmxcfs.c) + let (dfsm, status_dfsm, quorum_service) = if has_corosync_conf && !args.local { + info!("Initializing cluster services"); + let (db_dfsm, st_dfsm, quorum) = setup_cluster_services( + &memdb, + config.clone(), + status.clone(), + &paths.corosync_conf_path, + )?; + (Some(db_dfsm), Some(st_dfsm), Some(quorum)) + } else { + if args.local { + info!("Forcing local mode"); + } else { + info!("Using local mode (corosync.conf does not exist)"); + } + status.set_quorate(true); + (None, None, None) + }; + + // Initialize cluster info in status + status.init_cluster(config.cluster_name().to_string()); + + // Initialize plugin registry + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + // Note: Node registration from corosync is handled by ClusterConfigService during + // its initialization, matching C's service_confdb behavior (confdb.c:276) + + // Daemonize if not in foreground mode (using builder pattern) + let (daemon_guard, signal_handle) = if !args.foreground { + let (process, handle) = Daemon::new() + .pid_file(paths.pid_file.clone()) + .group(www_data_gid) + .start_daemon_with_signal()?; + + match process { + DaemonProcess::Parent => { + // Parent exits here after child signals ready + std::process::exit(0); + } + DaemonProcess::Child(guard) => (Some(guard), handle), + } + } else { + (None, None) + }; + + // Mount FUSE filesystem + let fuse_task = setup_fuse( + &paths.mount_dir, + memdb.clone(), + config.clone(), + dfsm.clone(), + plugins, + status.clone(), + )?; + + // Start cluster services using ServiceManager (matching C's pmxcfs.c service initialization) + // If this fails, abort the FUSE task to prevent orphaned mount + let service_manager_handle = match setup_services( + dfsm.as_ref(), + status_dfsm.as_ref(), + quorum_service, + has_corosync_conf, + args.local, + status.clone(), + ) { + Ok(handle) => handle, + Err(e) => { + error!("Failed to setup services: {}", e); + fuse_task.abort(); + return Err(e); + } + }; + + // Scan VM list after database is loaded (matching C's memdb_open behavior) + status.scan_vmlist(&memdb); + + // Setup signal handlers BEFORE starting IPC server to ensure signals are caught + // during the startup sequence. This prevents a race where a signal arriving + // between IPC start and signal handler setup would be missed. + use tokio::signal::unix::{SignalKind, signal}; + let mut sigterm = signal(SignalKind::terminate()) + .map_err(|e| anyhow::anyhow!("Failed to setup SIGTERM handler: {e}"))?; + let mut sigint = signal(SignalKind::interrupt()) + .map_err(|e| anyhow::anyhow!("Failed to setup SIGINT handler: {e}"))?; + + // Initialize and start IPC server (libqb-compatible IPC for C clients) + // If this fails, abort FUSE task to prevent orphaned mount + info!("Initializing IPC server (libqb-compatible)"); + let ipc_handler = IpcHandler::new(memdb.clone(), status.clone(), config.clone(), www_data_gid); + let mut ipc_server = pmxcfs_ipc::Server::new("pve2", ipc_handler); + if let Err(e) = ipc_server.start() { + error!("Failed to start IPC server: {}", e); + fuse_task.abort(); + return Err(e.into()); + } + + info!("pmxcfs started successfully"); + + // Signal parent if daemonized, or write PID file in foreground mode + let _pid_guard = if let Some(handle) = signal_handle { + // Daemon mode: signal parent that we're ready (parent writes PID file and exits) + handle.signal_ready()?; + daemon_guard // Keep guard alive for cleanup on drop + } else { + // Foreground mode: write PID file now and retain guard for cleanup + Some( + Daemon::new() + .pid_file(paths.pid_file.clone()) + .group(www_data_gid) + .start_foreground()?, + ) + }; + + // Remove restart flag (matching C's timing - after all services started) + let _ = fs::remove_file(&paths.restart_flag_file); + + // Wait for shutdown signal (using pre-registered handlers) + tokio::select! { + _ = sigterm.recv() => { + info!("Received SIGTERM"); + } + _ = sigint.recv() => { + info!("Received SIGINT"); + } + } + + info!("Shutting down pmxcfs"); + + // Abort background tasks + fuse_task.abort(); + + // Create restart flag (signals restart, not permanent shutdown) + let _restart_flag = RestartFlag::create(paths.restart_flag_file.clone(), www_data_gid); + + // Stop services + ipc_server.stop(); + + // Stop cluster services via ServiceManager + if let Some(service_manager) = service_manager_handle { + info!("Shutting down cluster services via ServiceManager"); + let _ = service_manager + .shutdown(std::time::Duration::from_secs(5)) + .await; + } + + // Unmount filesystem (matching C's fuse_unmount, using lazy unmount like umount -l) + info!( + "Unmounting FUSE filesystem from {}", + paths.mount_dir.display() + ); + let mount_path_cstr = + std::ffi::CString::new(paths.mount_dir.to_string_lossy().as_ref()).unwrap(); + unsafe { + libc::umount2(mount_path_cstr.as_ptr(), libc::MNT_DETACH); + } + + info!("pmxcfs shutdown complete"); + + Ok(()) +} + +fn init_logging(debug: bool) -> Result<()> { + let filter_level = if debug { "debug" } else { "info" }; + let filter = EnvFilter::new(filter_level); + + // Create reloadable filter layer + let (filter_layer, reload_handle) = reload::Layer::new(filter); + + // Create formatter layer for console output + let fmt_layer = tracing_subscriber::fmt::layer() + .with_target(false) + .with_thread_ids(false) + .with_thread_names(false); + + // Try to connect to journald (systemd journal / syslog integration) + // Matches C implementation's openlog() call (status.c:1360) + // Falls back to console-only logging if journald is unavailable + let subscriber = tracing_subscriber::registry() + .with(filter_layer) + .with(fmt_layer); + + match tracing_journald::layer() { + Ok(journald_layer) => { + // Successfully connected to journald + subscriber.with(journald_layer).init(); + debug!("Logging to journald (syslog) enabled"); + } + Err(e) => { + // Journald not available (e.g., not running under systemd) + // Continue with console logging only + subscriber.init(); + debug!("Journald unavailable ({}), using console logging only", e); + } + } + + // Store reload handle for runtime adjustment (used by .debug plugin) + pmxcfs_rs::logging::set_reload_handle(reload_handle)?; + + Ok(()) +} + +fn get_nodename() -> Result { + let mut utsname = libc::utsname { + sysname: [0; 65], + nodename: [0; 65], + release: [0; 65], + version: [0; 65], + machine: [0; 65], + domainname: [0; 65], + }; + + unsafe { + if libc::uname(&mut utsname) != 0 { + return Err(PmxcfsError::System("Unable to get node name".into()).into()); + } + } + + let nodename_bytes = &utsname.nodename; + let nodename_cstr = unsafe { std::ffi::CStr::from_ptr(nodename_bytes.as_ptr()) }; + let mut nodename = nodename_cstr.to_string_lossy().to_string(); + + // Remove domain part if present (like C version) + if let Some(dot_pos) = nodename.find('.') { + nodename.truncate(dot_pos); + } + + Ok(nodename) +} + +fn resolve_node_ip(nodename: &str) -> Result { + use std::net::ToSocketAddrs; + + let addr_iter = (nodename, 0) + .to_socket_addrs() + .context("Failed to resolve node IP")?; + + for addr in addr_iter { + let ip = addr.ip(); + // Skip loopback addresses + if !ip.is_loopback() { + return Ok(ip); + } + } + + Err(PmxcfsError::Configuration(format!( + "Unable to resolve node name '{nodename}' to a non-loopback IP address" + )) + .into()) +} + +fn get_www_data_gid() -> Result { + use users::get_group_by_name; + + let group = get_group_by_name("www-data") + .ok_or_else(|| PmxcfsError::System("Unable to get www-data group".into()))?; + + Ok(group.gid()) +} + +fn create_directories(gid: u32, paths: &PathConfig, is_test_mode: bool) -> Result<()> { + // Create varlib directory + fs::create_dir_all(&paths.varlib_dir) + .with_context(|| format!("Failed to create {}", paths.varlib_dir.display()))?; + + // Create run directory + fs::create_dir_all(&paths.run_dir) + .with_context(|| format!("Failed to create {}", paths.run_dir.display()))?; + + // Set ownership for run directory (skip in test mode - doesn't require root) + if !is_test_mode { + let run_dir_cstr = + std::ffi::CString::new(paths.run_dir.to_string_lossy().as_ref()).unwrap(); + unsafe { + if libc::chown(run_dir_cstr.as_ptr(), 0, gid as libc::gid_t) != 0 { + return Err(PmxcfsError::System(format!( + "Failed to set ownership on {}", + paths.run_dir.display() + )) + .into()); + } + } + } + + Ok(()) +} + +fn import_corosync_conf(memdb: &MemDb, corosync_conf_path: &std::path::Path) -> Result<()> { + if let Ok(content) = fs::read_to_string(corosync_conf_path) { + info!("Importing corosync.conf from {}", corosync_conf_path.display()); + let mtime = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + memdb.create("/corosync.conf", 0, 0, mtime)?; + memdb.write("/corosync.conf", 0, 0, mtime, content.as_bytes(), false)?; + } + + Ok(()) +} + +/// Initialize cluster services (DFSM, QuorumService) +/// +/// Returns (database_dfsm, status_dfsm, quorum_service) for cluster mode +fn setup_cluster_services( + memdb: &MemDb, + config: Arc, + status: Arc, + corosync_conf_path: &std::path::Path, +) -> Result { + // Sync corosync configuration + memdb.sync_corosync_conf(Some(corosync_conf_path.to_str().unwrap()), true)?; + + // Create main DFSM for database synchronization (pmxcfs_v1 CPG group) + // Note: nodeid will be obtained via cpg_local_get() during init_cpg() + info!("Creating main DFSM instance (pmxcfs_v1)"); + let database_callbacks = MemDbCallbacks::new(memdb.clone(), status.clone()); + let database_dfsm = Arc::new(Dfsm::new( + config.cluster_name().to_string(), + database_callbacks.clone(), + )?); + database_callbacks.set_dfsm(&database_dfsm); + info!("Main DFSM created successfully"); + + // Create status DFSM for ephemeral data synchronization (pve_kvstore_v1 CPG group) + // Note: nodeid will be obtained via cpg_local_get() during init_cpg() + // IMPORTANT: Use protocol version 0 to match C implementation's kvstore DFSM + info!("Creating status DFSM instance (pve_kvstore_v1)"); + let status_callbacks: Arc> = + Arc::new(StatusCallbacks::new(status.clone())); + let status_dfsm = Arc::new(Dfsm::new_with_protocol_version( + "pve_kvstore_v1".to_string(), + status_callbacks, + 0, // Protocol version 0 to match C's kvstore + )?); + info!("Status DFSM created successfully"); + + // Create QuorumService (owns quorum handle, matching C's service_quorum) + info!("Creating QuorumService"); + let quorum_service = Arc::new(QuorumService::new(status)); + info!("QuorumService created successfully"); + + Ok((database_dfsm, status_dfsm, quorum_service)) +} + +/// Setup and mount FUSE filesystem +/// +/// Returns a task handle for the FUSE loop +fn setup_fuse( + mount_path: &std::path::Path, + memdb: MemDb, + config: Arc, + dfsm: Option>>, + plugins: Arc, + status: Arc, +) -> Result> { + // Unmount if already mounted (matching C's umount2(CFSDIR, MNT_FORCE)) + let mount_path_cstr = std::ffi::CString::new(mount_path.to_string_lossy().as_ref()).unwrap(); + unsafe { + libc::umount2(mount_path_cstr.as_ptr(), libc::MNT_FORCE); + } + + // Create mount directory + fs::create_dir_all(mount_path) + .with_context(|| format!("Failed to create mount point {}", mount_path.display()))?; + + // Spawn FUSE filesystem in background task + let mount_path = mount_path.to_path_buf(); + let fuse_task = tokio::spawn(async move { + if let Err(e) = fuse::mount_fuse(&mount_path, memdb, config, dfsm, plugins, status).await { + tracing::error!("FUSE filesystem error: {}", e); + } + }); + + Ok(fuse_task) +} + +/// Setup cluster services (quorum, confdb, dcdb, status sync) +/// +/// Returns a shutdown handle if services were started, None otherwise +fn setup_services( + dfsm: Option<&Arc>>, + status_dfsm: Option<&Arc>>, + quorum_service: Option>, + has_corosync_conf: bool, + force_local: bool, + status: Arc, +) -> Result> { + if dfsm.is_none() && status_dfsm.is_none() && quorum_service.is_none() { + return Ok(None); + } + + let mut manager = ServiceManager::new(); + + // Add ClusterDatabaseService (service_dcdb equivalent) + if let Some(dfsm_instance) = dfsm { + info!("Adding ClusterDatabaseService to ServiceManager"); + manager.add_service(Box::new(ClusterDatabaseService::new(Arc::clone( + dfsm_instance, + ))))?; + } + + // Add StatusSyncService (service_status / kvstore equivalent) + if let Some(status_dfsm_instance) = status_dfsm { + info!("Adding StatusSyncService to ServiceManager"); + manager.add_service(Box::new(StatusSyncService::new(Arc::clone( + status_dfsm_instance, + ))))?; + } + + // Add ClusterConfigService (service_confdb equivalent) - monitors Corosync configuration + if has_corosync_conf && !force_local { + info!("Adding ClusterConfigService to ServiceManager"); + manager.add_service(Box::new(ClusterConfigService::new(status)))?; + } + + // Add QuorumService (service_quorum equivalent) + if let Some(quorum_instance) = quorum_service { + info!("Adding QuorumService to ServiceManager"); + // Extract QuorumService from Arc - ServiceManager will manage it + match Arc::try_unwrap(quorum_instance) { + Ok(service) => { + manager.add_service(Box::new(service))?; + } + Err(_) => { + anyhow::bail!("Cannot unwrap QuorumService Arc - multiple references exist"); + } + } + } + + // Get shutdown token before spawning (for graceful shutdown) + let shutdown_token = manager.shutdown_token(); + + // Spawn ServiceManager in background task + let handle = manager.spawn(); + + Ok(Some(ServiceManagerHandle { + shutdown_token, + task: handle, + })) +} + +/// Handle for managing ServiceManager lifecycle +struct ServiceManagerHandle { + shutdown_token: tokio_util::sync::CancellationToken, + task: tokio::task::JoinHandle<()>, +} + +impl ServiceManagerHandle { + /// Gracefully shutdown the ServiceManager with timeout + /// + /// Signals shutdown via cancellation token, then awaits task completion + /// with a timeout. Matches C's cfs_loop_stop_worker() behavior. + async fn shutdown(self, timeout: std::time::Duration) -> Result<()> { + // Signal graceful shutdown (matches C's stop_worker_flag) + self.shutdown_token.cancel(); + + // Await completion with timeout + match tokio::time::timeout(timeout, self.task).await { + Ok(Ok(())) => { + info!("ServiceManager shut down cleanly"); + Ok(()) + } + Ok(Err(e)) => { + tracing::warn!("ServiceManager task panicked: {}", e); + Err(anyhow::anyhow!("ServiceManager task panicked: {}", e)) + } + Err(_) => { + tracing::warn!("ServiceManager shutdown timed out after {:?}", timeout); + Err(anyhow::anyhow!("ServiceManager shutdown timed out")) + } + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/memdb_callbacks.rs b/src/pmxcfs-rs/pmxcfs/src/memdb_callbacks.rs new file mode 100644 index 000000000..02a4a317c --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/memdb_callbacks.rs @@ -0,0 +1,663 @@ +/// DFSM callbacks implementation for memdb synchronization +/// +/// This module implements the DfsmCallbacks trait to integrate the DFSM +/// state machine with the memdb database for cluster-wide synchronization. +use anyhow::{Context, Result}; +use parking_lot::RwLock; +use std::sync::{Arc, Weak}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use pmxcfs_dfsm::{Callbacks, DfsmBroadcast, FuseMessage, NodeSyncInfo}; +use pmxcfs_memdb::{MemDb, MemDbIndex}; + +/// DFSM callbacks for memdb synchronization +pub struct MemDbCallbacks { + memdb: MemDb, + status: Arc, + dfsm: RwLock>>, +} + +impl MemDbCallbacks { + /// Create new callbacks for a memdb instance + pub fn new(memdb: MemDb, status: Arc) -> Arc { + Arc::new(Self { + memdb, + status, + dfsm: RwLock::new(Weak::new()), + }) + } + + /// Set the DFSM instance (called after DFSM is created) + pub fn set_dfsm(&self, dfsm: &Arc>) { + *self.dfsm.write() = Arc::downgrade(dfsm); + } + + /// Get the DFSM instance if available + fn get_dfsm(&self) -> Option>> { + self.dfsm.read().upgrade() + } + + /// Update version counters based on path changes + /// Matches the C implementation's update_node_status_version logic + fn update_version_counters(&self, path: &str) { + // Trim leading slash but use FULL path for version tracking + let path = path.trim_start_matches('/'); + + // Update path-specific version counter (use full path, not just first component) + self.status.increment_path_version(path); + + // Update vmlist version for VM configuration changes + if path.starts_with("qemu-server/") || path.starts_with("lxc/") { + self.status.increment_vmlist_version(); + } + } +} + +impl Callbacks for MemDbCallbacks { + type Message = FuseMessage; + + /// Deliver an application message + /// Returns (message_result, processed) where processed indicates if message was handled + fn deliver_message( + &self, + nodeid: u32, + pid: u32, + fuse_message: FuseMessage, + timestamp: u64, + ) -> Result<(i32, bool)> { + // C-style delivery: ALL nodes (including originator) process messages + // No loopback check needed - the originator waits for this delivery + // and uses the result as the FUSE operation return value + + tracing::debug!( + "MemDbCallbacks: delivering FUSE message from node {}/{} at timestamp {}", + nodeid, + pid, + timestamp + ); + + let mtime = timestamp as u32; + + // Dispatch to dedicated handler for each message type + match fuse_message { + FuseMessage::Create { ref path } => { + let result = self.handle_create(path, mtime); + Ok((result, result >= 0)) + } + FuseMessage::Mkdir { ref path } => { + let result = self.handle_mkdir(path, mtime); + Ok((result, result >= 0)) + } + FuseMessage::Write { + ref path, + offset, + ref data, + } => { + let result = self.handle_write(path, offset, data, mtime); + Ok((result, result >= 0)) + } + FuseMessage::Delete { ref path } => { + let result = self.handle_delete(path); + Ok((result, result >= 0)) + } + FuseMessage::Rename { ref from, ref to } => { + let result = self.handle_rename(from, to); + Ok((result, result >= 0)) + } + FuseMessage::Mtime { ref path, mtime: msg_mtime } => { + // Use mtime from message, not from timestamp (C: dcdb.c:900-901) + let result = self.handle_mtime(path, nodeid, msg_mtime); + Ok((result, result >= 0)) + } + FuseMessage::UnlockRequest { path } => { + self.handle_unlock_request(path)?; + Ok((0, true)) + } + FuseMessage::Unlock { path } => { + self.handle_unlock(path)?; + Ok((0, true)) + } + } + } + + /// Compute state checksum for verification + /// Should compute SHA-256 checksum of current state + fn compute_checksum(&self, output: &mut [u8; 32]) -> Result<()> { + tracing::debug!("MemDbCallbacks: computing database checksum"); + + let checksum = self + .memdb + .compute_database_checksum() + .context("Failed to compute database checksum")?; + + output.copy_from_slice(&checksum); + + tracing::debug!("MemDbCallbacks: checksum = {:016x?}", &checksum[..8]); + Ok(()) + } + + /// Get current state for synchronization + fn get_state(&self) -> Result> { + tracing::debug!("MemDbCallbacks: generating state for synchronization"); + + // Generate MemDbIndex from current database + let index = self + .memdb + .encode_index() + .context("Failed to encode database index")?; + + // Serialize to wire format + let serialized = index.serialize(); + + tracing::info!( + "MemDbCallbacks: state generated - version={}, entries={}, bytes={}", + index.version, + index.size, + serialized.len() + ); + + Ok(serialized) + } + + /// Process state update during synchronization + /// Called when all states have been collected from nodes + fn process_state_update(&self, states: &[NodeSyncInfo]) -> Result { + tracing::info!( + "MemDbCallbacks: processing state update from {} nodes", + states.len() + ); + + // Parse all indices from node states + let mut indices: Vec<(u32, u32, MemDbIndex)> = Vec::new(); + + for node in states { + if let Some(state_data) = &node.state { + match MemDbIndex::deserialize(state_data) { + Ok(index) => { + tracing::info!( + "MemDbCallbacks: node {}/{} - version={}, entries={}, mtime={}", + node.node_id, + node.pid, + index.version, + index.size, + index.mtime + ); + indices.push((node.node_id, node.pid, index)); + } + Err(e) => { + tracing::error!( + "MemDbCallbacks: failed to parse index from node {}/{}: {}", + node.node_id, + node.pid, + e + ); + } + } + } + } + + if indices.is_empty() { + tracing::warn!("MemDbCallbacks: no valid indices from any node"); + return Ok(true); + } + + // Find leader (highest version, or if tie, highest mtime) + // Matches C's dcdb_choose_leader_with_highest_index() + let mut leader_idx = 0; + for i in 1..indices.len() { + let (_, _, current_index) = &indices[i]; + let (_, _, leader_index) = &indices[leader_idx]; + + if current_index > leader_index { + leader_idx = i; + } + } + + let (leader_nodeid, leader_pid, leader_index) = &indices[leader_idx]; + tracing::info!( + "MemDbCallbacks: elected leader: {}/{} (version={}, mtime={})", + leader_nodeid, + leader_pid, + leader_index.version, + leader_index.mtime + ); + + // Build list of synced nodes (those whose index matches leader exactly) + let mut synced_nodes = Vec::new(); + for (nodeid, pid, index) in &indices { + // Check if indices are identical (same version, mtime, and all entries) + let is_synced = index.version == leader_index.version + && index.mtime == leader_index.mtime + && index.size == leader_index.size + && index.entries.len() == leader_index.entries.len() + && index + .entries + .iter() + .zip(leader_index.entries.iter()) + .all(|(a, b)| a.inode == b.inode && a.digest == b.digest); + + if is_synced { + synced_nodes.push((*nodeid, *pid)); + tracing::info!( + "MemDbCallbacks: node {}/{} is synced with leader", + nodeid, + pid + ); + } else { + tracing::info!("MemDbCallbacks: node {}/{} needs updates", nodeid, pid); + } + } + + // Get DFSM instance to check if we're the leader + let dfsm = self.get_dfsm(); + + // Determine if WE are the leader + let we_are_leader = dfsm + .as_ref() + .map(|d| d.get_nodeid() == *leader_nodeid && d.get_pid() == *leader_pid) + .unwrap_or(false); + + // Determine if WE are synced + let we_are_synced = dfsm + .as_ref() + .map(|d| { + let our_nodeid = d.get_nodeid(); + let our_pid = d.get_pid(); + synced_nodes + .iter() + .any(|(nid, pid)| *nid == our_nodeid && *pid == our_pid) + }) + .unwrap_or(false); + + if we_are_leader { + tracing::info!("MemDbCallbacks: we are the leader, sending updates to followers"); + + // Send updates to followers + if let Some(dfsm) = dfsm { + self.send_updates_to_followers(&dfsm, leader_index, &indices)?; + } else { + tracing::error!("MemDbCallbacks: cannot send updates - DFSM not available"); + } + + // Leader is always synced + Ok(true) + } else if we_are_synced { + tracing::info!("MemDbCallbacks: we are synced with leader"); + Ok(true) + } else { + tracing::info!("MemDbCallbacks: we need updates from leader, entering Update mode"); + Ok(false) + } + } + + /// Process incremental update from leader + /// + /// Deserializes a TreeEntry from the wire format and applies it to the local database. + /// Matches C's dcdb_parse_update_inode() function. + fn process_update(&self, nodeid: u32, pid: u32, data: &[u8]) -> Result<()> { + tracing::debug!( + "MemDbCallbacks: processing update from {}/{} ({} bytes)", + nodeid, + pid, + data.len() + ); + + // Deserialize TreeEntry from C wire format + let tree_entry = pmxcfs_memdb::TreeEntry::deserialize_from_update(data) + .context("Failed to deserialize TreeEntry from update message")?; + + tracing::info!( + "MemDbCallbacks: received update for inode {} ({}), version={}", + tree_entry.inode, + tree_entry.name, + tree_entry.version + ); + + // Apply the entry to our local database + self.memdb + .apply_tree_entry(tree_entry) + .context("Failed to apply TreeEntry to database")?; + + tracing::debug!("MemDbCallbacks: update applied successfully"); + Ok(()) + } + + /// Commit synchronized state + fn commit_state(&self) -> Result<()> { + tracing::info!("MemDbCallbacks: committing synchronized state"); + // Database commits are automatic in our implementation + + // Increment all path versions to notify clients of database reload + // Matches C's record_memdb_reload() called in database.c:607 + self.status.increment_all_path_versions(); + + // Recreate VM list after database changes (matching C's bdb_backend_commit_update) + // This ensures VM list is updated whenever the cluster database is synchronized + self.status.scan_vmlist(&self.memdb); + + Ok(()) + } + + /// Called when cluster becomes synced + fn on_synced(&self) { + tracing::info!("MemDbCallbacks: cluster is now fully synchronized"); + } +} + +// Helper methods for MemDbCallbacks (not part of trait) +impl MemDbCallbacks { + /// Handle Create message - create an empty file + /// Returns 0 on success, negative errno on failure + fn handle_create(&self, path: &str, mtime: u32) -> i32 { + match self.memdb.create(path, 0, 0, mtime) { + Ok(_) => { + tracing::info!("MemDbCallbacks: created file '{}'", path); + self.update_version_counters(path); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to create '{}': {}", path, e); + -libc::EACCES + } + } + } + + /// Handle Mkdir message - create a directory + /// Returns 0 on success, negative errno on failure + fn handle_mkdir(&self, path: &str, mtime: u32) -> i32 { + match self.memdb.create(path, libc::S_IFDIR, 0, mtime) { + Ok(_) => { + tracing::info!("MemDbCallbacks: created directory '{}'", path); + self.update_version_counters(path); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to mkdir '{}': {}", path, e); + -libc::EACCES + } + } + } + + /// Handle Write message - write data to a file + /// Returns 0 on success, negative errno on failure + fn handle_write(&self, path: &str, offset: u64, data: &[u8], mtime: u32) -> i32 { + // Create file if it doesn't exist + if let Err(e) = self.memdb.exists(path) { + tracing::warn!("MemDbCallbacks: failed to check if '{}' exists: {}", path, e); + return -libc::EIO; + } + + if !self.memdb.exists(path).unwrap_or(false) { + if let Err(e) = self.memdb.create(path, 0, 0, mtime) { + tracing::warn!("MemDbCallbacks: failed to create '{}': {}", path, e); + return -libc::EACCES; + } + } + + // Write data + if !data.is_empty() { + match self.memdb.write(path, offset, 0, mtime, data, false) { + Ok(_) => { + tracing::info!( + "MemDbCallbacks: wrote {} bytes to '{}' at offset {}", + data.len(), + path, + offset + ); + self.update_version_counters(path); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to write to '{}': {}", path, e); + -libc::EACCES + } + } + } else { + 0 + } + } + + /// Handle Delete message - delete a file or directory + /// Returns 0 on success, negative errno on failure + fn handle_delete(&self, path: &str) -> i32 { + match self.memdb.exists(path) { + Ok(exists) if exists => match self.memdb.delete(path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) { + Ok(_) => { + tracing::info!("MemDbCallbacks: deleted '{}'", path); + self.update_version_counters(path); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to delete '{}': {}", path, e); + -libc::EACCES + } + }, + Ok(_) => { + tracing::debug!("MemDbCallbacks: path '{}' already deleted", path); + 0 // Not an error - already deleted + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to check if '{}' exists: {}", path, e); + -libc::EIO + } + } + } + + /// Handle Rename message - rename a file or directory + /// Returns 0 on success, negative errno on failure + fn handle_rename(&self, from: &str, to: &str) -> i32 { + match self.memdb.exists(from) { + Ok(exists) if exists => match self.memdb.rename(from, to, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) { + Ok(_) => { + tracing::info!("MemDbCallbacks: renamed '{}' to '{}'", from, to); + self.update_version_counters(from); + self.update_version_counters(to); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to rename '{}' to '{}': {}", from, to, e); + -libc::EACCES + } + }, + Ok(_) => { + tracing::debug!("MemDbCallbacks: source path '{}' not found for rename", from); + -libc::ENOENT + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to check if '{}' exists: {}", from, e); + -libc::EIO + } + } + } + + /// Handle Mtime message - update modification time + /// Returns 0 on success, negative errno on failure + fn handle_mtime(&self, path: &str, nodeid: u32, mtime: u32) -> i32 { + match self.memdb.exists(path) { + Ok(exists) if exists => match self.memdb.set_mtime(path, nodeid, mtime) { + Ok(_) => { + tracing::info!( + "MemDbCallbacks: updated mtime for '{}' from node {}", + path, + nodeid + ); + self.update_version_counters(path); + 0 + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to update mtime for '{}': {}", path, e); + -libc::EACCES + } + }, + Ok(_) => { + tracing::debug!("MemDbCallbacks: path '{}' not found for mtime update", path); + -libc::ENOENT + } + Err(e) => { + tracing::warn!("MemDbCallbacks: failed to check if '{}' exists: {}", path, e); + -libc::EIO + } + } + } + + /// Handle UnlockRequest message - check if lock expired and broadcast Unlock if needed + /// + /// Only the leader processes unlock requests (C: dcdb.c:830-838) + fn handle_unlock_request(&self, path: String) -> Result<()> { + tracing::debug!("MemDbCallbacks: processing unlock request for: {}", path); + + // Only the leader (lowest nodeid) should process unlock requests + if let Some(dfsm) = self.get_dfsm() { + if !dfsm.is_leader() { + tracing::debug!("Not leader, ignoring unlock request for: {}", path); + return Ok(()); + } + } else { + tracing::warn!("DFSM not available, cannot process unlock request"); + return Ok(()); + } + + // Get the lock entry to compute checksum + if let Some(entry) = self.memdb.lookup_path(&path) + && entry.is_dir() + && pmxcfs_memdb::is_lock_path(&path) + { + let csum = entry.compute_checksum(); + + // Check if lock expired (C: dcdb.c:834) + if self.memdb.lock_expired(&path, &csum) { + tracing::info!("Lock expired, sending unlock message for: {}", path); + // Send Unlock message to cluster (C: dcdb.c:836) + self.get_dfsm().broadcast(FuseMessage::Unlock { path: path.clone() }); + } else { + tracing::debug!("Lock not expired for: {}", path); + } + } + + Ok(()) + } + + /// Handle Unlock message - delete an expired lock + /// + /// This is broadcast by the leader when a lock expires (C: dcdb.c:834) + fn handle_unlock(&self, path: String) -> Result<()> { + tracing::info!("MemDbCallbacks: processing unlock message for: {}", path); + + // Delete the lock directory + if let Err(e) = self.memdb.delete(&path, 0, SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() as u32) { + tracing::warn!("Failed to delete lock {}: {}", path, e); + } else { + tracing::info!("Successfully deleted lock: {}", path); + self.update_version_counters(&path); + } + + Ok(()) + } + + /// Send updates to followers (leader only) + /// + /// Compares the leader index with each follower and sends Update messages + /// for entries that differ. Matches C's dcdb_create_and_send_updates(). + fn send_updates_to_followers( + &self, + dfsm: &pmxcfs_dfsm::Dfsm, + leader_index: &MemDbIndex, + all_indices: &[(u32, u32, MemDbIndex)], + ) -> Result<()> { + use std::collections::HashSet; + + // Collect all inodes that need updating across all followers + let mut inodes_to_update: HashSet = HashSet::new(); + let mut any_follower_needs_updates = false; + + for (_nodeid, _pid, follower_index) in all_indices { + // Skip if this is us (the leader) - check if indices are identical + // Must match the same check in process_state_update() + let is_synced = follower_index.version == leader_index.version + && follower_index.mtime == leader_index.mtime + && follower_index.size == leader_index.size + && follower_index.entries.len() == leader_index.entries.len(); + + if is_synced { + continue; + } + + // This follower needs updates + any_follower_needs_updates = true; + + // Find differences between leader and this follower + let diffs = leader_index.find_differences(follower_index); + tracing::debug!( + "MemDbCallbacks: found {} differing inodes for follower", + diffs.len() + ); + inodes_to_update.extend(diffs); + } + + // If no follower needs updates at all, we're done + if !any_follower_needs_updates { + tracing::info!("MemDbCallbacks: no updates needed, all nodes are synced"); + dfsm.send_update_complete()?; + return Ok(()); + } + + tracing::info!( + "MemDbCallbacks: sending updates ({} differing entries)", + inodes_to_update.len() + ); + + // Send Update message for each differing inode + // IMPORTANT: Do NOT send the root directory entry (inode ROOT_INODE)! + // C uses inode 0 for root and never stores it in the database. + // The root exists only in memory and is recreated on database reload. + // Only send regular files and directories (inode > ROOT_INODE). + let mut sent_count = 0; + for inode in inodes_to_update { + // Skip root - it should never be sent as an UPDATE + if inode == pmxcfs_memdb::ROOT_INODE { + tracing::debug!("MemDbCallbacks: skipping root entry (inode {})", inode); + continue; + } + + // Look up the TreeEntry for this inode + match self.memdb.get_entry_by_inode(inode) { + Some(tree_entry) => { + tracing::info!( + "MemDbCallbacks: sending UPDATE for inode {:#018x} (name='{}', parent={:#018x}, type={}, version={}, size={})", + inode, + tree_entry.name, + tree_entry.parent, + tree_entry.entry_type, + tree_entry.version, + tree_entry.size + ); + + if let Err(e) = dfsm.send_update(tree_entry) { + tracing::error!( + "MemDbCallbacks: failed to send update for inode {}: {}", + inode, + e + ); + // Continue sending other updates even if one fails + } else { + sent_count += 1; + } + } + None => { + tracing::error!( + "MemDbCallbacks: cannot find TreeEntry for inode {} in database", + inode + ); + } + } + } + + tracing::info!("MemDbCallbacks: sent {} updates", sent_count); + + // Send UpdateComplete to signal end of updates + dfsm.send_update_complete()?; + tracing::info!("MemDbCallbacks: sent UpdateComplete"); + + Ok(()) + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/README.md b/src/pmxcfs-rs/pmxcfs/src/plugins/README.md new file mode 100644 index 000000000..53b0249cb --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/README.md @@ -0,0 +1,203 @@ +# PMXCFS Plugin System + +## Overview + +The plugin system provides dynamic virtual files in the `/etc/pve` filesystem that generate content on-the-fly. These files provide cluster status, configuration, and monitoring data. + +## Plugin Types + +### Function Plugins + +These plugins generate dynamic content when read: + +- `.version` - Cluster version and status information +- `.members` - Cluster membership information +- `.vmlist` - List of VMs and containers +- `.rrd` - Round-robin database dump +- `.clusterlog` - Cluster log entries +- `.debug` - Debug mode toggle + +### Symlink Plugins + +These plugins create symlinks to node-specific directories: + +- `local/` → `nodes/{nodename}/` +- `qemu-server/` → `nodes/{nodename}/qemu-server/` +- `lxc/` → `nodes/{nodename}/lxc/` +- `openvz/` → `nodes/{nodename}/openvz/` (legacy) + +## Plugin File Formats + +### .version Plugin + +**Format**: JSON + +**Fields**: +- `api` - API version (integer) +- `clinfo` - Cluster info version (integer) +- `cluster` - Cluster information object + - `name` - Cluster name (string) + - `nodes` - Number of nodes (integer) + - `quorate` - Quorum status (1 or 0) +- `starttime` - Daemon start time (Unix timestamp) +- `version` - Software version (string) +- `vmlist` - VM list version (integer) + +**Example**: +```json +{ + "api": 1, + "clinfo": 2, + "cluster": { + "name": "pmxcfs", + "nodes": 3, + "quorate": 1 + }, + "starttime": 1699876543, + "version": "9.0.6", + "vmlist": 5 +} +``` + +### .members Plugin + +**Format**: JSON with sections + +**Fields**: +- `cluster` - Cluster information object + - `name` - Cluster name (string) + - `version` - Cluster version (integer) + - `nodes` - Number of nodes (integer) + - `quorate` - Quorum status (1 or 0) +- `nodelist` - Array of node objects + - `id` - Node ID (integer) + - `name` - Node name (string) + - `online` - Online status (1 or 0) + - `ip` - Node IP address (string) + +**Example**: +```json +{ + "cluster": { + "name": "pmxcfs", + "version": 2, + "nodes": 3, + "quorate": 1 + }, + "nodelist": [ + { + "id": 1, + "name": "node1", + "online": 1, + "ip": "192.168.1.10" + }, + { + "id": 2, + "name": "node2", + "online": 1, + "ip": "192.168.1.11" + }, + { + "id": 3, + "name": "node3", + "online": 0, + "ip": "192.168.1.12" + } + ] +} +``` + +### .vmlist Plugin + +**Format**: INI-style with sections + +**Sections**: +- `[qemu]` - QEMU/KVM virtual machines +- `[lxc]` - Linux containers + +**Entry Format**: `VMIDNODEVERSION` +- `VMID` - VM/container ID (integer) +- `NODE` - Node name where the VM is defined (string) +- `VERSION` - Configuration version (integer) + +**Example**: +``` +[qemu] +100 node1 2 +101 node2 1 + +[lxc] +200 node1 1 +201 node3 2 +``` + +### .rrd Plugin + +**Format**: Text format with schema-based key-value pairs (one per line) + +**Line Format**: `{schema}/{id}:{timestamp}:{field1}:{field2}:...` +- `schema` - RRD schema name (e.g., `pve-node-9.0`, `pve-vm-9.0`, `pve-storage-9.0`) +- `id` - Resource identifier (node name, VMID, or storage name) +- `timestamp` - Unix timestamp +- `fields` - Colon-separated metric values + +Schemas include node metrics, VM metrics, and storage metrics with appropriate fields for each type. + +### .clusterlog Plugin + +**Format**: JSON with data array + +**Fields**: +- `data` - Array of log entry objects + - `time` - Unix timestamp (integer) + - `node` - Node name (string) + - `priority` - Syslog priority (integer) + - `ident` - Process identifier (string) + - `tag` - Log tag (string) + - `message` - Log message (string) + +**Example**: +```json +{ + "data": [ + { + "time": 1699876543, + "node": "node1", + "priority": 6, + "ident": "pvedaemon", + "tag": "task", + "message": "Started VM 100" + } + ] +} +``` + +### .debug Plugin + +**Format**: Plain text (single character) + +**Values**: +- `0` - Debug mode disabled +- `1` - Debug mode enabled + +**Behavior**: +- Reading returns current debug state +- Writing `1` enables debug logging +- Writing `0` disables debug logging + +## Implementation Details + +### Registry + +The plugin registry (`registry.rs`) maintains all plugin definitions and handles lookups. + +### Plugin Trait + +All plugins implement a common trait that defines: +- `get_content()` - Generate plugin content +- `set_content()` - Handle writes (for `.debug` plugin) +- `get_attr()` - Return file attributes + +### Integration with FUSE + +Plugins are integrated into the FUSE filesystem layer and appear as regular files in `/etc/pve`. diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/clusterlog.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/clusterlog.rs new file mode 100644 index 000000000..d9fd59c44 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/clusterlog.rs @@ -0,0 +1,293 @@ +/// .clusterlog Plugin - Cluster Log Entries +/// +/// This plugin provides cluster log entries in JSON format matching C implementation: +/// ```json +/// { +/// "data": [ +/// {"uid": 1, "time": 1234567890, "pri": 6, "tag": "cluster", "pid": 0, "node": "node1", "user": "root", "msg": "starting cluster log"} +/// ] +/// } +/// ``` +/// +/// The format is compatible with the C implementation which uses clog_dump_json +/// to write JSON data to clients. +/// +/// Default max_entries: 50 (matching C implementation) +use pmxcfs_status::Status; +use serde_json::json; +use std::sync::Arc; + +use super::Plugin; + +/// Clusterlog plugin - provides cluster log entries +pub struct ClusterlogPlugin { + status: Arc, + max_entries: usize, +} + +impl ClusterlogPlugin { + pub fn new(status: Arc) -> Self { + Self { + status, + max_entries: 50, + } + } + + /// Create with custom entry limit + #[allow(dead_code)] // Used in tests for custom entry limits + pub fn new_with_limit(status: Arc, max_entries: usize) -> Self { + Self { + status, + max_entries, + } + } + + /// Generate clusterlog content (C-compatible JSON format) + fn generate_content(&self) -> String { + let entries = self.status.get_log_entries(self.max_entries); + + // Convert to JSON format matching C implementation + // C format: {"data": [{"uid": ..., "time": ..., "pri": ..., "tag": ..., "pid": ..., "node": ..., "user": ..., "msg": ...}]} + let data: Vec<_> = entries + .iter() + .enumerate() + .map(|(idx, entry)| { + json!({ + "uid": idx + 1, // Sequential ID starting from 1 + "time": entry.timestamp, // Unix timestamp + "pri": entry.priority, // Priority level (numeric) + "tag": entry.tag, // Tag field + "pid": 0, // Process ID (we don't track this, set to 0) + "node": entry.node, // Node name + "user": entry.ident, // User/ident field + "msg": entry.message // Log message + }) + }) + .collect(); + + let result = json!({ + "data": data + }); + + // Convert to JSON string with formatting + serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()) + } +} + +impl Plugin for ClusterlogPlugin { + fn name(&self) -> &str { + ".clusterlog" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn mode(&self) -> u32 { + 0o440 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pmxcfs_status as status; + use std::time::{SystemTime, UNIX_EPOCH}; + + /// Test helper: add a log message to the cluster log + fn add_log_message( + status: &status::Status, + node: String, + priority: u8, + ident: String, + tag: String, + message: String, + ) { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + let entry = status::ClusterLogEntry { + uid: 0, + timestamp, + priority, + tag, + pid: 0, + node, + ident, + message, + }; + status.add_log_entry(entry); + } + + #[tokio::test] + async fn test_clusterlog_format() { + // Initialize status subsystem without RRD persistence (not needed for test) + let config = pmxcfs_test_utils::create_test_config(false); + let status = status::init_with_config(config); + + // Test that it returns valid JSON + let plugin = ClusterlogPlugin::new(status); + let result = plugin.generate_content(); + + // Should be valid JSON + assert!( + serde_json::from_str::(&result).is_ok(), + "Should return valid JSON" + ); + } + + #[tokio::test] + async fn test_clusterlog_with_entries() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = status::init_with_config(config); + + // Clear any existing log entries from other tests + status.clear_cluster_log(); + + // Add some log entries + add_log_message( + &status, + "node1".to_string(), + 6, // Info priority + "pmxcfs".to_string(), + "cluster".to_string(), + "Node joined cluster".to_string(), + ); + + add_log_message( + &status, + "node2".to_string(), + 4, // Warning priority + "pvestatd".to_string(), + "status".to_string(), + "High load detected".to_string(), + ); + + // Get clusterlog + let plugin = ClusterlogPlugin::new(status); + let result = plugin.generate_content(); + + // Parse JSON + let json: serde_json::Value = serde_json::from_str(&result).expect("Should be valid JSON"); + + // Verify structure + assert!(json.get("data").is_some(), "Should have 'data' field"); + let data = json["data"].as_array().expect("data should be array"); + + // Should have at least 2 entries + assert!(data.len() >= 2, "Should have at least 2 entries"); + + // Verify first entry has all required fields + let first_entry = &data[0]; + assert!(first_entry.get("uid").is_some(), "Should have uid"); + assert!(first_entry.get("time").is_some(), "Should have time"); + assert!(first_entry.get("pri").is_some(), "Should have pri"); + assert!(first_entry.get("tag").is_some(), "Should have tag"); + assert!(first_entry.get("pid").is_some(), "Should have pid"); + assert!(first_entry.get("node").is_some(), "Should have node"); + assert!(first_entry.get("user").is_some(), "Should have user"); + assert!(first_entry.get("msg").is_some(), "Should have msg"); + + // Verify uid starts at 1 + assert_eq!( + first_entry["uid"].as_u64().unwrap(), + 1, + "First uid should be 1" + ); + } + + #[tokio::test] + async fn test_clusterlog_entry_limit() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = status::init_with_config(config); + + // Add 10 log entries + for i in 0..10 { + add_log_message( + &status, + format!("node{i}"), + 6, + "test".to_string(), + "test".to_string(), + format!("Test message {i}"), + ); + } + + // Request only 5 entries + let plugin = ClusterlogPlugin::new_with_limit(status, 5); + let result = plugin.generate_content(); + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let data = json["data"].as_array().unwrap(); + + // Should have at most 5 entries + assert!(data.len() <= 5, "Should respect entry limit"); + } + + #[tokio::test] + async fn test_clusterlog_field_types() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = status::init_with_config(config); + + add_log_message( + &status, + "testnode".to_string(), + 5, + "testident".to_string(), + "testtag".to_string(), + "Test message content".to_string(), + ); + + let plugin = ClusterlogPlugin::new(status); + let result = plugin.generate_content(); + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let data = json["data"].as_array().unwrap(); + + if let Some(entry) = data.first() { + // uid should be number + assert!(entry["uid"].is_u64(), "uid should be number"); + + // time should be number + assert!(entry["time"].is_u64(), "time should be number"); + + // pri should be number + assert!(entry["pri"].is_u64(), "pri should be number"); + + // tag should be string + assert!(entry["tag"].is_string(), "tag should be string"); + assert_eq!(entry["tag"].as_str().unwrap(), "testtag"); + + // pid should be number (0) + assert!(entry["pid"].is_u64(), "pid should be number"); + assert_eq!(entry["pid"].as_u64().unwrap(), 0); + + // node should be string + assert!(entry["node"].is_string(), "node should be string"); + assert_eq!(entry["node"].as_str().unwrap(), "testnode"); + + // user should be string + assert!(entry["user"].is_string(), "user should be string"); + assert_eq!(entry["user"].as_str().unwrap(), "testident"); + + // msg should be string + assert!(entry["msg"].is_string(), "msg should be string"); + assert_eq!(entry["msg"].as_str().unwrap(), "Test message content"); + } + } + + #[tokio::test] + async fn test_clusterlog_empty() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = status::init_with_config(config); + + // Get clusterlog without any entries (or clear existing ones) + let plugin = ClusterlogPlugin::new_with_limit(status, 0); + let result = plugin.generate_content(); + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Should have data field with empty array + assert!(json.get("data").is_some()); + let data = json["data"].as_array().unwrap(); + assert_eq!(data.len(), 0, "Should have empty data array"); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/debug.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/debug.rs new file mode 100644 index 000000000..a8e2c8851 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/debug.rs @@ -0,0 +1,145 @@ +/// .debug Plugin - Debug Level Control +/// +/// This plugin provides read/write access to debug settings, matching the C implementation. +/// Format: "0\n" or "1\n" (debug level as text) +/// +/// When written, this actually changes the tracing filter level at runtime, +/// matching the C implementation's behavior where cfs.debug controls cfs_debug() macro output. +use anyhow::Result; +use pmxcfs_config::Config; +use std::sync::Arc; + +use super::Plugin; + +/// Debug plugin - provides debug level control +pub struct DebugPlugin { + config: Arc, +} + +impl DebugPlugin { + pub fn new(config: Arc) -> Self { + Self { config } + } + + /// Generate debug setting content (read operation) + fn generate_content(&self) -> String { + let level = self.config.debug_level(); + format!("{level}\n") + } + + /// Handle debug plugin write operation + /// + /// This changes the tracing filter level at runtime to match C implementation behavior. + /// In C, writing to .debug sets cfs.debug which controls cfs_debug() macro output. + fn handle_write(&self, data: &str) -> Result<()> { + let level: u8 = data + .trim() + .parse() + .map_err(|_| anyhow::anyhow!("Invalid debug level: must be a number"))?; + + // Update debug level in config + self.config.set_debug_level(level); + + // Actually change the tracing filter level at runtime + // This matches C implementation where cfs.debug controls logging + if let Err(e) = crate::logging::set_debug_level(level) { + tracing::error!("Failed to update log level: {}", e); + // Don't fail - just log error. The level is still stored. + } + + if level > 0 { + tracing::info!("Debug mode enabled (level {})", level); + tracing::debug!("Debug logging is now active"); + } else { + tracing::info!("Debug mode disabled"); + } + + Ok(()) + } +} + +impl Plugin for DebugPlugin { + fn name(&self) -> &str { + ".debug" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn write(&self, data: &[u8]) -> Result<()> { + let text = std::str::from_utf8(data)?; + self.handle_write(text) + } + + fn mode(&self) -> u32 { + 0o640 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_debug_read() { + let config = Arc::new(Config::new( + "test".to_string(), + "127.0.0.1".parse().unwrap(), + 33, + false, + false, + "pmxcfs".to_string(), + )); + let plugin = DebugPlugin::new(config); + let result = plugin.generate_content(); + assert_eq!(result, "0\n"); + } + + #[test] + fn test_debug_write() { + let config = Arc::new(Config::new( + "test".to_string(), + "127.0.0.1".parse().unwrap(), + 33, + false, + false, + "pmxcfs".to_string(), + )); + + let plugin = DebugPlugin::new(config.clone()); + let result = plugin.handle_write("1"); + // Note: This will fail to actually change the log level if the reload handle + // hasn't been initialized (which is expected in unit tests without full setup). + // The function should still succeed - it just warns about not being able to reload. + assert!(result.is_ok()); + + // Verify the stored level changed + assert_eq!(config.debug_level(), 1); + + // Test setting it back to 0 + let result = plugin.handle_write("0"); + assert!(result.is_ok()); + assert_eq!(config.debug_level(), 0); + } + + #[test] + fn test_invalid_debug_level() { + let config = Arc::new(Config::new( + "test".to_string(), + "127.0.0.1".parse().unwrap(), + 33, + false, + false, + "pmxcfs".to_string(), + )); + + let plugin = DebugPlugin::new(config.clone()); + + let result = plugin.handle_write("invalid"); + assert!(result.is_err()); + + let result = plugin.handle_write(""); + assert!(result.is_err()); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/members.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/members.rs new file mode 100644 index 000000000..6a584a45f --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/members.rs @@ -0,0 +1,198 @@ +/// .members Plugin - Cluster Member Information +/// +/// This plugin provides information about cluster members in JSON format: +/// { +/// "nodename": "node1", +/// "version": 5, +/// "cluster": { +/// "name": "mycluster", +/// "version": 1, +/// "nodes": 3, +/// "quorate": 1 +/// }, +/// "nodelist": { +/// "node1": { "id": 1, "online": 1, "ip": "192.168.1.10" }, +/// "node2": { "id": 2, "online": 1, "ip": "192.168.1.11" } +/// } +/// } +use pmxcfs_config::Config; +use pmxcfs_status::Status; +use serde_json::json; +use std::sync::Arc; + +use super::Plugin; + +/// Members plugin - provides cluster member information +pub struct MembersPlugin { + config: Arc, + status: Arc, +} + +impl MembersPlugin { + pub fn new(config: Arc, status: Arc) -> Self { + Self { config, status } + } + + /// Generate members information content + fn generate_content(&self) -> String { + let nodename = self.config.nodename(); + let cluster_name = self.config.cluster_name(); + + // Get cluster info from status (matches C's cfs_status access) + let cluster_info = self.status.get_cluster_info(); + let cluster_version = self.status.get_cluster_version(); + + // Get quorum status and members from status + let quorate = self.status.is_quorate(); + + // Get cluster members (for online status tracking) + let members = self.status.get_members(); + + // Create a set of online node IDs from current members + let mut online_nodes = std::collections::HashSet::new(); + for member in &members { + online_nodes.insert(member.node_id); + } + + // Count unique nodes + let node_count = online_nodes.len(); + + // Build nodelist from cluster_info + let mut nodelist = serde_json::Map::new(); + + if let Some(cluster_info) = cluster_info { + // Add all registered nodes to nodelist + for (name, node_id) in &cluster_info.nodes_by_name { + if let Some(node) = cluster_info.nodes_by_id.get(node_id) { + let is_online = online_nodes.contains(&node.node_id); + let node_info = json!({ + "id": node.node_id, + "online": if is_online { 1 } else { 0 }, + "ip": node.ip + }); + nodelist.insert(name.clone(), node_info); + } + } + + // Build the complete response + let response = json!({ + "nodename": nodename, + "version": cluster_version, + "cluster": { + "name": cluster_info.cluster_name, + "version": 1, // Cluster format version (always 1) + "nodes": node_count.max(1), // At least 1 (ourselves) + "quorate": if quorate { 1 } else { 0 } + }, + "nodelist": nodelist + }); + + response.to_string() + } else { + // No cluster info yet, return minimal response with just local node + let node_info = json!({ + "id": 0, // Unknown ID + "online": 1, // Assume online since we're running + "ip": self.config.node_ip() + }); + + let mut nodelist = serde_json::Map::new(); + nodelist.insert(nodename.to_string(), node_info); + + let response = json!({ + "nodename": nodename, + "version": cluster_version, + "cluster": { + "name": cluster_name, + "version": 1, + "nodes": 1, + "quorate": if quorate { 1 } else { 0 } + }, + "nodelist": nodelist + }); + + response.to_string() + } + } +} + +impl Plugin for MembersPlugin { + fn name(&self) -> &str { + ".members" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn mode(&self) -> u32 { + 0o440 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_members_format() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + let config = Arc::new(Config::new( + "testnode".to_string(), + "127.0.0.1".parse().unwrap(), + 33, + false, + false, + "testcluster".to_string(), + )); + + // Initialize cluster + status.init_cluster("testcluster".to_string()); + + let plugin = MembersPlugin::new(config, status); + let result = plugin.generate_content(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Should have nodename + assert_eq!(parsed["nodename"], "testnode"); + + // Should have version + assert!(parsed["version"].is_number()); + + // Should have cluster info + assert_eq!(parsed["cluster"]["name"], "testcluster"); + assert!(parsed["cluster"]["nodes"].is_number()); + assert!(parsed["cluster"]["quorate"].is_number()); + + // Should have nodelist (might be empty without actual cluster members) + assert!(parsed["nodelist"].is_object()); + } + + #[tokio::test] + async fn test_members_no_cluster() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + let config = Arc::new(Config::new( + "standalone".to_string(), + "192.168.1.100".parse().unwrap(), + 33, + false, + false, + "testcluster".to_string(), + )); + + // Don't set cluster info - should still work + let plugin = MembersPlugin::new(config, status); + let result = plugin.generate_content(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Should have minimal response + assert_eq!(parsed["nodename"], "standalone"); + assert!(parsed["cluster"].is_object()); + assert!(parsed["nodelist"].is_object()); + assert!(parsed["nodelist"]["standalone"].is_object()); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/mod.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/mod.rs new file mode 100644 index 000000000..9af9f8024 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/mod.rs @@ -0,0 +1,30 @@ +/// Plugin system for special files and dynamic content +/// +/// This module implements plugins for: +/// - func: Dynamic files generated by callbacks (.version, .members, etc.) +/// - link: Symbolic links +/// +/// Each plugin is implemented in its own source file: +/// - version.rs: .version plugin - cluster version information +/// - members.rs: .members plugin - cluster member list +/// - vmlist.rs: .vmlist plugin - VM/CT list +/// - rrd.rs: .rrd plugin - system metrics +/// - clusterlog.rs: .clusterlog plugin - cluster log entries +/// - debug.rs: .debug plugin - debug level control +mod clusterlog; +mod debug; +mod members; +mod registry; +mod rrd; +mod types; +mod version; +mod vmlist; + +// Re-export core types (only Plugin trait is used outside this module) +pub use types::Plugin; + +// Re-export registry +pub use registry::{PluginRegistry, init_plugins}; + +#[cfg(test)] +pub use registry::init_plugins_for_test; diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/registry.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/registry.rs new file mode 100644 index 000000000..425af9d2e --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/registry.rs @@ -0,0 +1,305 @@ +/// Plugin registry and initialization +use parking_lot::RwLock; +use std::collections::HashMap; +use std::sync::Arc; + +use super::clusterlog::ClusterlogPlugin; +use super::debug::DebugPlugin; +use super::members::MembersPlugin; +use super::rrd::RrdPlugin; +use super::types::{LinkPlugin, Plugin}; +use super::version::VersionPlugin; +use super::vmlist::VmlistPlugin; + +/// Plugin registry +pub struct PluginRegistry { + plugins: RwLock>>, +} + +impl Default for PluginRegistry { + fn default() -> Self { + Self::new() + } +} + +impl PluginRegistry { + pub fn new() -> Self { + Self { + plugins: RwLock::new(HashMap::new()), + } + } + + /// Register a plugin + pub fn register(&self, plugin: Arc) { + let name = plugin.name().to_string(); + self.plugins.write().insert(name, plugin); + } + + /// Get a plugin by name + pub fn get(&self, name: &str) -> Option> { + self.plugins.read().get(name).cloned() + } + + /// Check if a path is a plugin + pub fn is_plugin(&self, name: &str) -> bool { + self.plugins.read().contains_key(name) + } + + /// List all plugin names + pub fn list(&self) -> Vec { + self.plugins.read().keys().cloned().collect() + } +} + +/// Initialize the plugin registry with default plugins +pub fn init_plugins( + config: Arc, + status: Arc, +) -> Arc { + tracing::info!("Initializing plugin system for node: {}", config.nodename()); + + let registry = Arc::new(PluginRegistry::new()); + + // .version - cluster version information + let version_plugin = Arc::new(VersionPlugin::new(config.clone(), status.clone())); + registry.register(version_plugin); + + // .members - cluster member list + let members_plugin = Arc::new(MembersPlugin::new(config.clone(), status.clone())); + registry.register(members_plugin); + + // .vmlist - VM list + let vmlist_plugin = Arc::new(VmlistPlugin::new(status.clone())); + registry.register(vmlist_plugin); + + // .rrd - RRD data + let rrd_plugin = Arc::new(RrdPlugin::new(status.clone())); + registry.register(rrd_plugin); + + // .clusterlog - cluster log + let clusterlog_plugin = Arc::new(ClusterlogPlugin::new(status.clone())); + registry.register(clusterlog_plugin); + + // .debug - debug settings (read/write) + let debug_plugin = Arc::new(DebugPlugin::new(config.clone())); + registry.register(debug_plugin); + + // Symbolic link plugins - point to nodes/{nodename}/ subdirectories + // These provide convenient access to node-specific directories from the root + let nodename = config.nodename(); + + // local -> nodes/{nodename}/local + let local_link = Arc::new(LinkPlugin::new("local", format!("nodes/{nodename}"))); + registry.register(local_link); + + // qemu-server -> nodes/{nodename}/qemu-server + let qemu_link = Arc::new(LinkPlugin::new( + "qemu-server", + format!("nodes/{nodename}/qemu-server"), + )); + registry.register(qemu_link); + + // openvz -> nodes/{nodename}/openvz (legacy support) + let openvz_link = Arc::new(LinkPlugin::new( + "openvz", + format!("nodes/{nodename}/openvz"), + )); + registry.register(openvz_link); + + // lxc -> nodes/{nodename}/lxc + let lxc_link = Arc::new(LinkPlugin::new("lxc", format!("nodes/{nodename}/lxc"))); + registry.register(lxc_link); + + tracing::info!( + "Registered {} plugins ({} func plugins, 4 link plugins)", + registry.list().len(), + registry.list().len() - 4 + ); + + registry +} + +#[cfg(test)] +/// Test-only helper to create a plugin registry with a simple nodename +pub fn init_plugins_for_test(nodename: &str) -> Arc { + use pmxcfs_config::Config; + + // Create config with the specified nodename for testing + let config = Config::shared( + nodename.to_string(), + "127.0.0.1".parse().unwrap(), + 33, // www-data gid + false, + false, + "pmxcfs".to_string(), + ); + let status = pmxcfs_status::init_with_config(config.clone()); + + init_plugins(config, status) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_registry_func_plugins_exist() { + let registry = init_plugins_for_test("testnode"); + + let func_plugins = vec![ + ".version", + ".members", + ".vmlist", + ".rrd", + ".clusterlog", + ".debug", + ]; + + for plugin_name in func_plugins { + assert!( + registry.is_plugin(plugin_name), + "{plugin_name} should be registered" + ); + + let plugin = registry.get(plugin_name); + assert!(plugin.is_some(), "{plugin_name} should be accessible"); + assert_eq!(plugin.unwrap().name(), plugin_name); + } + } + + #[test] + fn test_registry_link_plugins_exist() { + let registry = init_plugins_for_test("testnode"); + + let link_plugins = vec!["local", "qemu-server", "openvz", "lxc"]; + + for plugin_name in link_plugins { + assert!( + registry.is_plugin(plugin_name), + "{plugin_name} link should be registered" + ); + + let plugin = registry.get(plugin_name); + assert!(plugin.is_some(), "{plugin_name} link should be accessible"); + assert_eq!(plugin.unwrap().name(), plugin_name); + } + } + + #[test] + fn test_registry_link_targets_use_nodename() { + // Test with different nodenames + let test_cases = vec![ + ("node1", "nodes/node1"), + ("pve-test", "nodes/pve-test"), + ("cluster-node-03", "nodes/cluster-node-03"), + ]; + + for (nodename, expected_local_target) in test_cases { + let registry = init_plugins_for_test(nodename); + + // Test local link + let local = registry.get("local").expect("local link should exist"); + let data = local.read().expect("should read link target"); + let target = String::from_utf8(data).expect("target should be UTF-8"); + assert_eq!( + target, expected_local_target, + "local link should point to nodes/{nodename} for {nodename}" + ); + + // Test qemu-server link + let qemu = registry + .get("qemu-server") + .expect("qemu-server link should exist"); + let data = qemu.read().expect("should read link target"); + let target = String::from_utf8(data).expect("target should be UTF-8"); + assert_eq!( + target, + format!("nodes/{nodename}/qemu-server"), + "qemu-server link should include nodename" + ); + + // Test lxc link + let lxc = registry.get("lxc").expect("lxc link should exist"); + let data = lxc.read().expect("should read link target"); + let target = String::from_utf8(data).expect("target should be UTF-8"); + assert_eq!( + target, + format!("nodes/{nodename}/lxc"), + "lxc link should include nodename" + ); + + // Test openvz link (legacy) + let openvz = registry.get("openvz").expect("openvz link should exist"); + let data = openvz.read().expect("should read link target"); + let target = String::from_utf8(data).expect("target should be UTF-8"); + assert_eq!( + target, + format!("nodes/{nodename}/openvz"), + "openvz link should include nodename" + ); + } + } + + #[test] + fn test_registry_nonexistent_plugin() { + let registry = init_plugins_for_test("testnode"); + + assert!(!registry.is_plugin(".nonexistent")); + assert!(registry.get(".nonexistent").is_none()); + } + + #[test] + fn test_registry_plugin_modes() { + let registry = init_plugins_for_test("testnode"); + + // .debug should be writable (0o640) + let debug = registry.get(".debug").expect(".debug should exist"); + assert_eq!(debug.mode(), 0o640, ".debug should have writable mode"); + + // All other func plugins should be read-only (0o440) + let readonly_plugins = vec![".version", ".members", ".vmlist", ".rrd", ".clusterlog"]; + for plugin_name in readonly_plugins { + let plugin = registry.get(plugin_name).unwrap(); + assert_eq!(plugin.mode(), 0o440, "{plugin_name} should be read-only"); + } + + // Link plugins should have 0o777 + let links = vec!["local", "qemu-server", "openvz", "lxc"]; + for link_name in links { + let link = registry.get(link_name).unwrap(); + assert_eq!(link.mode(), 0o777, "{link_name} should have 777 mode"); + } + } + + #[test] + fn test_link_plugins_are_symlinks() { + let registry = init_plugins_for_test("testnode"); + + // Link plugins should be identified as symlinks + let link_plugins = vec!["local", "qemu-server", "openvz", "lxc"]; + for link_name in link_plugins { + let link = registry.get(link_name).unwrap(); + assert!( + link.is_symlink(), + "{link_name} should be identified as a symlink" + ); + } + + // Func plugins should NOT be identified as symlinks + let func_plugins = vec![ + ".version", + ".members", + ".vmlist", + ".rrd", + ".clusterlog", + ".debug", + ]; + for plugin_name in func_plugins { + let plugin = registry.get(plugin_name).unwrap(); + assert!( + !plugin.is_symlink(), + "{plugin_name} should NOT be identified as a symlink" + ); + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/rrd.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/rrd.rs new file mode 100644 index 000000000..406b76727 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/rrd.rs @@ -0,0 +1,97 @@ +/// .rrd Plugin - RRD (Round-Robin Database) Metrics +/// +/// This plugin provides system metrics in text format matching C implementation: +/// ```text +/// pve2-node/nodename:timestamp:uptime:loadavg:maxcpu:cpu:iowait:memtotal:memused:... +/// pve2.3-vm/100:timestamp:status:uptime:... +/// ``` +/// +/// The format is compatible with the C implementation which uses rrd_update +/// to write data to RRD files on disk. +/// +/// Data aging: Entries older than 5 minutes are automatically removed. +use pmxcfs_status::Status; +use std::sync::Arc; + +use super::Plugin; + +/// RRD plugin - provides system metrics +pub struct RrdPlugin { + status: Arc, +} + +impl RrdPlugin { + pub fn new(status: Arc) -> Self { + Self { status } + } + + /// Generate RRD content (C-compatible text format) + fn generate_content(&self) -> String { + // Get RRD dump in text format from status module + // Format: "key:data\n" for each entry + // The status module handles data aging (removes entries >5 minutes old) + self.status.get_rrd_dump() + } +} + +impl Plugin for RrdPlugin { + fn name(&self) -> &str { + ".rrd" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn mode(&self) -> u32 { + 0o440 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_rrd_empty() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + let plugin = RrdPlugin::new(status); + let result = plugin.generate_content(); + // Empty RRD data should return just NUL terminator (C compatibility) + assert_eq!(result, "\0"); + } + + #[tokio::test] + async fn test_rrd_with_data() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Add some RRD data with proper schema + // Note: RRD file creation will fail (no rrdcached in tests), but in-memory storage works + // Node RRD (pve2 format): timestamp + 12 values + // (loadavg, maxcpu, cpu, iowait, memtotal, memused, swaptotal, swapused, roottotal, rootused, netin, netout) + let _ = status.set_rrd_data( + "pve2-node/testnode".to_string(), + "1234567890:0.5:4:1.2:0.25:8000000000:4000000000:2000000000:100000000:10000000000:5000000000:1000000:500000".to_string(), + ).await; // May fail if rrdcached not running, but in-memory storage succeeds + + // VM RRD (pve2.3 format): timestamp + 10 values + // (maxcpu, cpu, maxmem, mem, maxdisk, disk, netin, netout, diskread, diskwrite) + let _ = status + .set_rrd_data( + "pve2.3-vm/100".to_string(), + "1234567890:4:2.5:4096:2048:100000:50000:1000000:500000:10000:5000".to_string(), + ) + .await; // May fail if rrdcached not running, but in-memory storage succeeds + + let plugin = RrdPlugin::new(status); + let result = plugin.generate_content(); + + // Should contain both entries (from in-memory storage) + assert!(result.contains("pve2-node/testnode")); + assert!(result.contains("pve2.3-vm/100")); + assert!(result.contains("1234567890")); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/types.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/types.rs new file mode 100644 index 000000000..fb013b1a2 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/types.rs @@ -0,0 +1,112 @@ +/// Core plugin types and trait definitions +use anyhow::Result; + +/// Plugin trait for special file handlers +/// +/// Note: We can't use `const NAME: &'static str` as an associated constant because +/// it would make the trait not object-safe (dyn Plugin wouldn't work). Instead, +/// each implementation provides the name via the name() method. +pub trait Plugin: Send + Sync { + /// Get plugin name + fn name(&self) -> &str; + + /// Read content from this plugin + fn read(&self) -> Result>; + + /// Write content to this plugin (if supported) + fn write(&self, _data: &[u8]) -> Result<()> { + Err(anyhow::anyhow!("Write not supported for this plugin")) + } + + /// Get file mode + fn mode(&self) -> u32; + + /// Check if this is a symbolic link + fn is_symlink(&self) -> bool { + false + } +} + +/// Link plugin - symbolic links +pub struct LinkPlugin { + name: &'static str, + target: String, +} + +impl LinkPlugin { + pub fn new(name: &'static str, target: impl Into) -> Self { + Self { + name, + target: target.into(), + } + } +} + +impl Plugin for LinkPlugin { + fn name(&self) -> &str { + self.name + } + + fn read(&self) -> Result> { + Ok(self.target.as_bytes().to_vec()) + } + + fn mode(&self) -> u32 { + 0o777 // Symbolic links + } + + fn is_symlink(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // ===== LinkPlugin Tests ===== + + #[test] + fn test_link_plugin_creation() { + let plugin = LinkPlugin::new("testlink", "/target/path"); + assert_eq!(plugin.name(), "testlink"); + assert!(plugin.is_symlink()); + } + + #[test] + fn test_link_plugin_read_target() { + let target = "/path/to/target"; + let plugin = LinkPlugin::new("mylink", target); + + let result = plugin.read().unwrap(); + assert_eq!(result, target.as_bytes()); + } + + #[test] + fn test_link_plugin_mode() { + let plugin = LinkPlugin::new("link", "/target"); + assert_eq!( + plugin.mode(), + 0o777, + "Symbolic links should have mode 0o777" + ); + } + + #[test] + fn test_link_plugin_write_not_supported() { + let plugin = LinkPlugin::new("readonly", "/target"); + let result = plugin.write(b"test data"); + + assert!(result.is_err(), "LinkPlugin should not support write"); + assert!(result.unwrap_err().to_string().contains("not supported")); + } + + #[test] + fn test_link_plugin_with_unicode_target() { + let target = "/path/with/üñïçödé/target"; + let plugin = LinkPlugin::new("unicode", target); + + let result = plugin.read().unwrap(); + assert_eq!(String::from_utf8(result).unwrap(), target); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/version.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/version.rs new file mode 100644 index 000000000..bf3ab5874 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/version.rs @@ -0,0 +1,178 @@ +/// .version Plugin - Cluster Version Information +/// +/// This plugin provides comprehensive version information in JSON format: +/// { +/// "starttime": 1234567890, +/// "clinfo": 5, +/// "vmlist": 12, +/// "qemu-server": 3, +/// "lxc": 2, +/// "nodes": 1 +/// } +/// +/// All version counters are now maintained in the Status module (status/mod.rs) +/// to match the C implementation where they are stored in cfs_status. +use pmxcfs_config::Config; +use pmxcfs_status::Status; +use serde_json::json; +use std::sync::Arc; + +use super::Plugin; + +/// Version plugin - provides cluster version information +pub struct VersionPlugin { + config: Arc, + status: Arc, +} + +impl VersionPlugin { + pub fn new(config: Arc, status: Arc) -> Self { + Self { config, status } + } + + /// Generate version information content + fn generate_content(&self) -> String { + // Get cluster state from status (matches C's cfs_status access) + let members = self.status.get_members(); + let quorate = self.status.is_quorate(); + + // Count unique nodes + let mut unique_nodes = std::collections::HashSet::new(); + for member in &members { + unique_nodes.insert(member.node_id); + } + let node_count = unique_nodes.len().max(1); // At least 1 (ourselves) + + // Build base response with all version counters + let mut response = serde_json::Map::new(); + + // Basic version info + response.insert("version".to_string(), json!(env!("CARGO_PKG_VERSION"))); + response.insert("api".to_string(), json!(1)); + + // Daemon start time (from Status) + response.insert("starttime".to_string(), json!(self.status.get_start_time())); + + // Cluster info version (from Status) + response.insert( + "clinfo".to_string(), + json!(self.status.get_cluster_version()), + ); + + // VM list version (from Status) + response.insert( + "vmlist".to_string(), + json!(self.status.get_vmlist_version()), + ); + + // MemDB path versions (from Status) + // These are the paths that clients commonly monitor for changes + let path_versions = self.status.get_all_path_versions(); + for (path, version) in path_versions { + if version > 0 { + response.insert(path, json!(version)); + } + } + + // Cluster info (legacy format for compatibility) + response.insert( + "cluster".to_string(), + json!({ + "name": self.config.cluster_name(), + "nodes": node_count, + "quorate": if quorate { 1 } else { 0 } + }), + ); + + serde_json::Value::Object(response).to_string() + } +} + +impl Plugin for VersionPlugin { + fn name(&self) -> &str { + ".version" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn mode(&self) -> u32 { + 0o440 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_version_format() { + // Create Status instance without RRD persistence (not needed for test) + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Create Config instance + let config = Arc::new(Config::new( + "testnode".to_string(), + "127.0.0.1".parse().unwrap(), + 33, + false, + false, + "testcluster".to_string(), + )); + + // Initialize cluster + status.init_cluster("testcluster".to_string()); + + let plugin = VersionPlugin::new(config, status); + let result = plugin.generate_content(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Should have version + assert!(parsed["version"].is_string()); + + // Should have api + assert_eq!(parsed["api"], 1); + + // Should have starttime + assert!(parsed["starttime"].is_number()); + + // Should have clinfo and vmlist + assert!(parsed["clinfo"].is_number()); + assert!(parsed["vmlist"].is_number()); + + // Should have cluster info + assert_eq!(parsed["cluster"]["name"], "testcluster"); + assert!(parsed["cluster"]["nodes"].is_number()); + assert!(parsed["cluster"]["quorate"].is_number()); + } + + #[tokio::test] + async fn test_increment_versions() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + let initial_clinfo = status.get_cluster_version(); + status.increment_cluster_version(); + assert_eq!(status.get_cluster_version(), initial_clinfo + 1); + + let initial_vmlist = status.get_vmlist_version(); + status.increment_vmlist_version(); + assert_eq!(status.get_vmlist_version(), initial_vmlist + 1); + } + + #[tokio::test] + async fn test_path_versions() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Use actual paths from memdb_change_array + status.increment_path_version("corosync.conf"); + status.increment_path_version("corosync.conf"); + assert!(status.get_path_version("corosync.conf") >= 2); + + status.increment_path_version("user.cfg"); + assert!(status.get_path_version("user.cfg") >= 1); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/plugins/vmlist.rs b/src/pmxcfs-rs/pmxcfs/src/plugins/vmlist.rs new file mode 100644 index 000000000..0ccab7752 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/plugins/vmlist.rs @@ -0,0 +1,120 @@ +/// .vmlist Plugin - Virtual Machine List +/// +/// This plugin provides VM/CT list in JSON format: +/// { +/// "version": 1, +/// "ids": { +/// "100": { "node": "node1", "type": "qemu", "version": 1 }, +/// "101": { "node": "node2", "type": "lxc", "version": 1 } +/// } +/// } +use pmxcfs_status::Status; +use serde_json::json; +use std::sync::Arc; + +use super::Plugin; + +/// Vmlist plugin - provides VM/CT list +pub struct VmlistPlugin { + status: Arc, +} + +impl VmlistPlugin { + pub fn new(status: Arc) -> Self { + Self { status } + } + + /// Generate vmlist content + fn generate_content(&self) -> String { + let vmlist = self.status.get_vmlist(); + let vmlist_version = self.status.get_vmlist_version(); + + // Convert to JSON format expected by Proxmox + // Format: {"version":N,"ids":{vmid:{"node":"nodename","type":"qemu|lxc","version":M}}} + let mut ids = serde_json::Map::new(); + + for (vmid, entry) in vmlist { + let vm_obj = json!({ + "node": entry.node, + "type": entry.vmtype.to_string(), + "version": entry.version + }); + + ids.insert(vmid.to_string(), vm_obj); + } + + json!({ + "version": vmlist_version, + "ids": ids + }) + .to_string() + } +} + +impl Plugin for VmlistPlugin { + fn name(&self) -> &str { + ".vmlist" + } + + fn read(&self) -> anyhow::Result> { + Ok(self.generate_content().into_bytes()) + } + + fn mode(&self) -> u32 { + 0o440 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_vmlist_format() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + let plugin = VmlistPlugin::new(status); + let result = plugin.generate_content(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Should have version + assert!(parsed["version"].is_number()); + + // Should have ids object + assert!(parsed["ids"].is_object()); + } + + #[tokio::test] + async fn test_vmlist_versions() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Register a VM + status.register_vm(100, pmxcfs_status::VmType::Qemu, "node1".to_string()); + + let plugin = VmlistPlugin::new(status.clone()); + let result = plugin.generate_content(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + // Root version should be >= 1 + assert!(parsed["version"].as_u64().unwrap() >= 1); + + // VM should have version 1 + assert_eq!(parsed["ids"]["100"]["version"], 1); + assert_eq!(parsed["ids"]["100"]["type"], "qemu"); + assert_eq!(parsed["ids"]["100"]["node"], "node1"); + + // Update the VM - version should increment + status.register_vm(100, pmxcfs_status::VmType::Qemu, "node1".to_string()); + + let result2 = plugin.generate_content(); + let parsed2: serde_json::Value = serde_json::from_str(&result2).unwrap(); + + // Root version should have incremented + assert!(parsed2["version"].as_u64().unwrap() > parsed["version"].as_u64().unwrap()); + + // VM version should have incremented to 2 + assert_eq!(parsed2["ids"]["100"]["version"], 2); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/quorum_service.rs b/src/pmxcfs-rs/pmxcfs/src/quorum_service.rs new file mode 100644 index 000000000..81b25f060 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/quorum_service.rs @@ -0,0 +1,207 @@ +//! Quorum service for cluster membership tracking +//! +//! This service tracks quorum status via Corosync quorum API and updates Status. +//! It implements the Service trait for automatic retry and lifecycle management. + +use async_trait::async_trait; +use parking_lot::RwLock; +use pmxcfs_services::{Service, ServiceError}; +use rust_corosync::{self as corosync, CsError, NodeId, quorum}; +use std::sync::Arc; + +use pmxcfs_status::Status; + +/// Quorum service (matching C's service_quorum) +/// +/// Tracks cluster quorum status and member list changes. Automatically +/// retries connection if Corosync is unavailable or restarts. +pub struct QuorumService { + quorum_handle: RwLock>, + status: Arc, + /// Context pointer for callbacks (leaked Arc) + context_ptr: RwLock>, +} + +impl QuorumService { + /// Create a new quorum service + pub fn new(status: Arc) -> Self { + Self { + quorum_handle: RwLock::new(None), + status, + context_ptr: RwLock::new(None), + } + } + + /// Check if cluster is quorate (delegates to Status) + pub fn is_quorate(&self) -> bool { + self.status.is_quorate() + } +} + +#[async_trait] +impl Service for QuorumService { + fn name(&self) -> &str { + "quorum" + } + + async fn initialize(&mut self) -> pmxcfs_services::Result { + tracing::info!("Initializing quorum tracking"); + + // Quorum notification callback + fn quorum_notification( + handle: &quorum::Handle, + quorate: bool, + ring_id: quorum::RingId, + member_list: Vec, + ) { + tracing::info!( + "Quorum notification: quorate={}, ring_id=({},{}), members={:?}", + quorate, + u32::from(ring_id.nodeid), + ring_id.seq, + member_list + ); + + if quorate { + tracing::info!("Cluster is now quorate with {} members", member_list.len()); + } else { + tracing::warn!("Cluster lost quorum"); + } + + // Retrieve QuorumService from handle context + let context = match quorum::context_get(*handle) { + Ok(ctx) => ctx, + Err(e) => { + tracing::error!( + "Failed to get quorum context: {} - quorum status not updated", + e + ); + return; + } + }; + + if context == 0 { + tracing::error!("BUG: Quorum context is null - quorum status not updated"); + return; + } + + // Safety: We stored a valid Arc pointer in initialize() + unsafe { + let service_ptr = context as *const QuorumService; + let service = &*service_ptr; + service.status.set_quorate(quorate); + } + } + + // Nodelist change notification callback + fn nodelist_notification( + _handle: &quorum::Handle, + ring_id: quorum::RingId, + member_list: Vec, + joined_list: Vec, + left_list: Vec, + ) { + tracing::info!( + "Nodelist change: ring_id=({},{}), members={:?}, joined={:?}, left={:?}", + u32::from(ring_id.nodeid), + ring_id.seq, + member_list, + joined_list, + left_list + ); + } + + let model_data = quorum::ModelData::ModelV1(quorum::Model1Data { + flags: quorum::Model1Flags::None, + quorum_notification_fn: Some(quorum_notification), + nodelist_notification_fn: Some(nodelist_notification), + }); + + // Initialize quorum connection + let (handle, _quorum_type) = quorum::initialize(&model_data, 0).map_err(|e| { + ServiceError::InitializationFailed(format!("quorum_initialize failed: {e:?}")) + })?; + + // Store self pointer as context for callbacks + // We create a stable pointer that won't move - it's a pointer to self + // which is already on the heap as part of the Box + let self_ptr = self as *const Self as u64; + quorum::context_set(handle, self_ptr).map_err(|e| { + quorum::finalize(handle).ok(); + ServiceError::InitializationFailed(format!("Failed to set quorum context: {e:?}")) + })?; + + *self.context_ptr.write() = Some(self_ptr); + tracing::debug!("Stored QuorumService context: 0x{:x}", self_ptr); + + // Start tracking + quorum::trackstart(handle, corosync::TrackFlags::Changes).map_err(|e| { + quorum::finalize(handle).ok(); + ServiceError::InitializationFailed(format!("quorum_trackstart failed: {e:?}")) + })?; + + // Get file descriptor for event monitoring + let fd = quorum::fd_get(handle).map_err(|e| { + quorum::finalize(handle).ok(); + ServiceError::InitializationFailed(format!("quorum_fd_get failed: {e:?}")) + })?; + + // Dispatch once to get initial state + if let Err(e) = quorum::dispatch(handle, corosync::DispatchFlags::One) { + tracing::warn!("Initial quorum dispatch failed: {:?}", e); + } + + *self.quorum_handle.write() = Some(handle); + + tracing::info!("Quorum tracking initialized successfully with fd {}", fd); + Ok(fd) + } + + async fn dispatch(&mut self) -> pmxcfs_services::Result { + let handle = self.quorum_handle.read().ok_or_else(|| { + ServiceError::DispatchFailed("Quorum handle not initialized".to_string()) + })?; + + // Dispatch all pending events + match quorum::dispatch(handle, corosync::DispatchFlags::All) { + Ok(_) => Ok(true), + Err(CsError::CsErrTryAgain) => { + // TRY_AGAIN is expected, continue normally + Ok(true) + } + Err(CsError::CsErrLibrary) | Err(CsError::CsErrBadHandle) => { + // Connection lost, need to reinitialize + tracing::warn!( + "Quorum connection lost (library error), requesting reinitialization" + ); + Ok(false) + } + Err(e) => { + tracing::error!("Quorum dispatch failed: {:?}", e); + Err(ServiceError::DispatchFailed(format!( + "quorum_dispatch failed: {e:?}" + ))) + } + } + } + + async fn finalize(&mut self) -> pmxcfs_services::Result<()> { + tracing::info!("Finalizing quorum service"); + + // Clear quorate status + self.status.set_quorate(false); + + // Finalize quorum handle + if let Some(handle) = self.quorum_handle.write().take() + && let Err(e) = quorum::finalize(handle) + { + tracing::warn!("Error finalizing quorum: {:?}", e); + } + + // Clear context pointer + *self.context_ptr.write() = None; + + tracing::info!("Quorum service finalized"); + Ok(()) + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/restart_flag.rs b/src/pmxcfs-rs/pmxcfs/src/restart_flag.rs new file mode 100644 index 000000000..3c897b3af --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/restart_flag.rs @@ -0,0 +1,60 @@ +//! Restart flag management +//! +//! This module provides RAII-based restart flag management. The flag is +//! created on shutdown to signal that pmxcfs is restarting (not stopping). + +use std::ffi::CString; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; +use tracing::{info, warn}; + +/// RAII wrapper for restart flag +/// +/// Creates a flag file on construction to signal pmxcfs restart. +/// The file is NOT automatically removed (it's consumed by the next startup). +pub struct RestartFlag; + +impl RestartFlag { + /// Create a restart flag file + /// + /// This signals that pmxcfs is restarting (not permanently shutting down). + /// + /// # Arguments + /// + /// * `path` - Path where the restart flag should be created + /// * `gid` - Group ID to set for the file + pub fn create(path: PathBuf, gid: u32) -> Self { + // Create the restart flag file + match File::create(&path) { + Ok(mut file) => { + if let Err(e) = file.flush() { + warn!(error = %e, path = %path.display(), "Failed to flush restart flag"); + } + + // Set ownership (root:gid) + Self::set_ownership(&path, gid); + info!(path = %path.display(), "Created restart flag"); + } + Err(e) => { + warn!(error = %e, path = %path.display(), "Failed to create restart flag"); + } + } + + Self + } + + /// Set file ownership to root:gid + fn set_ownership(path: &Path, gid: u32) { + let path_str = path.to_string_lossy(); + if let Ok(path_cstr) = CString::new(path_str.as_ref()) { + // Safety: chown is called with a valid C string and valid UID/GID + unsafe { + if libc::chown(path_cstr.as_ptr(), 0, gid as libc::gid_t) != 0 { + let error = std::io::Error::last_os_error(); + warn!(error = %error, "Failed to change ownership of restart flag"); + } + } + } + } +} diff --git a/src/pmxcfs-rs/pmxcfs/src/status_callbacks.rs b/src/pmxcfs-rs/pmxcfs/src/status_callbacks.rs new file mode 100644 index 000000000..918ebee1a --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/src/status_callbacks.rs @@ -0,0 +1,352 @@ +//! DFSM Callbacks for Status Synchronization (kvstore) +//! +//! This module implements the DfsmCallbacks trait for the status kvstore DFSM instance. +//! It handles synchronization of ephemeral status data across the cluster: +//! - Key-value status updates from nodes (RRD data, IP addresses, etc.) +//! - Cluster log entries +//! +//! Equivalent to C implementation's kvstore DFSM callbacks in status.c +//! +//! Note: The kvstore DFSM doesn't use FuseMessage like the main database DFSM. +//! It uses raw CPG messages for lightweight status synchronization. +//! Most DfsmCallbacks methods are stubbed since status data is ephemeral and +//! doesn't require the full database synchronization machinery. + +use pmxcfs_dfsm::{Callbacks, KvStoreMessage, NodeSyncInfo}; +use pmxcfs_status::Status; +use std::sync::Arc; +use tracing::{debug, warn}; + +/// Callbacks for status synchronization DFSM (kvstore) +/// +/// This implements the DfsmCallbacks trait but only uses basic CPG event handling. +/// Most methods are stubbed since kvstore doesn't use database synchronization. +pub struct StatusCallbacks { + status: Arc, +} + +impl StatusCallbacks { + /// Create new status callbacks + pub fn new(status: Arc) -> Self { + Self { status } + } +} + +impl Callbacks for StatusCallbacks { + type Message = KvStoreMessage; + + /// Deliver a message - handles KvStore messages for status synchronization + /// + /// The kvstore DFSM handles KvStore messages (UPDATE, LOG, etc.) for + /// ephemeral status data synchronization across the cluster. + fn deliver_message( + &self, + nodeid: u32, + pid: u32, + kvstore_message: KvStoreMessage, + timestamp: u64, + ) -> anyhow::Result<(i32, bool)> { + debug!(nodeid, pid, timestamp, "Delivering KvStore message"); + + // Handle different KvStore message types + match kvstore_message { + KvStoreMessage::Update { key, value } => { + debug!(key, value_len = value.len(), "KvStore UPDATE"); + + // Store the key-value data for this node (matches C's cfs_kvstore_node_set) + self.status.set_node_kv(nodeid, key, value); + Ok((0, true)) + } + KvStoreMessage::Log { + time, + priority, + node, + ident, + tag, + message, + } => { + debug!( + time, priority, %node, %ident, %tag, %message, + "KvStore LOG" + ); + + // Add log entry to cluster log + if let Err(e) = self + .status + .add_remote_cluster_log(time, priority, node, ident, tag, message) + { + warn!(error = %e, "Failed to add cluster log entry"); + } + + Ok((0, true)) + } + KvStoreMessage::UpdateComplete => { + debug!("KvStore UpdateComplete"); + Ok((0, true)) + } + } + } + + /// Compute checksum (not used by kvstore - ephemeral data doesn't need checksums) + fn compute_checksum(&self, output: &mut [u8; 32]) -> anyhow::Result<()> { + // Status data is ephemeral and doesn't use checksums + output.fill(0); + Ok(()) + } + + /// Get state for synchronization (returns cluster log state) + /// + /// Returns the cluster log in C-compatible binary format (clog_base_t). + /// This enables mixed C/Rust cluster operation - C nodes can deserialize + /// the state we send, and we can deserialize states from C nodes. + fn get_state(&self) -> anyhow::Result> { + debug!("Status kvstore: get_state called - serializing cluster log"); + self.status.get_cluster_log_state() + } + + /// Process state update (handles cluster log state sync) + /// + /// Deserializes cluster log states from remote nodes and merges them with + /// the local log. This enables cluster-wide log synchronization in mixed + /// C/Rust clusters. + fn process_state_update(&self, states: &[NodeSyncInfo]) -> anyhow::Result { + debug!( + "Status kvstore: process_state_update called with {} states", + states.len() + ); + + if states.is_empty() { + return Ok(true); + } + + self.status.merge_cluster_log_states(states)?; + Ok(true) + } + + /// Process incremental update (not used by kvstore) + /// + /// Kvstore uses direct CPG messages (UPDATE, LOG) instead of incremental sync + fn process_update(&self, _nodeid: u32, _pid: u32, _data: &[u8]) -> anyhow::Result<()> { + warn!("Status kvstore: received unexpected process_update call"); + Ok(()) + } + + /// Commit state (no-op for kvstore - ephemeral data, no database commit) + fn commit_state(&self) -> anyhow::Result<()> { + // No commit needed for ephemeral status data + Ok(()) + } + + /// Called when cluster becomes synced + fn on_synced(&self) { + debug!("Status kvstore: cluster synced"); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pmxcfs_dfsm::KvStoreMessage; + use pmxcfs_status::ClusterLogEntry; + + #[test] + fn test_kvstore_update_message_handling() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status.clone()); + + // Initialize cluster and register node 2 + status.init_cluster("test-cluster".to_string()); + status.register_node(2, "node2".to_string(), "192.168.1.11".to_string()); + + // Simulate receiving a kvstore UPDATE message from node 2 + let key = "test-key".to_string(); + let value = b"test-value".to_vec(); + let message = KvStoreMessage::Update { + key: key.clone(), + value: value.clone(), + }; + + let result = callbacks.deliver_message(2, 1000, message, 12345); + assert!(result.is_ok(), "deliver_message should succeed"); + + let (res, continue_processing) = result.unwrap(); + assert_eq!(res, 0, "Result code should be 0 for success"); + assert!(continue_processing, "Should continue processing"); + + // Verify the data was stored in kvstore + let stored_value = status.get_node_kv(2, &key); + assert_eq!( + stored_value, + Some(value), + "Should store the key-value pair for node 2" + ); + } + + #[test] + fn test_kvstore_update_multiple_nodes() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status.clone()); + + // Initialize cluster and register nodes + status.init_cluster("test-cluster".to_string()); + status.register_node(1, "node1".to_string(), "192.168.1.10".to_string()); + status.register_node(2, "node2".to_string(), "192.168.1.11".to_string()); + + // Store data from multiple nodes + let msg1 = KvStoreMessage::Update { + key: "ip".to_string(), + value: b"192.168.1.10".to_vec(), + }; + let msg2 = KvStoreMessage::Update { + key: "ip".to_string(), + value: b"192.168.1.11".to_vec(), + }; + + callbacks.deliver_message(1, 1000, msg1, 12345).unwrap(); + callbacks.deliver_message(2, 1001, msg2, 12346).unwrap(); + + // Verify each node's data is stored separately + assert_eq!( + status.get_node_kv(1, "ip"), + Some(b"192.168.1.10".to_vec()), + "Node 1 IP should be stored" + ); + assert_eq!( + status.get_node_kv(2, "ip"), + Some(b"192.168.1.11".to_vec()), + "Node 2 IP should be stored" + ); + } + + #[test] + fn test_kvstore_log_message_handling() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status.clone()); + + // Clear any existing log entries + status.clear_cluster_log(); + + // Simulate receiving a LOG message + let message = KvStoreMessage::Log { + time: 1234567890, + priority: 6, // LOG_INFO + node: "node1".to_string(), + ident: "pmxcfs".to_string(), + tag: "cluster".to_string(), + message: "Test log entry".to_string(), + }; + + let result = callbacks.deliver_message(1, 1000, message, 12345); + assert!(result.is_ok(), "LOG message delivery should succeed"); + + // Verify the log entry was added + let log_entries = status.get_log_entries(10); + assert_eq!(log_entries.len(), 1, "Should have 1 log entry"); + assert_eq!(log_entries[0].node, "node1"); + assert_eq!(log_entries[0].message, "Test log entry"); + assert_eq!(log_entries[0].priority, 6); + } + + #[test] + fn test_kvstore_update_complete_message() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status.clone()); + + let message = KvStoreMessage::UpdateComplete; + + let result = callbacks.deliver_message(1, 1000, message, 12345); + assert!(result.is_ok(), "UpdateComplete should succeed"); + + let (res, continue_processing) = result.unwrap(); + assert_eq!(res, 0); + assert!(continue_processing); + } + + #[test] + fn test_compute_checksum_returns_zeros() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status); + + let mut checksum = [0u8; 32]; + let result = callbacks.compute_checksum(&mut checksum); + + assert!(result.is_ok(), "compute_checksum should succeed"); + assert_eq!( + checksum, [0u8; 32], + "Checksum should be all zeros for ephemeral data" + ); + } + + #[test] + fn test_get_state_returns_cluster_log() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status.clone()); + + // Add a log entry first + status.clear_cluster_log(); + let entry = ClusterLogEntry { + uid: 0, + timestamp: 1234567890, + priority: 6, + tag: "test".to_string(), + pid: 0, + node: "node1".to_string(), + ident: "pmxcfs".to_string(), + message: "Test message".to_string(), + }; + status.add_log_entry(entry); + + // Get state should return serialized cluster log + let result = callbacks.get_state(); + assert!(result.is_ok(), "get_state should succeed"); + + let state = result.unwrap(); + assert!( + !state.is_empty(), + "State should not be empty when cluster log has entries" + ); + } + + #[test] + fn test_process_state_update_with_empty_states() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status); + + let states: Vec = vec![]; + let result = callbacks.process_state_update(&states); + + assert!(result.is_ok(), "Empty state update should succeed"); + assert!(result.unwrap(), "Should return true for empty states"); + } + + #[test] + fn test_process_update_logs_warning() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status); + + // process_update is not used by kvstore, but should not fail + let result = callbacks.process_update(1, 1000, &[1, 2, 3]); + assert!( + result.is_ok(), + "process_update should succeed even though not used" + ); + } + + #[test] + fn test_commit_state_is_noop() { + let config = pmxcfs_test_utils::create_test_config(false); + let status = Arc::new(Status::new(config, None)); + let callbacks = StatusCallbacks::new(status); + + let result = callbacks.commit_state(); + assert!(result.is_ok(), "commit_state should succeed (no-op)"); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/common/mod.rs b/src/pmxcfs-rs/pmxcfs/tests/common/mod.rs new file mode 100644 index 000000000..a134c948b --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/common/mod.rs @@ -0,0 +1,221 @@ +//! Common test utilities for pmxcfs integration tests +//! +//! This module provides shared test setup and helper functions to ensure +//! consistency across all integration tests and reduce code duplication. + +use anyhow::Result; +use pmxcfs_config::Config; +use pmxcfs_memdb::MemDb; +use pmxcfs_status::Status; +use std::sync::Arc; +use tempfile::TempDir; + +// Test constants +pub const TEST_MTIME: u32 = 1234567890; +pub const TEST_NODE_NAME: &str = "testnode"; +pub const TEST_CLUSTER_NAME: &str = "test-cluster"; +pub const TEST_WWW_DATA_GID: u32 = 33; + +/// Creates a standard test configuration +/// +/// # Arguments +/// * `local_mode` - Whether to run in local mode (no cluster) +/// +/// # Returns +/// Arc-wrapped Config suitable for testing +pub fn create_test_config(local_mode: bool) -> Arc { + Config::shared( + TEST_NODE_NAME.to_string(), + "127.0.0.1".parse().unwrap(), + TEST_WWW_DATA_GID, + false, // debug mode + local_mode, + TEST_CLUSTER_NAME.to_string(), + ) +} + +/// Creates a test database with standard directory structure +/// +/// Creates the following directories: +/// - /nodes/{nodename}/qemu-server +/// - /nodes/{nodename}/lxc +/// - /nodes/{nodename}/priv +/// - /priv/lock/qemu-server +/// - /priv/lock/lxc +/// - /qemu-server +/// - /lxc +/// +/// # Returns +/// (TempDir, MemDb) - The temp directory must be kept alive for database to persist +pub fn create_test_db() -> Result<(TempDir, MemDb)> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let db = MemDb::open(&db_path, true)?; + + // Create standard directory structure + let now = TEST_MTIME; + + // Node-specific directories + db.create("/nodes", libc::S_IFDIR, 0, now)?; + db.create(&format!("/nodes/{}", TEST_NODE_NAME), libc::S_IFDIR, 0, now)?; + db.create( + &format!("/nodes/{}/qemu-server", TEST_NODE_NAME), libc::S_IFDIR, 0, + now, + )?; + db.create( + &format!("/nodes/{}/lxc", TEST_NODE_NAME), libc::S_IFDIR, 0, + now, + )?; + db.create( + &format!("/nodes/{}/priv", TEST_NODE_NAME), libc::S_IFDIR, 0, + now, + )?; + + // Global directories + db.create("/priv", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock/qemu-server", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock/lxc", libc::S_IFDIR, 0, now)?; + db.create("/qemu-server", libc::S_IFDIR, 0, now)?; + db.create("/lxc", libc::S_IFDIR, 0, now)?; + + Ok((temp_dir, db)) +} + +/// Creates a minimal test database (no standard directories) +/// +/// Use this when you want full control over database structure +/// +/// # Returns +/// (TempDir, MemDb) - The temp directory must be kept alive for database to persist +#[allow(dead_code)] +pub fn create_minimal_test_db() -> Result<(TempDir, MemDb)> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let db = MemDb::open(&db_path, true)?; + Ok((temp_dir, db)) +} + +/// Creates a test status instance +/// +/// NOTE: This uses the global Status singleton. Be aware that tests using this +/// will share the same Status instance and may interfere with each other if run +/// in parallel. Consider running Status-dependent tests serially using: +/// `#[serial]` attribute from the `serial_test` crate. +/// +/// # Returns +/// Arc-wrapped Status instance +pub fn create_test_status() -> Arc { + pmxcfs_status::init() +} + +/// Clears all VMs from the status subsystem +/// +/// Useful for ensuring clean state before tests that register VMs. +/// +/// # Arguments +/// * `status` - The status instance to clear +#[allow(dead_code)] +pub fn clear_test_vms(status: &Arc) { + let existing_vms: Vec = status.get_vmlist().keys().copied().collect(); + for vmid in existing_vms { + status.delete_vm(vmid); + } +} + +/// Creates test VM configuration content +/// +/// # Arguments +/// * `vmid` - VM ID +/// * `cores` - Number of CPU cores +/// * `memory` - Memory in MB +/// +/// # Returns +/// Configuration file content as bytes +#[allow(dead_code)] +pub fn create_vm_config(vmid: u32, cores: u32, memory: u32) -> Vec { + format!( + "name: test-vm-{}\ncores: {}\nmemory: {}\nbootdisk: scsi0\n", + vmid, cores, memory + ) + .into_bytes() +} + +/// Creates test CT (container) configuration content +/// +/// # Arguments +/// * `vmid` - Container ID +/// * `cores` - Number of CPU cores +/// * `memory` - Memory in MB +/// +/// # Returns +/// Configuration file content as bytes +#[allow(dead_code)] +pub fn create_ct_config(vmid: u32, cores: u32, memory: u32) -> Vec { + format!( + "cores: {}\nmemory: {}\nrootfs: local:100/vm-{}-disk-0.raw\n", + cores, memory, vmid + ) + .into_bytes() +} + +/// Creates a test lock path for a VM config +/// +/// # Arguments +/// * `vmid` - VM ID +/// * `vm_type` - "qemu-server" or "lxc" +/// +/// # Returns +/// Lock path in format `/priv/lock/{vm_type}/{vmid}.conf` +pub fn create_lock_path(vmid: u32, vm_type: &str) -> String { + format!("/priv/lock/{}/{}.conf", vm_type, vmid) +} + +/// Creates a test config path for a VM +/// +/// # Arguments +/// * `vmid` - VM ID +/// * `vm_type` - "qemu-server" or "lxc" +/// +/// # Returns +/// Config path in format `/{vm_type}/{vmid}.conf` +pub fn create_config_path(vmid: u32, vm_type: &str) -> String { + format!("/{}/{}.conf", vm_type, vmid) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_test_config() { + let config = create_test_config(true); + assert_eq!(config.nodename(), TEST_NODE_NAME); + assert_eq!(config.cluster_name(), TEST_CLUSTER_NAME); + assert!(config.is_local_mode()); + } + + #[test] + fn test_create_test_db() -> Result<()> { + let (_temp_dir, db) = create_test_db()?; + + // Verify standard directories exist + assert!(db.exists("/nodes")?, "Should have /nodes"); + assert!(db.exists("/qemu-server")?, "Should have /qemu-server"); + assert!(db.exists("/priv/lock")?, "Should have /priv/lock"); + + Ok(()) + } + + #[test] + fn test_path_helpers() { + assert_eq!( + create_lock_path(100, "qemu-server"), + "/priv/lock/qemu-server/100.conf" + ); + assert_eq!( + create_config_path(100, "qemu-server"), + "/qemu-server/100.conf" + ); + } +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/fuse_basic_test.rs b/src/pmxcfs-rs/pmxcfs/tests/fuse_basic_test.rs new file mode 100644 index 000000000..0fb77d639 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/fuse_basic_test.rs @@ -0,0 +1,216 @@ +/// Basic FUSE subsystem test +/// +/// This test verifies core FUSE functionality without actually mounting +/// to avoid test complexity and timeouts +use anyhow::Result; +use pmxcfs_config::Config; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::plugins; +use tempfile::TempDir; + +#[test] +fn test_fuse_subsystem_components() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + + // 1. Create memdb with test data + let memdb = MemDb::open(&db_path, true)?; + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + memdb.create("/testdir", libc::S_IFDIR, 0, now)?; + memdb.create("/testdir/file1.txt", libc::S_IFREG, 0, now)?; + memdb.write("/testdir/file1.txt", 0, 0, now, b"Hello pmxcfs!", false)?; + + // 2. Create config + println!("\n2. Creating FUSE configuration..."); + let config = Config::shared( + "testnode".to_string(), + "127.0.0.1".parse().unwrap(), + 1000, + false, + true, + "test-cluster".to_string(), + ); + + // 3. Initialize status and plugins + println!("\n3. Initializing status and plugin registry..."); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status); + let plugin_list = plugins.list(); + println!(" Available plugins: {:?}", plugin_list); + assert!(plugin_list.len() > 0, "Should have some plugins"); + + // 4. Verify plugin functionality + for plugin_name in &plugin_list { + if let Some(plugin) = plugins.get(plugin_name) { + match plugin.read() { + Ok(data) => { + println!( + " [OK] Plugin '{}' readable ({} bytes)", + plugin_name, + data.len() + ); + } + Err(e) => { + println!(" [WARN] Plugin '{}' error: {}", plugin_name, e); + } + } + } + } + + // 5. Verify memdb data is accessible + println!("\n5. Verifying memdb data accessibility..."); + assert!(memdb.exists("/testdir")?, "testdir should exist"); + assert!( + memdb.exists("/testdir/file1.txt")?, + "file1.txt should exist" + ); + + let data = memdb.read("/testdir/file1.txt", 0, 1024)?; + assert_eq!(&data[..], b"Hello pmxcfs!"); + + // 6. Test write operations + let new_data = b"Modified!"; + memdb.write("/testdir/file1.txt", 0, 0, now, new_data, true)?; + let data = memdb.read("/testdir/file1.txt", 0, 1024)?; + assert_eq!(&data[..], b"Modified!"); + + // 7. Test directory operations + memdb.create("/newdir", libc::S_IFDIR, 0, now)?; + memdb.create("/newdir/newfile.txt", libc::S_IFREG, 0, now)?; + memdb.write("/newdir/newfile.txt", 0, 0, now, b"New content", false)?; + + let entries = memdb.readdir("/")?; + let dir_names: Vec<&String> = entries.iter().map(|e| &e.name).collect(); + println!(" Root entries: {:?}", dir_names); + assert!( + dir_names.iter().any(|n| n == &"testdir"), + "testdir should be in root" + ); + assert!( + dir_names.iter().any(|n| n == &"newdir"), + "newdir should be in root" + ); + + // 8. Test deletion + memdb.delete("/newdir/newfile.txt", 0, 1000)?; + memdb.delete("/newdir", 0, 1000)?; + assert!(!memdb.exists("/newdir")?, "newdir should be deleted"); + + Ok(()) +} + +#[test] +fn test_fuse_private_path_detection() -> Result<()> { + // This tests the logic that would be used in the FUSE filesystem + // to determine if paths should have restricted permissions + + let test_cases = vec![ + ("/priv", true, "root priv should be private"), + ("/priv/test", true, "priv subdir should be private"), + ("/nodes/node1/priv", true, "node priv should be private"), + ( + "/nodes/node1/priv/data", + true, + "node priv subdir should be private", + ), + ( + "/nodes/node1/config", + false, + "node config should not be private", + ), + ("/testdir", false, "testdir should not be private"), + ( + "/private", + false, + "private (not priv) should not be private", + ), + ]; + + for (path, expected, description) in test_cases { + let is_private = is_private_path(path); + assert_eq!(is_private, expected, "Failed for {}: {}", path, description); + } + + Ok(()) +} + +// Helper function matching the logic in filesystem.rs +fn is_private_path(path: &str) -> bool { + let path = path.trim_start_matches('/'); + + // Check if path starts with "priv" or "priv/" + if path.starts_with("priv") && (path.len() == 4 || path.as_bytes().get(4) == Some(&b'/')) { + return true; + } + + // Check for "nodes/*/priv" or "nodes/*/priv/*" pattern + if let Some(after_nodes) = path.strip_prefix("nodes/") { + if let Some(slash_pos) = after_nodes.find('/') { + let after_nodename = &after_nodes[slash_pos..]; + + if after_nodename.starts_with("/priv") { + let priv_end = slash_pos + 5; + if after_nodes.len() == priv_end + || after_nodes.as_bytes().get(priv_end) == Some(&b'/') + { + return true; + } + } + } + } + + false +} + +#[test] +fn test_fuse_inode_path_mapping() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let memdb = MemDb::open(&db_path, true)?; + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create nested directory structure + memdb.create("/a", libc::S_IFDIR, 0, now)?; + memdb.create("/a/b", libc::S_IFDIR, 0, now)?; + memdb.create("/a/b/c", libc::S_IFDIR, 0, now)?; + memdb.create("/a/b/c/file.txt", libc::S_IFREG, 0, now)?; + memdb.write("/a/b/c/file.txt", 0, 0, now, b"deep file", false)?; + + // Verify we can look up deep paths + let entry = memdb + .lookup_path("/a/b/c/file.txt") + .ok_or_else(|| anyhow::anyhow!("Failed to lookup deep path"))?; + + println!(" Inode: {}", entry.inode); + println!(" Size: {}", entry.size); + assert!(entry.inode > 1, "Should have valid inode"); + assert_eq!(entry.size, 9, "File size should match"); + + // Verify parent relationships + println!("\n3. Verifying parent relationships..."); + let c_entry = memdb + .lookup_path("/a/b/c") + .ok_or_else(|| anyhow::anyhow!("Failed to lookup /a/b/c"))?; + let b_entry = memdb + .lookup_path("/a/b") + .ok_or_else(|| anyhow::anyhow!("Failed to lookup /a/b"))?; + + assert_eq!( + entry.parent, c_entry.inode, + "file.txt parent should be c directory" + ); + assert_eq!( + c_entry.parent, b_entry.inode, + "c parent should be b directory" + ); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/fuse_cluster_test.rs b/src/pmxcfs-rs/pmxcfs/tests/fuse_cluster_test.rs new file mode 100644 index 000000000..007fa5f75 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/fuse_cluster_test.rs @@ -0,0 +1,220 @@ +/// FUSE Cluster Synchronization Tests +/// +/// Tests for pmxcfs FUSE operations that trigger DFSM broadcasts +/// and synchronize across cluster nodes. These tests verify that +/// file operations made through FUSE properly propagate to other nodes. +use anyhow::Result; +use pmxcfs_dfsm::{Callbacks, Dfsm, FuseMessage, NodeSyncInfo}; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::fuse; +use pmxcfs_rs::plugins; +use std::fs; +use std::io::Write; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tempfile::TempDir; + +/// Verify that FUSE filesystem successfully mounted, panic if not +async fn verify_fuse_mounted(path: &std::path::Path) { + // Use spawn_blocking to avoid blocking the async runtime + let path_buf = path.to_path_buf(); + let read_result = tokio::task::spawn_blocking(move || std::fs::read_dir(&path_buf)) + .await + .expect("spawn_blocking failed"); + + if read_result.is_ok() { + return; // Mount succeeded + } + + // Double-check with mount command + use std::process::Command; + let output = Command::new("mount").output().ok(); + let is_mounted = if let Some(output) = output { + let mount_output = String::from_utf8_lossy(&output.stdout); + mount_output.contains(&path.display().to_string()) + } else { + false + }; + + if !is_mounted { + panic!("FUSE mount failed.\nCheck /etc/fuse.conf for user_allow_other setting."); + } +} + +/// Test callbacks for DFSM - minimal implementation for testing +struct TestDfsmCallbacks { + memdb: MemDb, + broadcasts: Arc>>, // Track broadcast operations +} + +impl TestDfsmCallbacks { + fn new(memdb: MemDb) -> Self { + Self { + memdb, + broadcasts: Arc::new(Mutex::new(Vec::new())), + } + } + + #[allow(dead_code)] + fn get_broadcasts(&self) -> Vec { + self.broadcasts.lock().unwrap().clone() + } +} + +impl Callbacks for TestDfsmCallbacks { + type Message = FuseMessage; + + fn deliver_message( + &self, + _nodeid: u32, + _pid: u32, + message: FuseMessage, + _timestamp: u64, + ) -> Result<(i32, bool)> { + // Track the broadcast for testing + let msg_desc = match &message { + FuseMessage::Write { path, .. } => format!("write:{}", path), + FuseMessage::Create { path } => format!("create:{}", path), + FuseMessage::Mkdir { path } => format!("mkdir:{}", path), + FuseMessage::Delete { path } => format!("delete:{}", path), + FuseMessage::Rename { from, to } => format!("rename:{}→{}", from, to), + _ => "other".to_string(), + }; + self.broadcasts.lock().unwrap().push(msg_desc); + Ok((0, true)) + } + + fn compute_checksum(&self, output: &mut [u8; 32]) -> Result<()> { + *output = self.memdb.compute_database_checksum()?; + Ok(()) + } + + fn process_state_update(&self, _states: &[NodeSyncInfo]) -> Result { + Ok(true) // Indicate we're in sync for testing + } + + fn process_update(&self, _nodeid: u32, _pid: u32, _data: &[u8]) -> Result<()> { + Ok(()) + } + + fn commit_state(&self) -> Result<()> { + Ok(()) + } + + fn on_synced(&self) {} + + fn get_state(&self) -> Result> { + // Return empty state for testing + Ok(Vec::new()) + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (user_allow_other in /etc/fuse.conf)"] +async fn test_fuse_write_triggers_broadcast() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create test directory + memdb.create("/testdir", libc::S_IFDIR, 0, now)?; + + // Create DFSM instance with test callbacks + let callbacks = Arc::new(TestDfsmCallbacks::new(memdb.clone())); + let dfsm = Arc::new(Dfsm::new("test-cluster".to_string(), callbacks.clone())?); + + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + // Spawn FUSE mount with DFSM + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let dfsm_clone = dfsm.clone(); + let fuse_task = tokio::spawn(async move { + if let Err(e) = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + Some(dfsm_clone), + plugins, + status, + ) + .await + { + eprintln!("FUSE mount error: {}", e); + } + }); + + tokio::time::sleep(Duration::from_millis(2000)).await; + verify_fuse_mounted(&mount_path).await; + + // Test: Write to file via FUSE should trigger broadcast + let test_file = mount_path.join("testdir/broadcast-test.txt"); + let mut file = fs::File::create(&test_file)?; + file.write_all(b"test data for broadcast")?; + drop(file); + println!("[OK] File written via FUSE"); + + // Give time for broadcast + tokio::time::sleep(Duration::from_millis(100)).await; + + // Verify file exists in memdb + assert!( + memdb.exists("/testdir/broadcast-test.txt")?, + "File should exist in memdb" + ); + let data = memdb.read("/testdir/broadcast-test.txt", 0, 1024)?; + assert_eq!(&data[..], b"test data for broadcast"); + println!("[OK] File data verified in memdb"); + + // Cleanup + fs::remove_file(&test_file)?; + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("fusermount3") + .arg("-u") + .arg(&mount_path) + .output(); + + Ok(()) +} + +/// Additional FUSE + DFSM tests can be added here following the same pattern +#[test] +fn test_dfsm_callbacks_implementation() { + // Verify our test callbacks work correctly + let temp_dir = TempDir::new().unwrap(); + let db_path = temp_dir.path().join("test.db"); + let memdb = MemDb::open(&db_path, true).unwrap(); + + let callbacks = TestDfsmCallbacks::new(memdb); + + // Test checksum computation + let mut checksum = [0u8; 32]; + assert!(callbacks.compute_checksum(&mut checksum).is_ok()); + + // Test message delivery tracking + let result = callbacks.deliver_message( + 1, + 100, + FuseMessage::Create { + path: "/test".to_string(), + }, + 12345, + ); + assert!(result.is_ok()); + + let broadcasts = callbacks.get_broadcasts(); + assert_eq!(broadcasts.len(), 1); + assert_eq!(broadcasts[0], "create:/test"); +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/fuse_integration_test.rs b/src/pmxcfs-rs/pmxcfs/tests/fuse_integration_test.rs new file mode 100644 index 000000000..0e9f80076 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/fuse_integration_test.rs @@ -0,0 +1,414 @@ +/// Integration tests for FUSE filesystem with proxmox-fuse-rs +/// +/// These tests verify that the FUSE subsystem works correctly after +/// migrating from fuser to proxmox-fuse-rs +use anyhow::Result; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::fuse; +use pmxcfs_rs::plugins; +use std::fs; +use std::io::{Read, Write}; +use std::time::Duration; +use tempfile::TempDir; + +/// Verify that FUSE filesystem successfully mounted, panic if not +fn verify_fuse_mounted(path: &std::path::Path) { + use std::process::Command; + + let output = Command::new("mount").output().ok(); + + let is_mounted = if let Some(output) = output { + let mount_output = String::from_utf8_lossy(&output.stdout); + mount_output.contains(&format!(" {} ", path.display())) + } else { + false + }; + + if !is_mounted { + panic!( + "FUSE mount failed (likely permissions issue).\n\ + To run FUSE integration tests, either:\n\ + 1. Run with sudo: sudo -E cargo test --test fuse_integration_test\n\ + 2. Enable user_allow_other in /etc/fuse.conf and add your user to the 'fuse' group\n\ + 3. Or skip these tests: cargo test --test fuse_basic_test" + ); + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_fuse_mount_and_basic_operations() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + // Create mount point + fs::create_dir_all(&mount_path)?; + + // Create database + let memdb = MemDb::open(&db_path, true)?; + + // Create some test data in memdb + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + memdb.create("/testdir", libc::S_IFDIR, 0, now)?; + memdb.create("/testdir/file1.txt", libc::S_IFREG, 0, now)?; + memdb.write("/testdir/file1.txt", 0, 0, now, b"Hello from pmxcfs!", false)?; + + memdb.create("/nodes", libc::S_IFDIR, 0, now)?; + memdb.create("/nodes/testnode", libc::S_IFDIR, 0, now)?; + memdb.create("/nodes/testnode/config", libc::S_IFREG, 0, now)?; + memdb.write( + "/nodes/testnode/config", 0, 0, now, b"test=configuration", false, + )?; + + // Create config and plugins (no RRD persistence needed for test) + let config = pmxcfs_test_utils::create_test_config(false); + let plugins = { + let test_status = pmxcfs_status::init_with_config(config.clone()); + plugins::init_plugins(config.clone(), test_status) + }; + + // Create status for FUSE (set quorate for tests) + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + + // Spawn FUSE mount in background + println!("\n2. Mounting FUSE filesystem..."); + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let config_clone = config.clone(); + let plugins_clone = plugins.clone(); + let status_clone = status.clone(); + + let fuse_task = tokio::spawn(async move { + if let Err(e) = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config_clone, + None, // no cluster + plugins_clone, + status_clone, + ) + .await + { + eprintln!("FUSE mount error: {}", e); + } + }); + + // Give FUSE time to initialize and check if mount succeeded + tokio::time::sleep(Duration::from_millis(500)).await; + + // Verify FUSE mounted successfully + verify_fuse_mounted(&mount_path); + + // Test 1: Check if mount point is accessible + let root_entries = fs::read_dir(&mount_path)?; + let mut entry_names: Vec = root_entries + .filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().to_string()) + .collect(); + entry_names.sort(); + + println!(" Root directory entries: {:?}", entry_names); + assert!( + entry_names.contains(&"testdir".to_string()), + "testdir should be visible" + ); + assert!( + entry_names.contains(&"nodes".to_string()), + "nodes should be visible" + ); + + // Test 2: Read existing file + let file_path = mount_path.join("testdir/file1.txt"); + let mut file = fs::File::open(&file_path)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + assert_eq!(contents, "Hello from pmxcfs!"); + println!(" Read: '{}'", contents); + + // Test 3: Write to existing file + let mut file = fs::OpenOptions::new() + .write(true) + .truncate(true) + .open(&file_path)?; + file.write_all(b"Modified content!")?; + drop(file); + + // Verify write + let mut file = fs::File::open(&file_path)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + assert_eq!(contents, "Modified content!"); + println!(" After write: '{}'", contents); + + // Test 4: Create new file + let new_file_path = mount_path.join("testdir/newfile.txt"); + eprintln!("DEBUG: About to create file at {:?}", new_file_path); + let mut new_file = match fs::File::create(&new_file_path) { + Ok(f) => { + eprintln!("DEBUG: File created OK"); + f + } + Err(e) => { + eprintln!("DEBUG: File create FAILED: {:?}", e); + return Err(e.into()); + } + }; + eprintln!("DEBUG: Writing content"); + new_file.write_all(b"New file content")?; + eprintln!("DEBUG: Content written"); + drop(new_file); + eprintln!("DEBUG: File closed"); + + // Verify creation + let mut file = fs::File::open(&new_file_path)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + assert_eq!(contents, "New file content"); + println!(" Created and verified: newfile.txt"); + + // Test 5: Create directory + let new_dir_path = mount_path.join("newdir"); + fs::create_dir(&new_dir_path)?; + + // Verify directory exists + assert!(new_dir_path.exists()); + assert!(new_dir_path.is_dir()); + + // Test 6: List directory + let testdir_entries = fs::read_dir(mount_path.join("testdir"))?; + let mut file_names: Vec = testdir_entries + .filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().to_string()) + .collect(); + file_names.sort(); + + println!(" testdir entries: {:?}", file_names); + assert!( + file_names.contains(&"file1.txt".to_string()), + "file1.txt should exist" + ); + assert!( + file_names.contains(&"newfile.txt".to_string()), + "newfile.txt should exist" + ); + + // Test 7: Get file metadata + let metadata = fs::metadata(&file_path)?; + println!(" File size: {} bytes", metadata.len()); + println!(" Is file: {}", metadata.is_file()); + println!(" Is dir: {}", metadata.is_dir()); + assert!(metadata.is_file()); + assert!(!metadata.is_dir()); + + // Test 8: Test plugin files + let plugin_files = vec![".version", ".members", ".vmlist", ".rrd", ".clusterlog"]; + + for plugin_name in &plugin_files { + let plugin_path = mount_path.join(plugin_name); + if plugin_path.exists() { + match fs::File::open(&plugin_path) { + Ok(mut file) => { + let mut contents = Vec::new(); + file.read_to_end(&mut contents)?; + println!( + " [OK] Plugin '{}' readable ({} bytes)", + plugin_name, + contents.len() + ); + } + Err(e) => { + println!( + " [WARN] Plugin '{}' exists but not readable: {}", + plugin_name, e + ); + } + } + } else { + println!(" ℹ Plugin '{}' not present", plugin_name); + } + } + + // Test 9: Delete file + fs::remove_file(&new_file_path)?; + assert!(!new_file_path.exists()); + + // Test 10: Delete directory + fs::remove_dir(&new_dir_path)?; + assert!(!new_dir_path.exists()); + + // Test 11: Verify changes persisted to memdb + println!("\n13. Verifying memdb persistence..."); + assert!( + memdb.exists("/testdir/file1.txt")?, + "file1.txt should exist in memdb" + ); + assert!( + !memdb.exists("/testdir/newfile.txt")?, + "newfile.txt should be deleted from memdb" + ); + assert!( + !memdb.exists("/newdir")?, + "newdir should be deleted from memdb" + ); + + let read_data = memdb.read("/testdir/file1.txt", 0, 1024)?; + assert_eq!( + &read_data[..], + b"Modified content!", + "File content should be updated in memdb" + ); + + // Cleanup: unmount filesystem + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Force unmount + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_fuse_concurrent_operations() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + memdb.create("/testdir", libc::S_IFDIR, 0, now)?; + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + + // Verify FUSE mounted successfully + verify_fuse_mounted(&mount_path); + + // Create multiple files concurrently + let mut tasks = vec![]; + for i in 0..5 { + let mount = mount_path.clone(); + let task = tokio::task::spawn_blocking(move || -> Result<()> { + let file_path = mount.join(format!("testdir/file{}.txt", i)); + let mut file = fs::File::create(&file_path)?; + file.write_all(format!("Content {}", i).as_bytes())?; + Ok(()) + }); + tasks.push(task); + } + + // Wait for all tasks + for task in tasks { + task.await??; + } + + // Read all files and verify + for i in 0..5 { + let file_path = mount_path.join(format!("testdir/file{}.txt", i)); + let mut file = fs::File::open(&file_path)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + assert_eq!(contents, format!("Content {}", i)); + } + + // Cleanup + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_fuse_error_handling() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + + // Verify FUSE mounted successfully + verify_fuse_mounted(&mount_path); + + let result = fs::File::open(mount_path.join("nonexistent.txt")); + assert!(result.is_err(), "Should fail to open non-existent file"); + + let result = fs::remove_file(mount_path.join("nonexistent.txt")); + assert!(result.is_err(), "Should fail to delete non-existent file"); + + let result = fs::create_dir(mount_path.join("nonexistent/subdir")); + assert!( + result.is_err(), + "Should fail to create dir in non-existent parent" + ); + + // Cleanup + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/fuse_locks_test.rs b/src/pmxcfs-rs/pmxcfs/tests/fuse_locks_test.rs new file mode 100644 index 000000000..71b603955 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/fuse_locks_test.rs @@ -0,0 +1,377 @@ +/// FUSE Lock Operations Tests +/// +/// Tests for pmxcfs lock operations through the FUSE interface. +/// Locks are implemented as directories under /priv/lock/ and use +/// setattr(mtime) for renewal and release operations. +use anyhow::Result; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::fuse; +use pmxcfs_rs::plugins; +use std::fs; +use std::os::unix::fs::MetadataExt; +use std::time::Duration; +use tempfile::TempDir; + +/// Verify that FUSE filesystem successfully mounted, panic if not +fn verify_fuse_mounted(path: &std::path::Path) { + use std::process::Command; + + let output = Command::new("mount").output().ok(); + + let is_mounted = if let Some(output) = output { + let mount_output = String::from_utf8_lossy(&output.stdout); + mount_output.contains(&format!(" {} ", path.display())) + } else { + false + }; + + if !is_mounted { + panic!( + "FUSE mount failed (likely permissions issue).\n\ + To run FUSE integration tests, either:\n\ + 1. Run with sudo: sudo -E cargo test --test fuse_locks_test\n\ + 2. Enable user_allow_other in /etc/fuse.conf\n\ + 3. Or skip these tests: cargo test --lib" + ); + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_lock_creation_and_access() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create lock directory structure in memdb + memdb.create("/priv", libc::S_IFDIR, 0, now)?; + memdb.create("/priv/lock", libc::S_IFDIR, 0, now)?; + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, // no cluster + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + verify_fuse_mounted(&mount_path); + + // Test 1: Create lock directory via FUSE (mkdir) + let lock_path = mount_path.join("priv/lock/test-resource"); + fs::create_dir(&lock_path)?; + println!("[OK] Lock directory created via FUSE"); + + // Test 2: Verify lock exists and is a directory + assert!(lock_path.exists(), "Lock should exist"); + assert!(lock_path.is_dir(), "Lock should be a directory"); + println!("[OK] Lock directory accessible"); + + // Test 3: Verify lock is in memdb + assert!( + memdb.exists("/priv/lock/test-resource")?, + "Lock should exist in memdb" + ); + println!("[OK] Lock persisted to memdb"); + + // Test 4: Verify lock path detection + assert!( + pmxcfs_memdb::is_lock_path("/priv/lock/test-resource"), + "Path should be detected as lock path" + ); + println!("[OK] Lock path correctly identified"); + + // Test 5: List locks via FUSE readdir + let lock_dir_entries: Vec<_> = fs::read_dir(mount_path.join("priv/lock"))? + .filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().to_string()) + .collect(); + assert!( + lock_dir_entries.contains(&"test-resource".to_string()), + "Lock should appear in directory listing" + ); + println!("[OK] Lock visible in readdir"); + + // Cleanup + fs::remove_dir(&lock_path)?; + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_lock_renewal_via_mtime_update() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create lock directory structure + memdb.create("/priv", libc::S_IFDIR, 0, now)?; + memdb.create("/priv/lock", libc::S_IFDIR, 0, now)?; + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + verify_fuse_mounted(&mount_path); + + // Create lock via FUSE + let lock_path = mount_path.join("priv/lock/renewal-test"); + fs::create_dir(&lock_path)?; + println!("[OK] Lock directory created"); + + // Get initial metadata + let metadata1 = fs::metadata(&lock_path)?; + let mtime1 = metadata1.mtime(); + println!(" Initial mtime: {}", mtime1); + + // Wait a moment + tokio::time::sleep(Duration::from_millis(100)).await; + + // Test lock renewal: update mtime using filetime crate + // (This simulates the lock renewal mechanism used by Proxmox VE) + use filetime::{FileTime, set_file_mtime}; + let new_time = FileTime::now(); + set_file_mtime(&lock_path, new_time)?; + println!("[OK] Lock mtime updated (renewal)"); + + // Verify mtime was updated + let metadata2 = fs::metadata(&lock_path)?; + let mtime2 = metadata2.mtime(); + println!(" Updated mtime: {}", mtime2); + + // Note: Due to filesystem timestamp granularity, we just verify the operation succeeded + // The actual lock renewal logic is tested at the memdb level + println!("[OK] Lock renewal operation completed"); + + // Cleanup + fs::remove_dir(&lock_path)?; + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_lock_unlock_via_mtime_zero() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create lock directory structure + memdb.create("/priv", libc::S_IFDIR, 0, now)?; + memdb.create("/priv/lock", libc::S_IFDIR, 0, now)?; + + // Spawn FUSE mount (without DFSM so unlock happens locally) + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + verify_fuse_mounted(&mount_path); + + // Create lock via FUSE + let lock_path = mount_path.join("priv/lock/unlock-test"); + fs::create_dir(&lock_path)?; + println!("[OK] Lock directory created"); + + // Verify lock exists + assert!(lock_path.exists(), "Lock should exist"); + assert!( + memdb.exists("/priv/lock/unlock-test")?, + "Lock should exist in memdb" + ); + + // Test unlock: set mtime to 0 (Unix epoch) + // This is the unlock signal in pmxcfs + use filetime::{FileTime, set_file_mtime}; + let zero_time = FileTime::from_unix_time(0, 0); + set_file_mtime(&lock_path, zero_time)?; + println!("[OK] Lock unlock requested (mtime=0)"); + + // Give time for unlock processing + tokio::time::sleep(Duration::from_millis(200)).await; + + // When no DFSM, lock should be deleted locally if expired + // Since we just created it, it won't be expired, so it should still exist + // (This matches the C behavior: only delete if lock_expired() returns true) + assert!( + lock_path.exists(), + "Lock should still exist (not expired yet)" + ); + println!("[OK] Unlock handled correctly (lock not expired, kept)"); + + // Cleanup + fs::remove_dir(&lock_path)?; + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_multiple_locks() -> Result<()> { + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("test.db"); + let mount_path = temp_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + // Create lock directory structure + memdb.create("/priv", libc::S_IFDIR, 0, now)?; + memdb.create("/priv/lock", libc::S_IFDIR, 0, now)?; + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let fuse_task = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config, + None, + plugins, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(500)).await; + verify_fuse_mounted(&mount_path); + + // Test: Create multiple locks simultaneously + let lock_names = vec!["vm-100-disk-0", "vm-101-disk-0", "vm-102-disk-0"]; + + for name in &lock_names { + let lock_path = mount_path.join(format!("priv/lock/{}", name)); + fs::create_dir(&lock_path)?; + println!("[OK] Lock '{}' created", name); + } + + // Verify all locks exist + let lock_dir_entries: Vec<_> = fs::read_dir(mount_path.join("priv/lock"))? + .filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().to_string()) + .collect(); + + for name in &lock_names { + assert!( + lock_dir_entries.contains(&name.to_string()), + "Lock '{}' should be in directory listing", + name + ); + assert!( + memdb.exists(&format!("/priv/lock/{}", name))?, + "Lock '{}' should exist in memdb", + name + ); + } + println!("[OK] All locks accessible"); + + // Cleanup + for name in &lock_names { + let lock_path = mount_path.join(format!("priv/lock/{}", name)); + fs::remove_dir(&lock_path)?; + } + + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + let _ = std::process::Command::new("umount") + .arg("-l") + .arg(&mount_path) + .output(); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/local_integration.rs b/src/pmxcfs-rs/pmxcfs/tests/local_integration.rs new file mode 100644 index 000000000..9f19f5802 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/local_integration.rs @@ -0,0 +1,277 @@ +// Local integration tests that don't require containers +// Tests for MemDb functionality and basic plugin integration + +mod common; + +use anyhow::Result; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::plugins; + +use common::*; + +/// Test basic MemDb CRUD operations +#[test] +fn test_memdb_create_read_write() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + // Create a file + memdb.create("/test-file.txt", libc::S_IFREG, 0, TEST_MTIME)?; + + // Write content + let content = b"Hello, World!"; + memdb.write("/test-file.txt", 0, 0, TEST_MTIME, content, false)?; + + // Read it back + let data = memdb.read("/test-file.txt", 0, 1024)?; + assert_eq!(data, content, "File content should match"); + + Ok(()) +} + +/// Test directory operations +#[test] +fn test_memdb_directories() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + // Create directory structure + memdb.create("/nodes", libc::S_IFDIR, 0, TEST_MTIME)?; + memdb.create("/nodes/testnode", libc::S_IFDIR, 0, TEST_MTIME)?; + memdb.create("/nodes/testnode/qemu-server", libc::S_IFDIR, 0, TEST_MTIME)?; + + // List directory + let entries = memdb.readdir("/nodes/testnode")?; + assert_eq!(entries.len(), 1, "Should have 1 entry"); + assert_eq!(entries[0].name, "qemu-server"); + + // Verify directory exists + assert!(memdb.exists("/nodes")?); + assert!(memdb.exists("/nodes/testnode")?); + assert!(memdb.exists("/nodes/testnode/qemu-server")?); + + Ok(()) +} + +/// Test file operations: rename and delete +#[test] +fn test_memdb_file_operations() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + // Create and write file + memdb.create("/old-name.txt", libc::S_IFREG, 0, TEST_MTIME)?; + memdb.write("/old-name.txt", 0, 0, TEST_MTIME, b"test", false)?; + + // Test rename + memdb.rename("/old-name.txt", "/new-name.txt", 0, 1000)?; + assert!(!memdb.exists("/old-name.txt")?, "Old name should not exist"); + assert!(memdb.exists("/new-name.txt")?, "New name should exist"); + + // Verify content survived rename + let data = memdb.read("/new-name.txt", 0, 1024)?; + assert_eq!(data, b"test"); + + // Test delete + memdb.delete("/new-name.txt", 0, 1000)?; + assert!(!memdb.exists("/new-name.txt")?, "File should be deleted"); + + Ok(()) +} + +/// Test database persistence across reopens +#[test] +fn test_memdb_persistence() -> Result<()> { + let temp_dir = tempfile::TempDir::new()?; + let db_path = temp_dir.path().join("persist.db"); + + // Create and populate database + { + let memdb = MemDb::open(&db_path, true)?; + memdb.create("/persistent.txt", libc::S_IFREG, 0, TEST_MTIME)?; + memdb.write("/persistent.txt", 0, 0, TEST_MTIME, b"persistent data", false)?; + } + + // Reopen database and verify data persists + { + let memdb = MemDb::open(&db_path, false)?; + let data = memdb.read("/persistent.txt", 0, 1024)?; + assert_eq!( + data, b"persistent data", + "Data should persist across reopens" + ); + } + + Ok(()) +} + +/// Test write with offset (partial write/append) +#[test] +fn test_memdb_write_offset() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + memdb.create("/offset-test.txt", libc::S_IFREG, 0, TEST_MTIME)?; + + // Write at offset 0 + memdb.write("/offset-test.txt", 0, 0, TEST_MTIME, b"Hello", false)?; + + // Write at offset 5 (append) + memdb.write("/offset-test.txt", 5, 0, TEST_MTIME, b", World!", false)?; + + // Read full content + let data = memdb.read("/offset-test.txt", 0, 1024)?; + assert_eq!(data, b"Hello, World!"); + + Ok(()) +} + +/// Test write with truncation +/// +/// Now tests CORRECT behavior after fixing the API bug. +/// truncate=true should clear the file before writing. +#[test] +fn test_memdb_write_truncate() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + memdb.create("/truncate-test.txt", libc::S_IFREG, 0, TEST_MTIME)?; + + // Write initial content + memdb.write("/truncate-test.txt", 0, 0, TEST_MTIME, b"Hello, World!", false)?; + + // Overwrite with truncate=true (should clear first, then write) + memdb.write("/truncate-test.txt", 0, 0, TEST_MTIME, b"Hi", true)?; + + // Should only have "Hi" + let data = memdb.read("/truncate-test.txt", 0, 1024)?; + assert_eq!(data, b"Hi", "Truncate should clear file before writing"); + + Ok(()) +} + +/// Test file size limit (C implementation limits to 1MB) +#[test] +fn test_memdb_file_size_limit() -> Result<()> { + let (_temp_dir, memdb) = create_minimal_test_db()?; + + memdb.create("/large.bin", libc::S_IFREG, 0, TEST_MTIME)?; + + // Exactly 1MB should be accepted + let one_mb = vec![0u8; 1024 * 1024]; + assert!( + memdb + .write("/large.bin", 0, 0, TEST_MTIME, &one_mb, false) + .is_ok(), + "1MB file should be accepted" + ); + + // Over 1MB should fail + let over_one_mb = vec![0u8; 1024 * 1024 + 1]; + assert!( + memdb + .write("/large.bin", 0, 0, TEST_MTIME, &over_one_mb, false) + .is_err(), + "Over 1MB file should be rejected" + ); + + Ok(()) +} + +/// Test plugin initialization and basic functionality +#[test] +fn test_plugin_initialization() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + + let plugin_registry = plugins::init_plugins(config, status); + + // Verify plugins are registered + let plugin_list = plugin_registry.list(); + assert!(!plugin_list.is_empty(), "Should have plugins registered"); + + // Verify expected plugins exist + assert!( + plugin_registry.get(".version").is_some(), + "Should have .version plugin" + ); + assert!( + plugin_registry.get(".vmlist").is_some(), + "Should have .vmlist plugin" + ); + assert!( + plugin_registry.get(".rrd").is_some(), + "Should have .rrd plugin" + ); + assert!( + plugin_registry.get(".members").is_some(), + "Should have .members plugin" + ); + assert!( + plugin_registry.get(".clusterlog").is_some(), + "Should have .clusterlog plugin" + ); + + Ok(()) +} + +/// Test .version plugin output +#[test] +fn test_version_plugin() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status); + + let version_plugin = plugins + .get(".version") + .expect(".version plugin should exist"); + + let version_data = version_plugin.read()?; + let version_str = String::from_utf8_lossy(&version_data); + + // Verify it's valid JSON + let version_json: serde_json::Value = serde_json::from_slice(&version_data)?; + assert!(version_json.is_object(), "Version should be JSON object"); + + // Verify it contains expected fields + assert!( + version_str.contains("version"), + "Should contain 'version' field" + ); + + Ok(()) +} + +/// Test error case: reading non-existent file +#[test] +fn test_memdb_error_nonexistent_file() { + let (_temp_dir, memdb) = create_minimal_test_db().unwrap(); + + let result = memdb.read("/does-not-exist.txt", 0, 1024); + assert!(result.is_err(), "Reading non-existent file should fail"); +} + +/// Test error case: creating file in non-existent directory +#[test] +fn test_memdb_error_no_parent_directory() { + let (_temp_dir, memdb) = create_minimal_test_db().unwrap(); + + let result = memdb.create("/nonexistent/file.txt", libc::S_IFREG, 0, TEST_MTIME); + assert!( + result.is_err(), + "Creating file in non-existent directory should fail" + ); +} + +/// Test error case: writing to non-existent file +#[test] +fn test_memdb_error_write_nonexistent() { + let (_temp_dir, memdb) = create_minimal_test_db().unwrap(); + + let result = memdb.write("/does-not-exist.txt", 0, 0, TEST_MTIME, b"test", false); + assert!(result.is_err(), "Writing to non-existent file should fail"); +} + +/// Test error case: deleting non-existent file +#[test] +fn test_memdb_error_delete_nonexistent() { + let (_temp_dir, memdb) = create_minimal_test_db().unwrap(); + + let result = memdb.delete("/does-not-exist.txt", 0, 1000); + assert!(result.is_err(), "Deleting non-existent file should fail"); +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/quorum_behavior.rs b/src/pmxcfs-rs/pmxcfs/tests/quorum_behavior.rs new file mode 100644 index 000000000..d397ad099 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/quorum_behavior.rs @@ -0,0 +1,274 @@ +/// Quorum-Dependent Behavior Tests +/// +/// Tests for pmxcfs behavior that changes based on quorum state. +/// These tests verify plugin behavior (especially symlinks) and +/// operations that should be blocked/allowed based on quorum. +/// +/// Note: These tests do NOT require FUSE mounting - they test the +/// plugin layer directly, which is accessible without root permissions. +mod common; + +use anyhow::Result; +use common::*; +use pmxcfs_rs::plugins; + +/// Test .members plugin behavior with and without quorum +/// +/// According to C implementation: +/// - With quorum: .members is regular file containing member list +/// - Without quorum: .members becomes symlink to /etc/pve/error (ENOTCONN) +#[test] +fn test_members_plugin_quorum_behavior() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status.clone()); + + let members_plugin = plugins + .get(".members") + .expect(".members plugin should exist"); + + // Test 1: With quorum, .members should be accessible + status.set_quorate(true); + + let data = members_plugin.read()?; + assert!(!data.is_empty(), "With quorum, .members should return data"); + + // Verify it's valid JSON + let members_json: serde_json::Value = serde_json::from_slice(&data)?; + assert!( + members_json.is_object() || members_json.is_array(), + ".members should contain valid JSON" + ); + + // Test 2: Without quorum, behavior changes + // Note: Current implementation may not fully implement symlink behavior + // This test documents actual behavior + status.set_quorate(false); + + let result = members_plugin.read(); + // In local mode, .members might still be readable + // In cluster mode without quorum, it should error or return error indication + match result { + Ok(data) => { + // If readable, should still be valid structure + assert!(!data.is_empty(), "Data should not be empty if readable"); + } + Err(_) => { + // Expected in non-local mode without quorum + } + } + + Ok(()) +} + +/// Test .vmlist plugin behavior with and without quorum +#[test] +fn test_vmlist_plugin_quorum_behavior() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status.clone()); + + // Register a test VM + clear_test_vms(&status); + status.register_vm(100, pmxcfs_status::VmType::Qemu, TEST_NODE_NAME.to_string()); + + let vmlist_plugin = plugins.get(".vmlist").expect(".vmlist plugin should exist"); + + // Test 1: With quorum, .vmlist works normally + status.set_quorate(true); + + let data = vmlist_plugin.read()?; + let vmlist_str = String::from_utf8(data)?; + + // Verify valid JSON + let vmlist_json: serde_json::Value = serde_json::from_str(&vmlist_str)?; + assert!(vmlist_json.is_object(), ".vmlist should be JSON object"); + + // Verify our test VM is present + assert!( + vmlist_str.contains("\"100\""), + "Should contain registered VM 100" + ); + + // Test 2: Without quorum (in local mode, should still work) + status.set_quorate(false); + + let result = vmlist_plugin.read(); + // In local mode, vmlist should still be accessible + assert!( + result.is_ok(), + "In local mode, .vmlist should work without quorum" + ); + + Ok(()) +} + +/// Test .version plugin is unaffected by quorum state +#[test] +fn test_version_plugin_unaffected_by_quorum() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status.clone()); + + let version_plugin = plugins + .get(".version") + .expect(".version plugin should exist"); + + // Test with quorum + status.set_quorate(true); + let data_with = version_plugin.read()?; + let version_with: serde_json::Value = serde_json::from_slice(&data_with)?; + assert!(version_with.is_object(), "Version should be JSON object"); + assert!( + version_with.get("version").is_some(), + "Should have version field" + ); + + // Test without quorum + status.set_quorate(false); + let data_without = version_plugin.read()?; + let version_without: serde_json::Value = serde_json::from_slice(&data_without)?; + assert!(version_without.is_object(), "Version should be JSON object"); + assert!( + version_without.get("version").is_some(), + "Should have version field" + ); + + // Version should be same regardless of quorum + assert_eq!( + version_with.get("version"), + version_without.get("version"), + "Version should be same with/without quorum" + ); + + Ok(()) +} + +/// Test .rrd plugin behavior +#[test] +fn test_rrd_plugin_functionality() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status.clone()); + + let rrd_plugin = plugins.get(".rrd").expect(".rrd plugin should exist"); + + status.set_quorate(true); + + // RRD plugin should be readable (may be empty initially) + let data = rrd_plugin.read()?; + // Data should be valid (even if empty) + let rrd_str = String::from_utf8(data)?; + // Empty or contains RRD data lines + assert!(rrd_str.is_empty() || rrd_str.lines().count() > 0); + + Ok(()) +} + +/// Test .clusterlog plugin behavior +#[test] +fn test_clusterlog_plugin_functionality() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status.clone()); + + let log_plugin = plugins + .get(".clusterlog") + .expect(".clusterlog plugin should exist"); + + status.set_quorate(true); + + // Clusterlog should be readable + let data = log_plugin.read()?; + // Should be valid text (even if empty) + let _log_str = String::from_utf8(data)?; + + Ok(()) +} + +/// Test quorum state changes work correctly +#[test] +fn test_quorum_state_transitions() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let _plugins = plugins::init_plugins(config, status.clone()); + + // Test state transitions + status.set_quorate(false); + assert!( + !status.is_quorate(), + "Should not be quorate after set_quorate(false)" + ); + + status.set_quorate(true); + assert!( + status.is_quorate(), + "Should be quorate after set_quorate(true)" + ); + + status.set_quorate(false); + assert!(!status.is_quorate(), "Should not be quorate again"); + + // Multiple calls to same state should be idempotent + status.set_quorate(true); + status.set_quorate(true); + assert!( + status.is_quorate(), + "Multiple set_quorate(true) should work" + ); + + Ok(()) +} + +/// Test plugin registry lists all expected plugins +#[test] +fn test_plugin_registry_completeness() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let plugins = plugins::init_plugins(config, status); + + let plugin_list = plugins.list(); + + // Verify minimum expected plugins exist + let expected_plugins = vec![".version", ".members", ".vmlist", ".rrd", ".clusterlog"]; + + for plugin_name in expected_plugins { + assert!( + plugin_list.contains(&plugin_name.to_string()), + "Plugin registry should contain {}", + plugin_name + ); + } + + assert!(!plugin_list.is_empty(), "Should have at least some plugins"); + assert!( + plugin_list.len() >= 5, + "Should have at least 5 core plugins" + ); + + Ok(()) +} + +/// Test async quorum change notification +#[tokio::test] +async fn test_quorum_change_async() -> Result<()> { + let config = create_test_config(true); + let status = create_test_status(); + let _plugins = plugins::init_plugins(config, status.clone()); + + // Initial state + status.set_quorate(true); + assert!(status.is_quorate()); + + // Simulate async quorum loss + status.set_quorate(false); + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + assert!(!status.is_quorate(), "Quorum loss should be immediate"); + + // Simulate async quorum regain + status.set_quorate(true); + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + assert!(status.is_quorate(), "Quorum regain should be immediate"); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/single_node_functional.rs b/src/pmxcfs-rs/pmxcfs/tests/single_node_functional.rs new file mode 100644 index 000000000..fac828495 --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/single_node_functional.rs @@ -0,0 +1,361 @@ +/// Single-node functional test +/// +/// This test simulates a complete single-node pmxcfs deployment +/// without requiring root privileges or actual FUSE mounting. +use anyhow::Result; +use pmxcfs_config::Config; +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::plugins::{PluginRegistry, init_plugins}; +use pmxcfs_status::{Status, VmType}; +use std::sync::Arc; +use tempfile::TempDir; + +/// Helper to initialize plugins for testing +fn init_test_plugins(nodename: &str, status: Arc) -> Arc { + let config = Config::shared( + nodename.to_string(), + "127.0.0.1".parse().unwrap(), + 33, // www-data gid + false, + false, + "pmxcfs".to_string(), + ); + init_plugins(config, status) +} + +/// Test complete single-node workflow +#[tokio::test] +async fn test_single_node_workflow() -> Result<()> { + println!("\n=== Single-Node Functional Test ===\n"); + + // Initialize status subsystem + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Clear any VMs from previous tests + let existing_vms: Vec = status.get_vmlist().keys().copied().collect(); + for vmid in existing_vms { + status.delete_vm(vmid); + } + + let plugins = init_test_plugins("localhost", status.clone()); + println!( + " [OK] Plugin system initialized ({} plugins)", + plugins.list().len() + ); + + // Create temporary database + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("pmxcfs.db"); + println!("\n2. Creating database at {}", db_path.display()); + + let db = MemDb::open(&db_path, true)?; + + // Test directory structure creation + println!("\n3. Creating directory structure..."); + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + db.create("/nodes", libc::S_IFDIR, 0, now)?; + db.create("/nodes/localhost", libc::S_IFDIR, 0, now)?; + db.create("/nodes/localhost/qemu-server", libc::S_IFDIR, 0, now)?; + db.create("/nodes/localhost/lxc", libc::S_IFDIR, 0, now)?; + db.create("/nodes/localhost/priv", libc::S_IFDIR, 0, now)?; + + db.create("/priv", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock/qemu-server", libc::S_IFDIR, 0, now)?; + db.create("/priv/lock/lxc", libc::S_IFDIR, 0, now)?; + db.create("/qemu-server", libc::S_IFDIR, 0, now)?; + db.create("/lxc", libc::S_IFDIR, 0, now)?; + + // Test configuration file creation + println!("\n4. Creating configuration files..."); + + // Create corosync.conf + let corosync_conf = b"totem {\n version: 2\n cluster_name: test\n}\n"; + db.create("/corosync.conf", libc::S_IFREG, 0, now)?; + db.write("/corosync.conf", 0, 0, now, corosync_conf, false)?; + println!( + " [OK] Created /corosync.conf ({} bytes)", + corosync_conf.len() + ); + + // Create datacenter.cfg + let datacenter_cfg = b"keyboard: en-us\n"; + db.create("/datacenter.cfg", libc::S_IFREG, 0, now)?; + db.write("/datacenter.cfg", 0, 0, now, datacenter_cfg, false)?; + println!( + " [OK] Created /datacenter.cfg ({} bytes)", + datacenter_cfg.len() + ); + + // Create some VM configs + let vm_config = b"cores: 2\nmemory: 2048\nnet0: virtio=00:00:00:00:00:01,bridge=vmbr0\n"; + db.create("/qemu-server/100.conf", libc::S_IFREG, 0, now)?; + db.write("/qemu-server/100.conf", 0, 0, now, vm_config, false)?; + + db.create("/qemu-server/101.conf", libc::S_IFREG, 0, now)?; + db.write("/qemu-server/101.conf", 0, 0, now, vm_config, false)?; + + // Create LXC container config + let ct_config = b"cores: 1\nmemory: 512\nrootfs: local:100/vm-100-disk-0.raw\n"; + db.create("/lxc/200.conf", libc::S_IFREG, 0, now)?; + db.write("/lxc/200.conf", 0, 0, now, ct_config, false)?; + + // Create private file + let private_data = b"secret token data"; + db.create("/priv/token.cfg", libc::S_IFREG, 0, now)?; + db.write("/priv/token.cfg", 0, 0, now, private_data, false)?; + + // Test file operations + + // Read back corosync.conf + let read_data = db.read("/corosync.conf", 0, 1024)?; + assert_eq!(&read_data[..], corosync_conf); + + // Test file size limit (1MB) + let large_data = vec![0u8; 1024 * 1024]; // Exactly 1MB + db.create("/large.bin", libc::S_IFREG, 0, now)?; + let result = db.write("/large.bin", 0, 0, now, &large_data, false); + assert!(result.is_ok(), "1MB file should be accepted"); + + // Test directory listing + let entries = db.readdir("/qemu-server")?; + assert_eq!(entries.len(), 2, "Should have 2 VM configs"); + + // Test rename + db.rename("/qemu-server/101.conf", "/qemu-server/102.conf", 0, 1000)?; + assert!(db.exists("/qemu-server/102.conf")?); + assert!(!db.exists("/qemu-server/101.conf")?); + + // Test delete + db.delete("/large.bin", 0, 1000)?; + assert!(!db.exists("/large.bin")?); + + // Test VM list management + + // Clear VMs again right before this section to avoid test interference + let existing_vms: Vec = status.get_vmlist().keys().copied().collect(); + for vmid in existing_vms { + status.delete_vm(vmid); + } + + status.register_vm(100, VmType::Qemu, "localhost".to_string()); + status.register_vm(102, VmType::Qemu, "localhost".to_string()); + status.register_vm(200, VmType::Lxc, "localhost".to_string()); + + let vmlist = status.get_vmlist(); + assert_eq!(vmlist.len(), 3, "Should have 3 VMs registered"); + + // Verify VM types + assert_eq!(vmlist.get(&100).unwrap().vmtype, VmType::Qemu); + assert_eq!(vmlist.get(&200).unwrap().vmtype, VmType::Lxc); + + // Test lock management + + let lock_path = "/priv/lock/qemu-server/100.conf"; + let csum = [1u8; 32]; + + db.acquire_lock(lock_path, &csum)?; + assert!(db.is_locked(lock_path)); + + db.release_lock(lock_path, &csum)?; + assert!(!db.is_locked(lock_path)); + + // Test checksum operations + + let checksum = db.compute_database_checksum()?; + println!( + " [OK] Database checksum: {:02x}{:02x}{:02x}{:02x}...", + checksum[0], checksum[1], checksum[2], checksum[3] + ); + + // Modify database and verify checksum changes + db.write("/datacenter.cfg", 0, 0, now, b"keyboard: de\n", false)?; + let new_checksum = db.compute_database_checksum()?; + assert_ne!( + checksum, new_checksum, + "Checksum should change after modification" + ); + + // Test database encoding + let _encoded = db.encode_database()?; + + // Test RRD data collection + + // Set RRD data in C-compatible format + // Format: "key:timestamp:val1:val2:val3:..." + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs(); + + status + .set_rrd_data( + "pve2-node/localhost".to_string(), + format!( + "{}:0:1.5:4:45.5:2.1:8000000000:6000000000:0:0:0:0:1000000:500000", + now + ), + ) + .await?; + + let rrd_dump = status.get_rrd_dump(); + assert!( + rrd_dump.contains("pve2-node/localhost"), + "Should have node data" + ); + let num_entries = rrd_dump.lines().count(); + + // Test cluster log + use pmxcfs_status::ClusterLogEntry; + let log_entry = ClusterLogEntry { + uid: 0, + timestamp: now, + priority: 6, // Info priority + tag: "startup".to_string(), + pid: 0, + node: "localhost".to_string(), + ident: "pmxcfs".to_string(), + message: "Cluster filesystem started".to_string(), + }; + status.add_log_entry(log_entry); + + let log_entries = status.get_log_entries(100); + assert_eq!(log_entries.len(), 1); + + // Test plugin system + + // Test .version plugin + if let Some(plugin) = plugins.get(".version") { + let content = plugin.read()?; + let version_str = String::from_utf8(content)?; + assert!(version_str.contains("version")); + assert!(version_str.contains("9.0.6")); + } + + // Test .vmlist plugin + if let Some(plugin) = plugins.get(".vmlist") { + let content = plugin.read()?; + let vmlist_str = String::from_utf8(content)?; + assert!(vmlist_str.contains("\"100\"")); + assert!(vmlist_str.contains("\"200\"")); + assert!(vmlist_str.contains("qemu")); + assert!(vmlist_str.contains("lxc")); + println!( + " [OK] .vmlist plugin: {} bytes, {} VMs", + vmlist_str.len(), + 3 + ); + } + + // Test .rrd plugin + if let Some(plugin) = plugins.get(".rrd") { + let content = plugin.read()?; + let rrd_str = String::from_utf8(content)?; + // Should contain the node RRD data in C-compatible format + assert!( + rrd_str.contains("pve2-node/localhost"), + "RRD should contain node data" + ); + } + + // Test database persistence + + drop(db); // Close database + + // Reopen and verify data persists + let db = MemDb::open(&db_path, false)?; + assert!(db.exists("/corosync.conf")?); + assert!(db.exists("/qemu-server/100.conf")?); + assert!(db.exists("/lxc/200.conf")?); + + let read_conf = db.read("/corosync.conf", 0, 1024)?; + assert_eq!(&read_conf[..], corosync_conf); + + // Test state export + + let all_entries = db.get_all_entries()?; + + // Verify entry structure + let root_entry = db.lookup_path("/").expect("Root should exist"); + assert_eq!(root_entry.inode, 0); // Root inode is 0 + assert!(root_entry.is_dir()); + + println!("\n=== Single-Node Test Complete ===\n"); + println!("\nTest Summary:"); + println!("\nDatabase Statistics:"); + println!(" • Total entries: {}", all_entries.len()); + println!(" • VMs/CTs tracked: {}", vmlist.len()); + println!(" • RRD entries: {}", num_entries); + println!(" • Cluster log entries: 1"); + println!( + " • Database size: {} bytes", + std::fs::metadata(&db_path)?.len() + ); + + Ok(()) +} + +/// Test simulated multi-operation workflow +#[tokio::test] +async fn test_realistic_workflow() -> Result<()> { + println!("\n=== Realistic Workflow Test ===\n"); + + let temp_dir = TempDir::new()?; + let db_path = temp_dir.path().join("pmxcfs.db"); + let db = MemDb::open(&db_path, true)?; + + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config); + + // Clear any VMs from previous tests + let existing_vms: Vec = status.get_vmlist().keys().copied().collect(); + for vmid in existing_vms { + status.delete_vm(vmid); + } + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32; + + println!("Scenario: Creating a new VM"); + + // 1. Check if VMID is available + let vmid = 103; + assert!(!status.vm_exists(vmid)); + + // 2. Acquire lock for VM creation + let lock_path = format!("/priv/lock/qemu-server/{}.conf", vmid); + let csum = [1u8; 32]; + + // Create lock directories first + db.create("/priv", libc::S_IFDIR, 0, now).ok(); + db.create("/priv/lock", libc::S_IFDIR, 0, now).ok(); + db.create("/priv/lock/qemu-server", libc::S_IFDIR, 0, now).ok(); + + db.acquire_lock(&lock_path, &csum)?; + + // 3. Create VM configuration + let config_path = format!("/qemu-server/{}.conf", vmid); + db.create("/qemu-server", libc::S_IFDIR, 0, now).ok(); // May already exist + let vm_config = format!( + "name: test-vm-{}\ncores: 4\nmemory: 4096\nbootdisk: scsi0\n", + vmid + ); + db.create(&config_path, libc::S_IFREG, 0, now)?; + db.write(&config_path, 0, 0, now, vm_config.as_bytes(), false)?; + + // 4. Register VM in cluster + status.register_vm(vmid, VmType::Qemu, "localhost".to_string()); + + // 5. Release lock + db.release_lock(&lock_path, &csum)?; + + // 6. Verify VM is accessible + assert!(db.exists(&config_path)?); + assert!(status.vm_exists(vmid)); + + Ok(()) +} diff --git a/src/pmxcfs-rs/pmxcfs/tests/symlink_quorum_test.rs b/src/pmxcfs-rs/pmxcfs/tests/symlink_quorum_test.rs new file mode 100644 index 000000000..49ea886df --- /dev/null +++ b/src/pmxcfs-rs/pmxcfs/tests/symlink_quorum_test.rs @@ -0,0 +1,145 @@ +/// Test for quorum-aware symlink permissions +/// +/// This test verifies that symlink plugins correctly adjust their permissions +/// based on quorum status, matching the C implementation behavior in cfs-plug-link.c:68-72 +use pmxcfs_memdb::MemDb; +use pmxcfs_rs::{fuse, plugins}; +use std::fs; +use std::time::Duration; +use tempfile::TempDir; + +#[tokio::test] +#[ignore = "Requires FUSE mount permissions (run with sudo or configure /etc/fuse.conf)"] +async fn test_symlink_permissions_with_quorum() -> Result<(), Box> { + let test_dir = TempDir::new()?; + let db_path = test_dir.path().join("test.db"); + let mount_path = test_dir.path().join("mnt"); + + fs::create_dir_all(&mount_path)?; + + // Create MemDb and status (no RRD persistence needed for test) + let memdb = MemDb::open(&db_path, true)?; + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + + // Test with quorum enabled (should have 0o777 permissions) + status.set_quorate(true); + let plugins = plugins::init_plugins(config.clone(), status.clone()); + + // Spawn FUSE mount + let mount_path_clone = mount_path.clone(); + let memdb_clone = memdb.clone(); + let config_clone = config.clone(); + let plugins_clone = plugins.clone(); + let status_clone = status.clone(); + + let fuse_task = tokio::spawn(async move { + if let Err(e) = fuse::mount_fuse( + &mount_path_clone, + memdb_clone, + config_clone, + None, + plugins_clone, + status_clone, + ) + .await + { + eprintln!("FUSE mount error: {}", e); + } + }); + + // Give FUSE time to mount + tokio::time::sleep(Duration::from_millis(2000)).await; + + // Check if the symlink exists + let local_link = mount_path.join("local"); + if local_link.exists() { + let metadata = fs::symlink_metadata(&local_link)?; + let permissions = metadata.permissions(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mode = permissions.mode(); + let link_perms = mode & 0o777; + println!(" Link 'local' permissions: {:04o}", link_perms); + // Note: On most systems, symlink permissions are always 0777 + // This test mainly ensures the code path works correctly + } + } else { + println!(" [WARN] Symlink 'local' not visible (may be a FUSE mounting issue)"); + } + + // Cleanup + fuse_task.abort(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Remount with quorum disabled + let mount_path2 = test_dir.path().join("mnt2"); + fs::create_dir_all(&mount_path2)?; + + status.set_quorate(false); + let plugins2 = plugins::init_plugins(config.clone(), status.clone()); + + let mount_path_clone2 = mount_path2.clone(); + let memdb_clone2 = memdb.clone(); + let fuse_task2 = tokio::spawn(async move { + let _ = fuse::mount_fuse( + &mount_path_clone2, + memdb_clone2, + config, + None, + plugins2, + status, + ) + .await; + }); + + tokio::time::sleep(Duration::from_millis(2000)).await; + + let local_link2 = mount_path2.join("local"); + if local_link2.exists() { + let metadata = fs::symlink_metadata(&local_link2)?; + let permissions = metadata.permissions(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mode = permissions.mode(); + let link_perms = mode & 0o777; + println!(" Link 'local' permissions: {:04o}", link_perms); + } + } else { + println!(" [WARN] Symlink 'local' not visible (may be a FUSE mounting issue)"); + } + + // Cleanup + fuse_task2.abort(); + + println!(" Note: Actual permission enforcement depends on FUSE and kernel"); + + Ok(()) +} + +#[test] +fn test_link_plugin_has_quorum_aware_mode() { + // This is a unit test to verify the LinkPlugin mode is computed correctly + let _test_dir = TempDir::new().unwrap(); + + // Create status with quorum (no async needed, no RRD persistence) + let config = pmxcfs_test_utils::create_test_config(false); + let status = pmxcfs_status::init_with_config(config.clone()); + status.set_quorate(true); + let registry_quorate = plugins::init_plugins(config.clone(), status.clone()); + + // Check that symlinks are identified correctly + let local_plugin = registry_quorate + .get("local") + .expect("local symlink should exist"); + assert!(local_plugin.is_symlink(), "local should be a symlink"); + + // The mode itself is still 0o777, but the filesystem layer will use quorum status + assert_eq!( + local_plugin.mode(), + 0o777, + "Link plugin base mode should be 0o777" + ); +} -- 2.47.3