From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 862431FF142 for ; Mon, 16 Feb 2026 13:14:18 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4179F10FA8; Mon, 16 Feb 2026 13:15:04 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v2 10/12] metrics: collect s3 datastore statistics as rrd metrics Date: Mon, 16 Feb 2026 13:14:04 +0100 Message-ID: <20260216121406.99617-17-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260216121406.99617-1-c.ebner@proxmox.com> References: <20260216121406.99617-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1771244055220 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.952 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_MAILER 2 Automated Mailer Tag Left in Email SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: BLVH6B7VMRJC4N5JRENVH2BAQWKMTEB6 X-Message-ID-Hash: BLVH6B7VMRJC4N5JRENVH2BAQWKMTEB6 X-MailFrom: c.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: For datastores with s3 backend, load the shared s3 request counters via the mmapped file and include them as rrd metrics. Combine the pre-existing DiskStat with an optional S3Statistics into a common DatastoreStats struct as dedicated type for the internal method interfaces. Request counters are collected by method, total upload and download traffic as gauge values as well as derived values to get averaged rate statistics. Signed-off-by: Christian Ebner --- changes since version 1: - avoid re-opening and mmapping counters for datastores on each rrd data collection, cache the open mmapped counters instead. src/server/metric_collection/metric_server.rs | 8 +- src/server/metric_collection/mod.rs | 90 +++++++++++++++++-- src/server/metric_collection/pull_metrics.rs | 18 +++- src/server/metric_collection/rrd.rs | 34 ++++++- 4 files changed, 132 insertions(+), 18 deletions(-) diff --git a/src/server/metric_collection/metric_server.rs b/src/server/metric_collection/metric_server.rs index ba20628a0..4584fc14c 100644 --- a/src/server/metric_collection/metric_server.rs +++ b/src/server/metric_collection/metric_server.rs @@ -5,10 +5,10 @@ use serde_json::{json, Value}; use proxmox_metrics::MetricsData; -use super::{DiskStat, HostStats}; +use super::{DatastoreStats, DiskStat, HostStats}; pub async fn send_data_to_metric_servers( - stats: Arc<(HostStats, DiskStat, Vec)>, + stats: Arc<(HostStats, DiskStat, Vec)>, ) -> Result<(), Error> { let (config, _digest) = pbs_config::metrics::config()?; let channel_list = get_metric_server_connections(config)?; @@ -66,10 +66,10 @@ pub async fn send_data_to_metric_servers( for datastore in stats.2.iter() { values.push(Arc::new( - MetricsData::new("blockstat", ctime, datastore.to_value())? + MetricsData::new("blockstat", ctime, datastore.disk.to_value())? .tag("object", "host") .tag("host", nodename) - .tag("datastore", datastore.name.clone()), + .tag("datastore", datastore.disk.name.clone()), )); } diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs index 51e5a501a..9ff9feec0 100644 --- a/src/server/metric_collection/mod.rs +++ b/src/server/metric_collection/mod.rs @@ -1,18 +1,27 @@ +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::path::Path; use std::pin::pin; -use std::sync::{Arc, OnceLock}; +use std::sync::atomic::Ordering; +use std::sync::{Arc, LazyLock, Mutex, OnceLock}; use std::time::{Duration, Instant}; -use anyhow::Error; +use anyhow::{format_err, Error}; +use hyper::Method; use tokio::join; -use pbs_api_types::{DataStoreConfig, Operation}; +use pbs_api_types::{ + DataStoreConfig, DatastoreBackendConfig, DatastoreBackendType, Operation, S3Statistics, +}; +use proxmox_lang::try_block; use proxmox_network_api::{get_network_interfaces, IpLink}; +use proxmox_s3_client::SharedRequestCounters; +use proxmox_schema::ApiType; use proxmox_sys::fs::FileSystemInformation; use proxmox_sys::linux::procfs::{Loadavg, ProcFsMemInfo, ProcFsNetDev, ProcFsStat}; use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage}; +use pbs_datastore::S3_CLIENT_REQUEST_COUNTER_BASE_PATH; mod metric_server; pub(crate) mod pull_metrics; @@ -109,6 +118,11 @@ struct DiskStat { dev: Option, } +struct DatastoreStats { + disk: DiskStat, + s3_stats: Option, +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)] enum NetdevType { Physical, @@ -215,7 +229,52 @@ fn collect_host_stats_sync() -> HostStats { } } -fn collect_disk_stats_sync() -> (DiskStat, Vec) { +static S3_REQUEST_COUNTERS_MAP: LazyLock>> = + LazyLock::new(|| Mutex::new(HashMap::new())); + +fn collect_s3_stats( + store: &str, + backend_config: &DatastoreBackendConfig, +) -> Result, Error> { + let endpoint_id = backend_config + .client + .as_ref() + .ok_or(format_err!("missing s3 endpoint id"))?; + let bucket = backend_config + .bucket + .as_ref() + .ok_or(format_err!("missing s3 bucket name"))?; + let path = + format!("{S3_CLIENT_REQUEST_COUNTER_BASE_PATH}/{endpoint_id}-{bucket}-{store}.shmem"); + + let mut counters = S3_REQUEST_COUNTERS_MAP.lock().unwrap(); + let s3_stats = match counters.entry(path.clone()) { + Entry::Occupied(o) => load_s3_statistics(o.get()), + Entry::Vacant(v) => { + let user = pbs_config::backup_user()?; + let counters = SharedRequestCounters::open_shared_memory_mapped(path, user)?; + let s3_stats = load_s3_statistics(&counters); + v.insert(counters); + s3_stats + } + }; + + Ok(Some(s3_stats)) +} + +fn load_s3_statistics(counters: &SharedRequestCounters) -> S3Statistics { + S3Statistics { + get: counters.load(Method::GET, Ordering::Acquire), + put: counters.load(Method::PUT, Ordering::Acquire), + post: counters.load(Method::POST, Ordering::Acquire), + delete: counters.load(Method::DELETE, Ordering::Acquire), + head: counters.load(Method::HEAD, Ordering::Acquire), + uploaded: counters.get_upload_traffic(Ordering::Acquire), + downloaded: counters.get_download_traffic(Ordering::Acquire), + } +} + +fn collect_disk_stats_sync() -> (DiskStat, Vec) { let disk_manager = DiskManage::new(); let root = gather_disk_stats(disk_manager.clone(), Path::new("/"), "host"); @@ -239,11 +298,30 @@ fn collect_disk_stats_sync() -> (DiskStat, Vec) { continue; } - datastores.push(gather_disk_stats( + let s3_stats: Option = try_block!({ + let backend_config: DatastoreBackendConfig = serde_json::from_value( + DatastoreBackendConfig::API_SCHEMA + .parse_property_string(config.backend.as_deref().unwrap_or(""))?, + )?; + + if backend_config.ty.unwrap_or_default() == DatastoreBackendType::S3 { + collect_s3_stats(&config.name, &backend_config) + } else { + Ok(None) + } + }) + .unwrap_or_else(|err: Error| { + eprintln!("parsing datastore backend config failed - {err}"); + None + }); + + let disk = gather_disk_stats( disk_manager.clone(), Path::new(&config.absolute_path()), &config.name, - )); + ); + + datastores.push(DatastoreStats { disk, s3_stats }); } } Err(err) => { diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs index e99662faf..4dcd336a5 100644 --- a/src/server/metric_collection/pull_metrics.rs +++ b/src/server/metric_collection/pull_metrics.rs @@ -6,13 +6,14 @@ use nix::sys::stat::Mode; use pbs_api_types::{ MetricDataPoint, MetricDataType::{self, Derive, Gauge}, + S3Statistics, }; use pbs_buildcfg::PROXMOX_BACKUP_RUN_DIR; use proxmox_shared_cache::SharedCache; use proxmox_sys::fs::CreateOptions; use serde::{Deserialize, Serialize}; -use super::{DiskStat, HostStats, NetdevType, METRIC_COLLECTION_INTERVAL}; +use super::{DatastoreStats, DiskStat, HostStats, NetdevType, METRIC_COLLECTION_INTERVAL}; const METRIC_CACHE_TIME: Duration = Duration::from_secs(30 * 60); const STORED_METRIC_GENERATIONS: u64 = @@ -89,7 +90,7 @@ pub fn get_all_metrics(start_time: i64) -> Result, Error> { pub(super) fn update_metrics( host: &HostStats, hostdisk: &DiskStat, - datastores: &[DiskStat], + datastores: &[DatastoreStats], ) -> Result<(), Error> { let mut points = MetricDataPoints::new(proxmox_time::epoch_i64()); @@ -129,8 +130,11 @@ pub(super) fn update_metrics( update_disk_metrics(&mut points, hostdisk, "host"); for stat in datastores { - let id = format!("datastore/{}", stat.name); - update_disk_metrics(&mut points, stat, &id); + let id = format!("datastore/{}", stat.disk.name); + update_disk_metrics(&mut points, &stat.disk, &id); + if let Some(stat) = &stat.s3_stats { + update_s3_metrics(&mut points, stat, &id); + } } get_cache()?.set(&points, Duration::from_secs(2))?; @@ -158,6 +162,12 @@ fn update_disk_metrics(points: &mut MetricDataPoints, disk: &DiskStat, id: &str) } } +fn update_s3_metrics(points: &mut MetricDataPoints, stat: &S3Statistics, id: &str) { + let id = format!("{id}/s3"); + points.add(Gauge, &id, "uploaded", stat.uploaded as f64); + points.add(Gauge, &id, "downloaded", stat.downloaded as f64); +} + #[derive(Serialize, Deserialize)] struct MetricDataPoints { timestamp: i64, diff --git a/src/server/metric_collection/rrd.rs b/src/server/metric_collection/rrd.rs index 7b13b51ff..a0ea1a566 100644 --- a/src/server/metric_collection/rrd.rs +++ b/src/server/metric_collection/rrd.rs @@ -13,10 +13,11 @@ use proxmox_rrd::rrd::{AggregationFn, Archive, DataSourceType, Database}; use proxmox_rrd::Cache; use proxmox_sys::fs::CreateOptions; +use pbs_api_types::S3Statistics; use pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M; use proxmox_rrd_api_types::{RrdMode, RrdTimeframe}; -use super::{DiskStat, HostStats, NetdevType}; +use super::{DatastoreStats, DiskStat, HostStats, NetdevType}; const RRD_CACHE_BASEDIR: &str = concat!(PROXMOX_BACKUP_STATE_DIR_M!(), "/rrdb"); @@ -148,7 +149,7 @@ fn update_derive(name: &str, value: f64) { } } -pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores: &[DiskStat]) { +pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores: &[DatastoreStats]) { if let Some(stat) = &host.proc { update_gauge("host/cpu", stat.cpu); update_gauge("host/iowait", stat.iowait_percent); @@ -182,8 +183,11 @@ pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores: update_disk_metrics(hostdisk, "host"); for stat in datastores { - let rrd_prefix = format!("datastore/{}", stat.name); - update_disk_metrics(stat, &rrd_prefix); + let rrd_prefix = format!("datastore/{}", stat.disk.name); + update_disk_metrics(&stat.disk, &rrd_prefix); + if let Some(stats) = &stat.s3_stats { + update_s3_metrics(stats, &rrd_prefix); + } } } @@ -212,3 +216,25 @@ fn update_disk_metrics(disk: &DiskStat, rrd_prefix: &str) { update_derive(&rrd_key, (stat.io_ticks as f64) / 1000.0); } } + +fn update_s3_metrics(stats: &S3Statistics, rrd_prefix: &str) { + let rrd_key = format!("{rrd_prefix}/s3/total/uploaded"); + update_gauge(&rrd_key, stats.uploaded as f64); + let rrd_key = format!("{rrd_prefix}/s3/total/downloaded"); + update_gauge(&rrd_key, stats.downloaded as f64); + let rrd_key = format!("{rrd_prefix}/s3/uploaded"); + update_derive(&rrd_key, stats.uploaded as f64); + let rrd_key = format!("{rrd_prefix}/s3/downloaded"); + update_derive(&rrd_key, stats.downloaded as f64); + + let rrd_key = format!("{rrd_prefix}/s3/total/get"); + update_gauge(&rrd_key, stats.get as f64); + let rrd_key = format!("{rrd_prefix}/s3/total/put"); + update_gauge(&rrd_key, stats.put as f64); + let rrd_key = format!("{rrd_prefix}/s3/total/post"); + update_gauge(&rrd_key, stats.post as f64); + let rrd_key = format!("{rrd_prefix}/s3/total/head"); + update_gauge(&rrd_key, stats.head as f64); + let rrd_key = format!("{rrd_prefix}/s3/total/delete"); + update_gauge(&rrd_key, stats.delete as f64); +} -- 2.47.3