From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 11 Feb 2026 17:29:53 +0100
Subject: Re: [PATCH v1 09/11] metrics: collect s3 datastore statistics as rrd metrics
Message-ID: <66635c76-44b6-4767-8a29-29da5522071b@proxmox.com>
In-Reply-To: <20260209091533.156902-16-c.ebner@proxmox.com>
References: <20260209091533.156902-1-c.ebner@proxmox.com> <20260209091533.156902-16-c.ebner@proxmox.com>
List-Id: Proxmox Backup Server development discussion

On 2/9/26 10:14 AM, Christian Ebner wrote:
> For datastores with an s3 backend, load the shared s3 request counters
> via the mmapped file and include them as rrd metrics. Combine the
> pre-existing DiskStat with an optional S3Statistics into a common
> DatastoreStats struct as a dedicated type for the internal method
> interfaces.
>
> Request counters are collected by method; total upload and download
> traffic is recorded as gauge values as well as derived values to get
> averaged rate statistics.
>
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
>  src/server/metric_collection/metric_server.rs |  8 +--
>  src/server/metric_collection/mod.rs           | 65 +++++++++++++++++--
>  src/server/metric_collection/pull_metrics.rs  | 18 +++--
>  src/server/metric_collection/rrd.rs           | 34 ++++++++--
>  4 files changed, 108 insertions(+), 17 deletions(-)
>
> diff --git a/src/server/metric_collection/metric_server.rs b/src/server/metric_collection/metric_server.rs
> index ba20628a0..4584fc14c 100644
> --- a/src/server/metric_collection/metric_server.rs
> +++ b/src/server/metric_collection/metric_server.rs
> @@ -5,10 +5,10 @@ use serde_json::{json, Value};
>  
>  use proxmox_metrics::MetricsData;
>  
> -use super::{DiskStat, HostStats};
> +use super::{DatastoreStats, DiskStat, HostStats};
>  
>  pub async fn send_data_to_metric_servers(
> -    stats: Arc<(HostStats, DiskStat, Vec<DiskStat>)>,
> +    stats: Arc<(HostStats, DiskStat, Vec<DatastoreStats>)>,
>  ) -> Result<(), Error> {
>      let (config, _digest) = pbs_config::metrics::config()?;
>      let channel_list = get_metric_server_connections(config)?;
> @@ -66,10 +66,10 @@ pub async fn send_data_to_metric_servers(
>  
>      for datastore in stats.2.iter() {
>          values.push(Arc::new(
> -            MetricsData::new("blockstat", ctime, datastore.to_value())?
> +            MetricsData::new("blockstat", ctime, datastore.disk.to_value())?
>                  .tag("object", "host")
>                  .tag("host", nodename)
> -                .tag("datastore", datastore.name.clone()),
> +                .tag("datastore", datastore.disk.name.clone()),
>          ));
>      }
>  
> diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs
> index 9b62cbb42..db5830ad1 100644
> --- a/src/server/metric_collection/mod.rs
> +++ b/src/server/metric_collection/mod.rs
> @@ -1,3 +1,4 @@
> +use std::sync::atomic::Ordering;
>  use std::{
>      collections::HashMap,
>      path::Path,
> @@ -6,11 +7,17 @@ use std::{
>      time::{Duration, Instant},
>  };
>  
> -use anyhow::Error;
> +use anyhow::{format_err, Error};
> +use hyper::Method;
>  use tokio::join;
>  
> -use pbs_api_types::{DataStoreConfig, Operation};
> +use pbs_api_types::{
> +    DataStoreConfig, DatastoreBackendConfig, DatastoreBackendType, Operation, S3Statistics,
> +};
> +use proxmox_lang::try_block;
>  use proxmox_network_api::{get_network_interfaces, IpLink};
> +use proxmox_s3_client::SharedRequestCounters;
> +use proxmox_schema::ApiType;
>  use proxmox_sys::{
>      fs::FileSystemInformation,
>      linux::procfs::{Loadavg, ProcFsMemInfo, ProcFsNetDev, ProcFsStat},
> @@ -113,6 +120,11 @@ struct DiskStat {
>      dev: Option<BlockDevStat>,
>  }
>  
> +struct DatastoreStats {
> +    disk: DiskStat,
> +    s3_stats: Option<S3Statistics>,
> +}
> +
>  #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
>  enum NetdevType {
>      Physical,
> @@ -219,7 +231,7 @@ fn collect_host_stats_sync() -> HostStats {
>      }
>  }
>  
> -fn collect_disk_stats_sync() -> (DiskStat, Vec<DiskStat>) {
> +fn collect_disk_stats_sync() -> (DiskStat, Vec<DatastoreStats>) {
>      let disk_manager = DiskManage::new();
>  
>      let root = gather_disk_stats(disk_manager.clone(), Path::new("/"), "host");
> @@ -243,11 +255,54 @@ fn collect_disk_stats_sync() -> (DiskStat, Vec<DiskStat>) {
>                      continue;
>                  }
>  
> -                datastores.push(gather_disk_stats(
> +                let s3_stats: Option<S3Statistics> = try_block!({
> +                    let backend_config: DatastoreBackendConfig = serde_json::from_value(
> +                        DatastoreBackendConfig::API_SCHEMA
> +                            .parse_property_string(config.backend.as_deref().unwrap_or(""))?,
> +                    )?;
> +                    if backend_config.ty.unwrap_or_default() == DatastoreBackendType::S3 {
> +                        let path = format!(
> +                            "{}/{}-{}-{}.shmem",
> +                            pbs_datastore::S3_CLIENT_REQUEST_COUNTER_BASE_PATH,
> +                            backend_config
> +                                .client
> +                                .as_ref()
> +                                .ok_or(format_err!("missing s3 endpoint id"))?,
> +                            backend_config
> +                                .bucket
> +                                .as_ref()
> +                                .ok_or(format_err!("missing s3 bucket name"))?,
> +                            config.name,
> +                        );
> +                        let user = pbs_config::backup_user()?;
> +                        let counters =
> +                            SharedRequestCounters::open_shared_memory_mapped(path, user)?;

This should not happen on each metric collection run, but rather once per
datastore on demand, keeping the mmapped counters open. Will incorporate
this in a version 2 of the patch series as well (see the rough sketch at
the end of this mail).

> +                        let s3_statistics = S3Statistics {
> +                            get: counters.load(Method::GET, Ordering::SeqCst),
> +                            put: counters.load(Method::PUT, Ordering::SeqCst),
> +                            post: counters.load(Method::POST, Ordering::SeqCst),
> +                            delete: counters.load(Method::DELETE, Ordering::SeqCst),
> +                            head: counters.load(Method::HEAD, Ordering::SeqCst),
> +                            uploaded: counters.get_upload_traffic(Ordering::SeqCst),
> +                            downloaded: counters.get_download_traffic(Ordering::SeqCst),
> +                        };
> +                        Ok(Some(s3_statistics))
> +                    } else {
> +                        Ok(None)
> +                    }
> +                })
> +                .unwrap_or_else(|err: Error| {
> +                    eprintln!("reading datastore s3 counters failed - {err}");
> +                    None
> +                });
> +
> +                let disk = gather_disk_stats(
>                      disk_manager.clone(),
>                      Path::new(&config.absolute_path()),
>                      &config.name,
> -                ));
> +                );
> +
> +                datastores.push(DatastoreStats { disk, s3_stats });
>              }
>          }
>          Err(err) => {
> diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs
> index e99662faf..4dcd336a5 100644
> --- a/src/server/metric_collection/pull_metrics.rs
> +++ b/src/server/metric_collection/pull_metrics.rs
> @@ -6,13 +6,14 @@ use nix::sys::stat::Mode;
>  use pbs_api_types::{
>      MetricDataPoint,
>      MetricDataType::{self, Derive, Gauge},
> +    S3Statistics,
>  };
>  use pbs_buildcfg::PROXMOX_BACKUP_RUN_DIR;
>  use proxmox_shared_cache::SharedCache;
>  use proxmox_sys::fs::CreateOptions;
>  use serde::{Deserialize, Serialize};
>  
> -use super::{DiskStat, HostStats, NetdevType, METRIC_COLLECTION_INTERVAL};
> +use super::{DatastoreStats, DiskStat, HostStats, NetdevType, METRIC_COLLECTION_INTERVAL};
>  
>  const METRIC_CACHE_TIME: Duration = Duration::from_secs(30 * 60);
>  const STORED_METRIC_GENERATIONS: u64 =
> @@ -89,7 +90,7 @@ pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
>  pub(super) fn update_metrics(
>      host: &HostStats,
>      hostdisk: &DiskStat,
> -    datastores: &[DiskStat],
> +    datastores: &[DatastoreStats],
>  ) -> Result<(), Error> {
>      let mut points = MetricDataPoints::new(proxmox_time::epoch_i64());
>  
> @@ -129,8 +130,11 @@ pub(super) fn update_metrics(
>      update_disk_metrics(&mut points, hostdisk, "host");
>  
>      for stat in datastores {
> -        let id = format!("datastore/{}", stat.name);
> -        update_disk_metrics(&mut points, stat, &id);
> +        let id = format!("datastore/{}", stat.disk.name);
> +        update_disk_metrics(&mut points, &stat.disk, &id);
> +        if let Some(stat) = &stat.s3_stats {
> +            update_s3_metrics(&mut points, stat, &id);
> +        }
>      }
>  
>      get_cache()?.set(&points, Duration::from_secs(2))?;
> @@ -158,6 +162,12 @@ fn update_disk_metrics(points: &mut MetricDataPoints, disk: &DiskStat, id: &str)
>      }
>  }
>  
> +fn update_s3_metrics(points: &mut MetricDataPoints, stat: &S3Statistics, id: &str) {
> +    let id = format!("{id}/s3");
> +    points.add(Gauge, &id, "uploaded", stat.uploaded as f64);
> +    points.add(Gauge, &id, "downloaded", stat.downloaded as f64);
> +}
> +
>  #[derive(Serialize, Deserialize)]
>  struct MetricDataPoints {
>      timestamp: i64,
> diff --git a/src/server/metric_collection/rrd.rs b/src/server/metric_collection/rrd.rs
> index 7b13b51ff..a0ea1a566 100644
> --- a/src/server/metric_collection/rrd.rs
> +++ b/src/server/metric_collection/rrd.rs
> @@ -13,10 +13,11 @@ use proxmox_rrd::rrd::{AggregationFn, Archive, DataSourceType, Database};
>  use proxmox_rrd::Cache;
>  use proxmox_sys::fs::CreateOptions;
>  
> +use pbs_api_types::S3Statistics;
>  use pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M;
>  use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
>  
> -use super::{DiskStat, HostStats, NetdevType};
> +use super::{DatastoreStats, DiskStat, HostStats, NetdevType};
>  
>  const RRD_CACHE_BASEDIR: &str = concat!(PROXMOX_BACKUP_STATE_DIR_M!(), "/rrdb");
>  
> @@ -148,7 +149,7 @@ fn update_derive(name: &str, value: f64) {
>      }
>  }
>  
> -pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores: &[DiskStat]) {
> +pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores: &[DatastoreStats]) {
>      if let Some(stat) = &host.proc {
>          update_gauge("host/cpu", stat.cpu);
>          update_gauge("host/iowait", stat.iowait_percent);
> @@ -182,8 +183,11 @@ pub(super) fn update_metrics(host: &HostStats, hostdisk: &DiskStat, datastores:
>      update_disk_metrics(hostdisk, "host");
>  
>      for stat in datastores {
> -        let rrd_prefix = format!("datastore/{}", stat.name);
> -        update_disk_metrics(stat, &rrd_prefix);
> +        let rrd_prefix = format!("datastore/{}", stat.disk.name);
> +        update_disk_metrics(&stat.disk, &rrd_prefix);
> +        if let Some(stats) = &stat.s3_stats {
> +            update_s3_metrics(stats, &rrd_prefix);
> +        }
>      }
>  }
>  
> @@ -212,3 +216,25 @@ fn update_disk_metrics(disk: &DiskStat, rrd_prefix: &str) {
>          update_derive(&rrd_key, (stat.io_ticks as f64) / 1000.0);
>      }
>  }
> +
> +fn update_s3_metrics(stats: &S3Statistics, rrd_prefix: &str) {
> +    let rrd_key = format!("{rrd_prefix}/s3/total/uploaded");
> +    update_gauge(&rrd_key, stats.uploaded as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/total/downloaded");
> +    update_gauge(&rrd_key, stats.downloaded as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/uploaded");
> +    update_derive(&rrd_key, stats.uploaded as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/downloaded");
> +    update_derive(&rrd_key, stats.downloaded as f64);
> +
> +    let rrd_key = format!("{rrd_prefix}/s3/total/get");
> +    update_gauge(&rrd_key, stats.get as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/total/put");
> +    update_gauge(&rrd_key, stats.put as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/total/post");
> +    update_gauge(&rrd_key, stats.post as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/total/head");
> +    update_gauge(&rrd_key, stats.head as f64);
> +    let rrd_key = format!("{rrd_prefix}/s3/total/delete");
> +    update_gauge(&rrd_key, stats.delete as f64);
> +}
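
As mentioned inline, below is a rough sketch of what I have in mind for
version 2. The helper name and the cache layout are illustrative only; it
reuses the same SharedRequestCounters calls this patch already introduces,
but keeps the opened counters in a process-wide map so the shared memory
file is only mmapped once per datastore:

use std::collections::HashMap;
use std::sync::{Arc, Mutex, OnceLock};

use anyhow::Error;
use proxmox_s3_client::SharedRequestCounters;

// Process-wide cache of opened request counters, keyed by datastore name.
static S3_COUNTERS: OnceLock<Mutex<HashMap<String, Arc<SharedRequestCounters>>>> =
    OnceLock::new();

// Open (and mmap) the shared memory counters for `store` on first access,
// return the cached handle on subsequent calls.
fn s3_counters_for(store: &str, shmem_path: String) -> Result<Arc<SharedRequestCounters>, Error> {
    let cache = S3_COUNTERS.get_or_init(|| Mutex::new(HashMap::new()));
    let mut cache = cache.lock().unwrap();
    if let Some(counters) = cache.get(store) {
        return Ok(Arc::clone(counters));
    }
    let user = pbs_config::backup_user()?;
    let counters = Arc::new(SharedRequestCounters::open_shared_memory_mapped(
        shmem_path, user,
    )?);
    cache.insert(store.to_string(), Arc::clone(&counters));
    Ok(counters)
}

The collection loop would then call the helper instead of
open_shared_memory_mapped() and only perform the atomic loads on each run.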