From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id BB0E71FF17B for ; Tue, 15 Oct 2024 10:46:24 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 977151172D; Tue, 15 Oct 2024 10:46:52 +0200 (CEST) From: Lukas Wagner To: pbs-devel@lists.proxmox.com Date: Tue, 15 Oct 2024 10:46:36 +0200 Message-Id: <20241015084636.57106-14-l.wagner@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20241015084636.57106-1-l.wagner@proxmox.com> References: <20241015084636.57106-1-l.wagner@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.141 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_1 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_3 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup v2 13/13] api: add /status/metrics API X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" This one is modelled exactly as the one in PVE (there it is available under /cluster/metrics/export). The returned data format is quite simple, being an array of metric records, including a value, a metric name, an id to identify the object (e.g. 
datastore/foo, host), a timestamp and a type ('gauge', 'derive', ...). The latter property makes the format self-describing and aids the metric collector in choosing a representation for storing the metric data. [ ... { "metric": "cpu_avg1", "value": 0.12, "timestamp": 170053205, "id": "host", "type": "gauge" }, ... ] In terms of permissions, the new endpoint requires Sys.Audit on /system/status for metrics of the 'host' object, and Datastore.Audit on /datastore/{store} for 'datastore/{store}' metric objects. Via the 'history' and 'start-time' parameters one can query the last 30mins of metric history. If these parameters are not provided, only the most recent metric generation is returned. Signed-off-by: Lukas Wagner --- Notes: Changes since v2: - move api2/status.rs to api2/status/mod.rs - move api2/metrics.rs to api2/status/metrics.rs src/api2/status/metrics.rs | 73 ++++++++++++++++++++ src/api2/{status.rs => status/mod.rs} | 15 ++-- src/server/metric_collection/mod.rs | 4 +- src/server/metric_collection/pull_metrics.rs | 45 ++++++++++++ 4 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 src/api2/status/metrics.rs rename src/api2/{status.rs => status/mod.rs} (95%) diff --git a/src/api2/status/metrics.rs b/src/api2/status/metrics.rs new file mode 100644 index 00000000..a5583aac --- /dev/null +++ b/src/api2/status/metrics.rs @@ -0,0 +1,73 @@ +use anyhow::Error; +use pbs_api_types::{Authid, MetricDataPoint, Metrics, PRIV_DATASTORE_AUDIT, PRIV_SYS_AUDIT}; +use pbs_config::CachedUserInfo; +use proxmox_router::{Permission, Router, RpcEnvironment}; +use proxmox_schema::api; + +use crate::server::metric_collection::pull_metrics; + +pub const ROUTER: Router = Router::new().get(&API_METHOD_GET_METRICS); + +#[api( + input: { + properties: { + "start-time": { + optional: true, + default: 0, + description: "Only return values with a timestamp > start-time. 
Only has an effect if 'history' is also set", + }, + "history": { + optional: true, + default: false, + description: "Include historic values (last 30 minutes)", + } + }, + }, + access: { + description: "Users need Sys.Audit on /system/status for host metrics and Datastore.Audit on /datastore/{store} for datastore metrics", + permission: &Permission::Anybody, + }, +)] +/// Return backup server metrics. +pub fn get_metrics( + start_time: i64, + history: bool, + rpcenv: &mut dyn RpcEnvironment, +) -> Result<Metrics, Error> { + let metrics = if history { + pull_metrics::get_all_metrics(start_time)? + } else { + pull_metrics::get_most_recent_metrics()? + }; + + let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?; + let user_info = CachedUserInfo::new()?; + + let filter_by_privs = |point: &MetricDataPoint| { + let elements: Vec<&str> = point.id.as_str().split('/').collect(); + + match elements.as_slice() { + ["host"] => { + let user_privs = + CachedUserInfo::lookup_privs(&user_info, &auth_id, &["system", "status"]); + (user_privs & PRIV_SYS_AUDIT) != 0 + } + ["datastore", datastore_id] => { + let user_privs = CachedUserInfo::lookup_privs( + &user_info, + &auth_id, + &["datastore", datastore_id], + ); + (user_privs & PRIV_DATASTORE_AUDIT) != 0 + } + _ => { + log::error!("invalid metric object id: {}", point.id); + false + } + } + }; + + Ok(Metrics { + data: metrics.into_iter().filter(filter_by_privs).collect(), + }) +} diff --git a/src/api2/status.rs b/src/api2/status/mod.rs similarity index 95% rename from src/api2/status.rs rename to src/api2/status/mod.rs index e46fc1ae..113aa985 100644 --- a/src/api2/status.rs +++ b/src/api2/status/mod.rs @@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method; use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap}; use proxmox_rrd_api_types::{RrdMode, RrdTimeframe}; use proxmox_schema::api; +use proxmox_sortable_macro::sortable; use pbs_api_types::{ Authid, DataStoreStatusListItem, Operation, 
PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, @@ -20,6 +21,8 @@ use crate::tools::statistics::linear_regression; use crate::backup::can_access_any_namespace; +pub mod metrics; + #[api( returns: { description: "Lists the Status of the Datastores.", @@ -137,10 +140,14 @@ pub async fn datastore_status( Ok(list) } -const SUBDIRS: SubdirMap = &[( - "datastore-usage", - &Router::new().get(&API_METHOD_DATASTORE_STATUS), -)]; +#[sortable] +const SUBDIRS: SubdirMap = &sorted!([ + ( + "datastore-usage", + &Router::new().get(&API_METHOD_DATASTORE_STATUS), + ), + ("metrics", &metrics::ROUTER), +]); pub const ROUTER: Router = Router::new() .get(&list_subdirs_api_method!(SUBDIRS)) diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs index e6e04c5b..3cbd7425 100644 --- a/src/server/metric_collection/mod.rs +++ b/src/server/metric_collection/mod.rs @@ -17,8 +17,8 @@ use proxmox_sys::{ use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage}; mod metric_server; -mod pull_metrics; -pub mod rrd; +pub(crate) mod pull_metrics; +pub(crate) mod rrd; const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10); diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs index f4b506cf..1b5f3777 100644 --- a/src/server/metric_collection/pull_metrics.rs +++ b/src/server/metric_collection/pull_metrics.rs @@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> { Ok(()) } +/// Return most recent metrics +/// +/// If the metric collection loop has not produced any metrics yet, an empty +/// `Vec` is returned. Returns an error if the cache could not be accessed. 
+pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> { + let cached_datapoints: Option = get_cache()?.get()?; + let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default(); + + points.sort_unstable_by_key(|p| p.timestamp); + + Ok(points) +} + +/// Return all cached metrics with a `timestamp > start_time` +/// +/// If the metric collection loop has not produced any metrics yet, an empty +/// `Vec` is returned. Returns an error if the cache could not be accessed. +pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> { + let now = proxmox_time::epoch_i64(); + + let delta = now - start_time; + + if delta < 0 { + // start-time in the future, no metrics for you + return Ok(Vec::new()); + } + + let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64); + let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64); + + let cached_datapoints: Vec = get_cache()?.get_last(generations as u32)?; + + let mut points = Vec::new(); + + for gen in cached_datapoints { + if gen.timestamp > start_time { + points.extend(gen.datapoints); + } + } + + points.sort_unstable_by_key(|p| p.timestamp); + + Ok(points) +} + /// Convert `DiskStat` `HostStat` into a universal metric data point and cache /// them for a later retrieval. pub(super) fn update_metrics( -- 2.39.5 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel