From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [RFC ha-manager 16/21] sim: hardware: add set-dynamic-stats for services
Date: Tue, 17 Feb 2026 15:14:23 +0100 [thread overview]
Message-ID: <20260217141437.584852-30-d.kral@proxmox.com> (raw)
In-Reply-To: <20260217141437.584852-1-d.kral@proxmox.com>
From: Dominik Rusovac <d.rusovac@proxmox.com>
This adds the command set-dynamic-stats to simulate the cpu load (cpu) and
memory usage (mem in MiB) of a service, complementing the existing command
set-static-stats, which configures the number of cores (maxcpu) and RAM
(maxmem in MiB) of a service. In addition to using the designated
command, dynamic service stats can be specified beforehand in the file
dynamic_service_stats.
Upon calling set-dynamic-stats on some service, the dynamic stats of the
node this very service is running on will be aggregated accordingly.
Signed-off-by: Dominik Rusovac <d.rusovac@proxmox.com>
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
src/PVE/HA/Sim/Hardware.pm | 130 +++++++++++++++++++++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index ec135e09..910f9718 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -21,8 +21,11 @@ use PVE::HA::Groups;
my $watchdog_timeout = 60;
+my $default_service_cpu = 2.0; # dynamic cpu load used by new() if no stats file is provided
my $default_service_maxcpu = 4.0;
+my $default_service_mem = 2048 * 1024**2; # dynamic memory usage default: 2048 MiB in bytes
my $default_service_maxmem = 4096 * 1024**2;
+
my $default_node_maxcpu = 24.0;
my $default_node_maxmem = 131072 * 1024**2;
@@ -213,6 +216,25 @@ sub set_static_service_stats {
$self->write_static_service_stats($stats);
}
+# Update the dynamic stats of the service $sid and persist them.
+#
+# $new_stats is a hash reference which may carry the keys 'mem' and 'cpu'. Only
+# keys with a defined value are applied, all other stored stats are kept as-is.
+#
+# Dies if $sid is not part of the service config.
+sub set_dynamic_service_stats {
+    my ($self, $sid, $new_stats) = @_;
+
+    my $conf = $self->read_service_config();
+    die "no such service '$sid'" if !$conf->{$sid};
+
+    my $stats = $self->read_dynamic_service_stats();
+
+    # check for definedness rather than truthiness, so that a stat can be set
+    # to 0 explicitly (e.g. an idle service without any cpu load)
+    for my $key (qw(mem cpu)) {
+        next if !defined($new_stats->{$key});
+        $stats->{$sid}->{$key} = $new_stats->{$key};
+    }
+
+    $self->write_dynamic_service_stats($stats);
+}
+
sub add_service {
my ($self, $sid, $opts, $running) = @_;
@@ -438,6 +460,16 @@ sub read_static_service_stats {
return $stats;
}
+# Load the dynamic service stats from the JSON file in the status directory.
+#
+# A failure to load the file is logged as an error and not propagated to the
+# caller; the result of the failed eval is returned in that case.
+sub read_dynamic_service_stats {
+    my ($self) = @_;
+
+    my $path = "$self->{statusdir}/dynamic_service_stats";
+
+    my $loaded = eval { PVE::HA::Tools::read_json_from_file($path) };
+    my $err = $@;
+    $self->log('error', "loading dynamic service stats failed - $err") if $err;
+
+    return $loaded;
+}
+
sub write_static_service_stats {
my ($self, $stats) = @_;
@@ -446,6 +478,14 @@ sub write_static_service_stats {
$self->log('error', "writing static service stats failed - $@") if $@;
}
+# Persist the given dynamic service stats as JSON in the status directory.
+#
+# A failure to write the file is logged as an error and not propagated to the
+# caller.
+sub write_dynamic_service_stats {
+    my ($self, $stats) = @_;
+
+    my $path = "$self->{statusdir}/dynamic_service_stats";
+
+    eval { PVE::HA::Tools::write_json_to_file($path, $stats) };
+    my $err = $@;
+    $self->log('error', "writing dynamic service stats failed - $err") if $err;
+}
+
sub new {
my ($this, $testdir) = @_;
@@ -536,6 +576,18 @@ sub new {
$self->write_static_service_stats($stats);
}
+ if (-f "$testdir/dynamic_service_stats") {
+ copy("$testdir/dynamic_service_stats", "$statusdir/dynamic_service_stats");
+ } else {
+ my $services = $self->read_static_service_stats();
+ my $stats = {
+ map { $_ => { cpu => $default_service_cpu, mem => $default_service_mem } }
+ keys %$services
+ };
+
+ $self->write_dynamic_service_stats($stats);
+ }
+
my $cstatus = $self->read_hardware_status_nolock();
foreach my $node (sort keys %$cstatus) {
@@ -744,6 +796,7 @@ sub get_cfs_state {
# service <sid> lock/unlock [lockname]
# service <sid> add <node> [<request-state=started>] [<running=0>]
# service <sid> set-static-stats <maxcpu|maxmem> <cpu cores|MiB>
+# service <sid> set-dynamic-stats <cpu|mem> <load in cpu cores|usage in MiB>
# service <sid> delete
sub sim_hardware_cmd {
my ($self, $cmdstr, $logid) = @_;
@@ -911,6 +964,24 @@ sub sim_hardware_cmd {
} else {
die "sim_hardware_cmd: unknown target stat '$target' for '$action' command";
}
+ } elsif ($action eq 'set-dynamic-stats') {
+ my ($target, $val) = ($params[0], $params[1]);
+
+ if (!$target) {
+ die "sim_hardware_cmd: missing target stat for '$action' command";
+ } elsif ($target eq "cpu") {
+ die "sim_hardware_cmd: missing value for '$action $target' command"
+ if !$val;
+
+ $self->set_dynamic_service_stats($sid, { $target => 0.0 + $val });
+ } elsif ($target eq "mem") {
+ die "sim_hardware_cmd: missing value for '$action $target' command"
+ if !$val;
+
+ $self->set_dynamic_service_stats($sid, { $target => $val * 1024**2 });
+ } else {
+ die "sim_hardware_cmd: unknown target stat '$target' for '$action' command";
+ }
} elsif ($action eq 'delete') {
$self->delete_service($sid);
@@ -1135,6 +1206,27 @@ sub get_static_service_stats {
return $stats;
}
+# Collect the per-service stats of the cluster and attach the current usage.
+#
+# For every service returned by get_cluster_service_stats() the 'usage' entry
+# is set to the static stats of the service merged with its dynamic stats; if
+# both define the same key, the dynamic value wins.
+#
+# Dies if the cpu load or memory usage of a service exceeds its maxcpu/maxmem.
+sub get_dynamic_service_stats {
+    my ($self) = @_;
+
+    my $stats = get_cluster_service_stats($self);
+    my $static_stats = $self->read_static_service_stats();
+    my $dynamic_stats = $self->read_dynamic_service_stats();
+
+    for my $sid (keys %$stats) {
+        # a service might lack an entry in one of the stats files, so fall back
+        # to an empty hash instead of dereferencing an undefined value
+        my $usage = {
+            %{ $static_stats->{$sid} // {} }, %{ $dynamic_stats->{$sid} // {} },
+        };
+        $stats->{$sid}->{usage} = $usage;
+
+        # missing values are compared as 0 here (same numeric result, but no
+        # warnings); get_dynamic_node_stats() reports unknown usage explicitly
+        die "overcommitted cpu on '$sid'"
+            if ($usage->{cpu} // 0) > ($usage->{maxcpu} // 0);
+        die "overcommitted mem on '$sid'"
+            if ($usage->{mem} // 0) > ($usage->{maxmem} // 0);
+    }
+
+    return $stats;
+}
+
sub get_static_node_stats {
my ($self) = @_;
@@ -1148,6 +1240,44 @@ sub get_static_node_stats {
return $stats;
}
+# Build the dynamic stats of all nodes by summing up the usage of their services.
+#
+# Starts from the static node stats, fills in defaults for missing values and
+# then adds the cpu load and memory usage of every service that is listed in
+# the service status of the node it is configured on.
+#
+# Dies if the usage of such a service is unknown or if the accumulated usage of
+# a node exceeds its maxcpu/maxmem.
+sub get_dynamic_node_stats {
+    my ($self) = @_;
+
+    my $stats = $self->get_static_node_stats();
+    foreach my $node (keys %$stats) {
+        # only fill in what the static node stats did not provide
+        $stats->{$node}->{maxcpu} //= $default_node_maxcpu;
+        $stats->{$node}->{cpu} //= 0.0;
+        $stats->{$node}->{maxmem} //= $default_node_maxmem;
+        $stats->{$node}->{mem} //= 0;
+    }
+
+    my $service_conf = $self->read_service_config();
+    my $dynamic_service_stats = $self->get_dynamic_service_stats();
+
+    my $cstatus = $self->read_hardware_status_nolock();
+    my $status_by_node = { map { $_ => $self->read_service_status($_) } keys %$cstatus };
+
+    foreach my $sid (keys %$service_conf) {
+        my $node = $service_conf->{$sid}->{node};
+
+        # services without a status entry on their node do not add any usage
+        next if !$status_by_node->{$node}->{$sid};
+
+        my ($cpu, $mem) = $dynamic_service_stats->{$sid}->{usage}->@{qw(cpu mem)};
+
+        if (!defined($cpu)) {
+            die "unknown cpu load for '$sid'";
+        }
+        $stats->{$node}->{cpu} += $cpu;
+        if ($stats->{$node}->{cpu} > $stats->{$node}->{maxcpu}) {
+            die "overcommitted cpu on '$node'";
+        }
+
+        if (!defined($mem)) {
+            die "unknown memory usage for '$sid'";
+        }
+        $stats->{$node}->{mem} += $mem;
+        if ($stats->{$node}->{mem} > $stats->{$node}->{maxmem}) {
+            die "overcommitted mem on '$node'";
+        }
+    }
+
+    return $stats;
+}
+
sub get_node_version {
my ($self, $node) = @_;
--
2.47.3
next prev parent reply other threads:[~2026-02-17 14:16 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-17 14:13 [RFC PATCH-SERIES many 00/36] dynamic scheduler + load rebalancer Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 1/5] resource-scheduling: move score_nodes_to_start_service to scheduler crate Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 2/5] resource-scheduling: introduce generic cluster usage implementation Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 3/5] resource-scheduling: add dynamic node and service stats Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 4/5] resource-scheduling: implement rebalancing migration selection Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 5/5] resource-scheduling: implement Add and Default for {Dynamic,Static}ServiceStats Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 1/6] pve-rs: resource scheduling: use generic cluster usage implementation Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 2/6] pve-rs: resource scheduling: create service_nodes hashset from array Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 3/6] pve-rs: resource scheduling: store service stats independently of node Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 4/6] pve-rs: resource scheduling: expose auto rebalancing methods Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 5/6] pve-rs: resource scheduling: move pve_static into resource_scheduling module Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 6/6] pve-rs: resource scheduling: implement pve_dynamic bindings Daniel Kral
2026-02-17 14:14 ` [RFC cluster 1/2] datacenter config: add dynamic load scheduler option Daniel Kral
2026-02-18 11:06 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC cluster 2/2] datacenter config: add auto rebalancing options Daniel Kral
2026-02-18 11:15 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC ha-manager 01/21] rename static node stats to be consistent with similar interfaces Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 02/21] resources: remove redundant load_config fallback for static config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 03/21] remove redundant service_node and migration_target parameter Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 04/21] factor out common pve to ha resource type mapping Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 05/21] derive static service stats while filling the service stats repository Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 06/21] test: make static service usage explicit for all resources Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 07/21] make static service stats indexable by sid Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 08/21] move static service stats repository to PVE::HA::Usage::Static Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 09/21] usage: augment service stats with node and state information Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 10/21] include running non-HA resources in the scheduler's accounting Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 11/21] env, resources: add dynamic node and service stats abstraction Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 12/21] env: pve2: implement dynamic node and service stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 13/21] sim: hardware: pass correct types for static stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 14/21] sim: hardware: factor out static stats' default values Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 15/21] sim: hardware: rewrite set-static-stats Daniel Kral
2026-02-17 14:14 ` Daniel Kral [this message]
2026-02-17 14:14 ` [RFC ha-manager 17/21] usage: add dynamic usage scheduler Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 18/21] manager: rename execute_migration to queue_resource_motion Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 19/21] manager: update_crs_scheduler_mode: factor out crs config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 20/21] implement automatic rebalancing Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 21/21] test: add basic automatic rebalancing system test cases Daniel Kral
2026-02-17 14:14 ` [RFC manager 1/2] ui: dc/options: add dynamic load scheduler option Daniel Kral
2026-02-18 11:10 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC manager 2/2] ui: dc/options: add auto rebalancing options Daniel Kral
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260217141437.584852-30-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox