public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [RFC ha-manager 16/21] sim: hardware: add set-dynamic-stats for services
Date: Tue, 17 Feb 2026 15:14:23 +0100	[thread overview]
Message-ID: <20260217141437.584852-30-d.kral@proxmox.com> (raw)
In-Reply-To: <20260217141437.584852-1-d.kral@proxmox.com>

From: Dominik Rusovac <d.rusovac@proxmox.com>

This adds the set-dynamic-stats command to simulate the CPU load (cpu)
and memory usage (mem, in MiB) of a service. It complements the
set-static-stats command, which configures the number of cores (maxcpu)
and the RAM (maxmem, in MiB) of a service. In addition to using the
designated command, dynamic service stats can be specified beforehand
in the file dynamic_service_stats.

After set-dynamic-stats is called for a service, the dynamic stats of
the node that service is running on reflect the change, because the
node stats are aggregated from the stats of the services running on it.

Signed-off-by: Dominik Rusovac <d.rusovac@proxmox.com>
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
 src/PVE/HA/Sim/Hardware.pm | 130 +++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index ec135e09..910f9718 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -21,8 +21,11 @@ use PVE::HA::Groups;
 
 my $watchdog_timeout = 60;
 
+my $default_service_cpu = 2.0;
 my $default_service_maxcpu = 4.0;
+my $default_service_mem = 2048 * 1024**2;
 my $default_service_maxmem = 4096 * 1024**2;
+
 my $default_node_maxcpu = 24.0;
 my $default_node_maxmem = 131072 * 1024**2;
 
@@ -213,6 +216,25 @@ sub set_static_service_stats {
     $self->write_static_service_stats($stats);
 }
 
+sub set_dynamic_service_stats {
+    my ($self, $sid, $new_stats) = @_;
+
+    my $conf = $self->read_service_config();
+    die "no such service '$sid'" if !$conf->{$sid};
+
+    my $stats = $self->read_dynamic_service_stats();
+
+    if (my $memory = $new_stats->{mem}) {
+        $stats->{$sid}->{mem} = $memory;
+    }
+
+    if (my $cpu = $new_stats->{cpu}) {
+        $stats->{$sid}->{cpu} = $cpu;
+    }
+
+    $self->write_dynamic_service_stats($stats);
+}
+
 sub add_service {
     my ($self, $sid, $opts, $running) = @_;
 
@@ -438,6 +460,16 @@ sub read_static_service_stats {
     return $stats;
 }
 
+sub read_dynamic_service_stats {
+    my ($self) = @_;
+
+    my $filename = "$self->{statusdir}/dynamic_service_stats";
+    my $stats = eval { PVE::HA::Tools::read_json_from_file($filename) };
+    $self->log('error', "loading dynamic service stats failed - $@") if $@;
+
+    return $stats;
+}
+
 sub write_static_service_stats {
     my ($self, $stats) = @_;
 
@@ -446,6 +478,14 @@ sub write_static_service_stats {
     $self->log('error', "writing static service stats failed - $@") if $@;
 }
 
+sub write_dynamic_service_stats {
+    my ($self, $stats) = @_;
+
+    my $filename = "$self->{statusdir}/dynamic_service_stats";
+    eval { PVE::HA::Tools::write_json_to_file($filename, $stats) };
+    $self->log('error', "writing dynamic service stats failed - $@") if $@;
+}
+
 sub new {
     my ($this, $testdir) = @_;
 
@@ -536,6 +576,18 @@ sub new {
         $self->write_static_service_stats($stats);
     }
 
+    if (-f "$testdir/dynamic_service_stats") {
+        copy("$testdir/dynamic_service_stats", "$statusdir/dynamic_service_stats");
+    } else {
+        my $services = $self->read_static_service_stats();
+        my $stats = {
+            map { $_ => { cpu => $default_service_cpu, mem => $default_service_mem } }
+                keys %$services
+        };
+
+        $self->write_dynamic_service_stats($stats);
+    }
+
     my $cstatus = $self->read_hardware_status_nolock();
 
     foreach my $node (sort keys %$cstatus) {
@@ -744,6 +796,7 @@ sub get_cfs_state {
 #   service <sid> lock/unlock [lockname]
 #   service <sid> add <node> [<request-state=started>] [<running=0>]
 #   service <sid> set-static-stats  <maxcpu|maxmem> <cpu cores|MiB>
+#   service <sid> set-dynamic-stats <cpu|mem>       <load in cpu cores|usage in MiB>
 #   service <sid> delete
 sub sim_hardware_cmd {
     my ($self, $cmdstr, $logid) = @_;
@@ -911,6 +964,24 @@ sub sim_hardware_cmd {
                 } else {
                     die "sim_hardware_cmd: unknown target stat '$target' for '$action' command";
                 }
+            } elsif ($action eq 'set-dynamic-stats') {
+                my ($target, $val) = ($params[0], $params[1]);
+
+                if (!$target) {
+                    die "sim_hardware_cmd: missing target stat for '$action' command";
+                } elsif ($target eq "cpu") {
+                    die "sim_hardware_cmd: missing value for '$action $target' command"
+                        if !$val;
+
+                    $self->set_dynamic_service_stats($sid, { $target => 0.0 + $val });
+                } elsif ($target eq "mem") {
+                    die "sim_hardware_cmd: missing value for '$action $target' command"
+                        if !$val;
+
+                    $self->set_dynamic_service_stats($sid, { $target => $val * 1024**2 });
+                } else {
+                    die "sim_hardware_cmd: unknown target stat '$target' for '$action' command";
+                }
             } elsif ($action eq 'delete') {
 
                 $self->delete_service($sid);
@@ -1135,6 +1206,27 @@ sub get_static_service_stats {
     return $stats;
 }
 
+sub get_dynamic_service_stats {
+    my ($self) = @_;
+
+    my $stats = get_cluster_service_stats($self);
+    my $static_stats = $self->read_static_service_stats();
+    my $dynamic_stats = $self->read_dynamic_service_stats();
+
+    for my $sid (keys %$stats) {
+        $stats->{$sid}->{usage} = {
+            $static_stats->{$sid}->%*, $dynamic_stats->{$sid}->%*,
+        };
+
+        die "overcommitted cpu on '$sid'"
+            if $stats->{$sid}->{usage}->{cpu} > $stats->{$sid}->{usage}->{maxcpu};
+        die "overcommitted mem on '$sid'"
+            if $stats->{$sid}->{usage}->{mem} > $stats->{$sid}->{usage}->{maxmem};
+    }
+
+    return $stats;
+}
+
 sub get_static_node_stats {
     my ($self) = @_;
 
@@ -1148,6 +1240,44 @@ sub get_static_node_stats {
     return $stats;
 }
 
+sub get_dynamic_node_stats {
+    my ($self) = @_;
+
+    my $stats = $self->get_static_node_stats();
+    for my $node (keys %$stats) {
+        $stats->{$node}->{maxcpu} = $stats->{$node}->{maxcpu} // $default_node_maxcpu;
+        $stats->{$node}->{cpu} = $stats->{$node}->{cpu} // 0.0;
+        $stats->{$node}->{maxmem} = $stats->{$node}->{maxmem} // $default_node_maxmem;
+        $stats->{$node}->{mem} = $stats->{$node}->{mem} // 0;
+    }
+
+    my $service_conf = $self->read_service_config();
+    my $dynamic_service_stats = $self->get_dynamic_service_stats();
+
+    my $cstatus = $self->read_hardware_status_nolock();
+    my $node_service_status = { map { $_ => $self->read_service_status($_) } keys %$cstatus };
+
+    for my $sid (keys %$service_conf) {
+        my $node = $service_conf->{$sid}->{node};
+
+        if ($node_service_status->{$node}->{$sid}) {
+            my ($cpu, $mem) = $dynamic_service_stats->{$sid}->{usage}->@{qw(cpu mem)};
+
+            die "unknown cpu load for '$sid'" if !defined($cpu);
+            $stats->{$node}->{cpu} += $cpu;
+            die "overcommitted cpu on '$node'"
+                if $stats->{$node}->{cpu} > $stats->{$node}->{maxcpu};
+
+            die "unknown memory usage for '$sid'" if !defined($mem);
+            $stats->{$node}->{mem} += $mem;
+            die "overcommitted mem on '$node'"
+                if $stats->{$node}->{mem} > $stats->{$node}->{maxmem};
+        }
+    }
+
+    return $stats;
+}
+
 sub get_node_version {
     my ($self, $node) = @_;
 
-- 
2.47.3





  parent reply	other threads:[~2026-02-17 14:16 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-17 14:13 [RFC PATCH-SERIES many 00/36] dynamic scheduler + load rebalancer Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 1/5] resource-scheduling: move score_nodes_to_start_service to scheduler crate Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 2/5] resource-scheduling: introduce generic cluster usage implementation Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 3/5] resource-scheduling: add dynamic node and service stats Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 4/5] resource-scheduling: implement rebalancing migration selection Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 5/5] resource-scheduling: implement Add and Default for {Dynamic,Static}ServiceStats Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 1/6] pve-rs: resource scheduling: use generic cluster usage implementation Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 2/6] pve-rs: resource scheduling: create service_nodes hashset from array Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 3/6] pve-rs: resource scheduling: store service stats independently of node Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 4/6] pve-rs: resource scheduling: expose auto rebalancing methods Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 5/6] pve-rs: resource scheduling: move pve_static into resource_scheduling module Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 6/6] pve-rs: resource scheduling: implement pve_dynamic bindings Daniel Kral
2026-02-17 14:14 ` [RFC cluster 1/2] datacenter config: add dynamic load scheduler option Daniel Kral
2026-02-18 11:06   ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC cluster 2/2] datacenter config: add auto rebalancing options Daniel Kral
2026-02-18 11:15   ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC ha-manager 01/21] rename static node stats to be consistent with similar interfaces Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 02/21] resources: remove redundant load_config fallback for static config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 03/21] remove redundant service_node and migration_target parameter Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 04/21] factor out common pve to ha resource type mapping Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 05/21] derive static service stats while filling the service stats repository Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 06/21] test: make static service usage explicit for all resources Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 07/21] make static service stats indexable by sid Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 08/21] move static service stats repository to PVE::HA::Usage::Static Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 09/21] usage: augment service stats with node and state information Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 10/21] include running non-HA resources in the scheduler's accounting Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 11/21] env, resources: add dynamic node and service stats abstraction Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 12/21] env: pve2: implement dynamic node and service stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 13/21] sim: hardware: pass correct types for static stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 14/21] sim: hardware: factor out static stats' default values Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 15/21] sim: hardware: rewrite set-static-stats Daniel Kral
2026-02-17 14:14 ` Daniel Kral [this message]
2026-02-17 14:14 ` [RFC ha-manager 17/21] usage: add dynamic usage scheduler Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 18/21] manager: rename execute_migration to queue_resource_motion Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 19/21] manager: update_crs_scheduler_mode: factor out crs config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 20/21] implement automatic rebalancing Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 21/21] test: add basic automatic rebalancing system test cases Daniel Kral
2026-02-17 14:14 ` [RFC manager 1/2] ui: dc/options: add dynamic load scheduler option Daniel Kral
2026-02-18 11:10   ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC manager 2/2] ui: dc/options: add auto rebalancing options Daniel Kral

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260217141437.584852-30-d.kral@proxmox.com \
    --to=d.kral@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal