On 16/04/2024 14:20, Fabian Grünbichler wrote: > one for combining the per-node broadcasted values, one for checking a pool's > limit, and one specific helper for checking guest-related actions such as > starting a VM. > > Signed-off-by: Fabian Grünbichler > --- > > Notes: > v2: > - style > - introduce new helper for mapping limit key to usage hash > - introduce new helper for default usage hash > - avoid hard-coding cpu/mem and run/config where sensible > > src/PVE/GuestHelpers.pm | 183 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 183 insertions(+) > > diff --git a/src/PVE/GuestHelpers.pm b/src/PVE/GuestHelpers.pm > index 961a7b8..e52eaf0 100644 > --- a/src/PVE/GuestHelpers.pm > +++ b/src/PVE/GuestHelpers.pm > @@ -416,4 +416,187 @@ sub check_vnet_access { > if !($tag || $trunks); > } > > +sub pool_limit_to_usage { > + my ($limit_key) = @_; > + > + my ($resource, $kind) = split(/-/, $limit_key, 2); > + > + return ($resource, $kind, $kind eq 'run' ? 1 : 0); > +} > + > +sub pool_default_usage { > + my $default = {}; > + > + for my $limit (keys $PVE::AccessControl::pool_limits_desc->%*) { Perlcritic complains here about the direct access to the `pool_limits_desc`. I haven't found any other occurence where we reference a variable across packages directly. I can see that they are only used here, but would it make sense to move these helpers (which are unrelated to guests themselves) to the `PVE::AccessControl` package? > + my ($resource, $kind) = pool_limit_to_usage($limit); > + $default->{$resource}->{$kind} = 0; > + } > + > + return $default; > +} > + > +# combines the broadcasted pool usage information to get per-pool stats > +# > +# $pools parsed pool info from user.cfg > +# $usage broadcasted KV hash > +# $pool filter for specific pool > +# $skip skip a certain guest to ignore its current usage > +# > +# returns usage hash: > +# pool -> cpu/mem/.. -> run/config -> $usage > +sub get_pool_usage { > + my ($pools, $usage, $pool, $skip) = @_; > + > + my $res = {}; > + my $included_guests = {}; > + for my $node (keys $usage->%*) { > + my $node_usage = JSON::decode_json($usage->{$node} // ''); As pointed out in the pve-manager ui patch, `encode_json` and `decode_json` seem to be a little racy for preserving the order. If these values stay user visible, we should sort the keys here afterwards, so users can rely on some preserved order. > + > + # long IDs first, so we can add children to their parents right away > + for my $poolid (sort {$b cmp $a} keys $pools->%*) { > + if ( > + defined($pool) > + && !($pool eq $poolid || $poolid =~ m!^$pool/! || $pool =~ m!^$poolid/!) > + ) { > + next; > + } > + > + my $d = $res->{$poolid} //= pool_default_usage(); > + > + my $pool_usage = $node_usage->{data}->{$poolid} // {}; > + for my $vmid (keys $pool_usage->%*) { > + # only include once in case of migration between broadcast > + next if $included_guests->{$vmid}; > + next if $skip && $skip->{$vmid}; > + $included_guests->{$vmid} = 1; > + > + my $vm_data = $pool_usage->{$vmid}; > + for my $key (keys $vm_data->%*) { > + next if $key eq 'running'; > + $d->{$key}->{run} += $vm_data->{$key}->{run} if $vm_data->{running}; > + $d->{$key}->{config} += $vm_data->{$key}->{config}; > + } > + } > + > + if (my $parent = $pools->{$poolid}->{parent}) { > + $res->{$parent} //= pool_default_usage(); > + for my $key (keys $d->%*) { > + for my $kind (keys $d->{$key}->%*) { > + $res->{$parent}->{$key}->{$kind} = $d->{$key}->{$kind}; > + } > + } > + } > + } > + } > + > + return $res; > +} > + > +# checks whether a pool is (or would be) over its resource limits > +# > +# $changes is for checking limits for config/state changes like VM starts, if > +# set, only the limits with changes are checked (see check_guest_pool_limit) > +# > +# return value indicates whether any limit was overstepped or not (if $noerr is set) > +sub check_pool_limits { > + my ($usage, $limits, $noerr, $changes) = @_; > + > + my $over = {}; > + my $only_changed = defined($changes); > + > + my $check_limit = sub { > + my ($key, $running, $limit, $change) = @_; > + > + return if $only_changed && $change == 0; > + > + my $kind = $running ? 'run' : 'config'; > + > + my $value = $usage->{$key}->{$kind}; > + $value = int($value); > + $value += $change; > + $value = $value / (1024*1024) if $key eq 'mem'; > + if ($limit < $value) { > + $over->{$key}->{$kind}->{change} = $change if $change; > + $over->{$key}->{$kind}->{over} = 1; > + } > + }; > + > + my $get_change = sub { > + my ($key, $running) = @_; > + > + return 0 if !defined($changes); > + > + my $check_running = defined($changes->{running}) && $changes->{running} ? 1 : 0; > + > + if ($running == $check_running) { > + return $changes->{$key} // 0; > + } else { > + return 0; > + } > + }; > + > + while (my ($key, $limit) = each $limits->%*) { > + my ($resource, $kind, $running) = pool_limit_to_usage($key); > + my $change = $get_change->($resource, $running); > + $check_limit->($resource, $running, $limit, $change); > + } > + > + if (!$noerr) { > + my $msg = ''; > + for my $key (keys $over->%*) { > + for my $kind (keys $over->{$key}->%*) { > + my $value = $usage->{$key}->{$kind}; > + $value = $value / (1024*1024) if $key eq 'mem'; > + my $change = $over->{$key}->{$kind}->{change}; > + if ($change) { > + $change = $change / (1024*1024) if $key eq 'mem'; > + $value = "$value + $change" if $change; > + } > + my $limit = $limits->{"$key-$kind"}; > + $msg .= "($kind) $key: $value over $limit, "; > + } > + } > + if ($msg) { > + $msg =~ s/, $//; > + die "pool limits exhausted: $msg\n"; > + } > + } > + > + return $over->%* ? 1 : 0; > +} > + > +# checks whether the given changes for a certain guest would overstep a pool limit > +# > +# $changes is an optional hash containing > +# - absolute: flag whether changes are relative or absolute > +# - running: flag whether the config or running limits should be checked > +# - cpu: change value for cpu limit > +# - mem: change value for mem limit the description could benefit from a unit, i.e. "in bytes". FWIW, we could use `PVE::Tools::convert_size` more often to make the input values more agnostic to prefix (e.g. Mega vs Giga) and base unit (bit vs byte) to reduce the cognitive load across pve-container and qemu-server when to use what. We could also use that information above for the error message to include units (i.e. "$value MiB over $limit"). But that would involve much more effort and I'm unsure it's worth it. > +# all elements are optional > +# > +# if no $changes is provided, the limits are checked against the current usage > +# > +# $poolid allows overriding the guest's pool membership, for example in case it > +# is not yet properly set when creating the guest > +sub check_guest_pool_limit { > + my ($vmid, $changes, $poolid) = @_; > + > + my $user_cfg = PVE::Cluster::cfs_read_file("user.cfg"); > + > + $poolid = $user_cfg->{vms}->{$vmid} if !defined($poolid); > + if ($poolid) { > + my $pool = $user_cfg->{pools}->{$poolid}; > + > + my $limits = $pool->{limits}; > + return if !$limits; > + > + my $skip = {}; > + $skip->{$vmid} = 1 if $changes && $changes->{absolute}; > + my $usage = PVE::Cluster::get_node_kv('pool-usage'); > + > + $usage = get_pool_usage($user_cfg->{pools}, $usage, $poolid, $skip); > + check_pool_limits($usage->{$poolid}, $limits, 0, $changes); > + } > +} > + > 1; > -- > 2.39.2