From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <w.bumiller@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id C3E6E94B8D
 for <pve-devel@lists.proxmox.com>; Thu, 11 Apr 2024 11:17:50 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id A63C51D862
 for <pve-devel@lists.proxmox.com>; Thu, 11 Apr 2024 11:17:20 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Thu, 11 Apr 2024 11:17:19 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 991AC44A03
 for <pve-devel@lists.proxmox.com>; Thu, 11 Apr 2024 11:17:19 +0200 (CEST)
Date: Thu, 11 Apr 2024 11:17:18 +0200
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
To: Fabian =?utf-8?Q?Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Cc: pve-devel@lists.proxmox.com
Message-ID: <cofr4ipem2za7ojhxika5fec5gdjrys6qjudouxu6d7cv2ekax@am2mfhvmwqrh>
References: <20240410131316.1208679-1-f.gruenbichler@proxmox.com>
 <20240410131316.1208679-10-f.gruenbichler@proxmox.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit
In-Reply-To: <20240410131316.1208679-10-f.gruenbichler@proxmox.com>
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.086 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [guesthelpers.pm]
Subject: Re: [pve-devel] [PATCH guest-common 1/1] helpers: add pool
 limit/usage helpers
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Thu, 11 Apr 2024 09:17:50 -0000

On Wed, Apr 10, 2024 at 03:13:06PM +0200, Fabian Grünbichler wrote:
> one for combining the per-node broadcasted values, one for checking a pool's
> limit, and one specific helper for checking guest-related actions such as
> starting a VM.
> 
> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> ---
>  src/PVE/GuestHelpers.pm | 190 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 190 insertions(+)
> 
> diff --git a/src/PVE/GuestHelpers.pm b/src/PVE/GuestHelpers.pm
> index 961a7b8..36b44bb 100644
> --- a/src/PVE/GuestHelpers.pm
> +++ b/src/PVE/GuestHelpers.pm
> @@ -416,4 +416,194 @@ sub check_vnet_access {
>  	if !($tag || $trunks);
>  }
>  
> +# combines the broadcasted pool usage information to get per-pool stats
> +#
> +# $pools parsed pool info from user.cfg
> +# $usage broadcasted KV hash
> +# $pool filter for specific pool
> +# $skip skip a certain guest to ignore its current usage
> +#
> +# returns usage hash:
> +# pool -> cpu/mem/.. -> run/config -> $usage
> +sub get_pool_usage {
> +    my ($pools, $usage, $pool, $skip) = @_;
> +
> +    my $res = {};
> +    my $included_guests = {};
> +    for my $node (keys $usage->%*) {
> +	my $node_usage = JSON::decode_json($usage->{$node} // '');
> +
> +	# long IDs first, so we can add children to their parents right away
> +	for my $poolid (sort {$b cmp $a} keys $pools->%*) {
> +	    next if defined($pool) && !($pool eq $poolid || $poolid =~ m!^$pool/! || $pool =~ m!^$poolid/!);
> +
> +	    my $d = $res->{$poolid} //= {
> +		cpu => {
> +		    run => 0,
> +		    config => 0,
> +		},
> +		mem => {
> +		    run => 0,
> +		    config => 0,
> +		},
> +	    };
> +
> +	    my $pool_usage = $node_usage->{data}->{$poolid} // {};
> +	    for my $vmid (keys $pool_usage->%*) {
> +		# only include once in case of migration between broadcast
> +		next if $included_guests->{$vmid};
> +		next if $skip && $skip->{$vmid};
> +		$included_guests->{$vmid} = 1;
> +
> +		my $vm_data = $pool_usage->{$vmid};
> +		for my $key (keys $vm_data->%*) {
> +		    next if $key eq 'running';
> +		    $d->{$key}->{run} += $vm_data->{$key}->{run} if $vm_data->{running};
> +		    $d->{$key}->{config} += $vm_data->{$key}->{config};

So here we autovivify more keys inside $d, so simply receiving them in
the $pool_usage will add them
...

> +		}
> +	    }
> +
> +	    if (my $parent = $pools->{$poolid}->{parent}) {
> +		$res->{$parent} //= {
> +		    cpu => {
> +			run=> 0,
> +			config => 0,
> +		    },
> +		    mem => {
> +			run => 0,
> +			config => 0,
> +		    },
> +		};

...
so would it make sense to just iterate over `keys %$d` here instead
of having `cpu` and `mem` hardcoded?
And/or maybe access-control should expose a list derived from
$pool_limits_desc directly (or helper sub) to deal with the
`-run`/`-config` suffixes
...

> +		$res->{$parent}->{cpu}->{run} += $d->{cpu}->{run};
> +		$res->{$parent}->{mem}->{run} += $d->{mem}->{run};
> +		$res->{$parent}->{cpu}->{config} += $d->{cpu}->{config};
> +		$res->{$parent}->{mem}->{config} += $d->{mem}->{config};
> +	    }
> +	}
> +    }
> +
> +    return $res;
> +}
> +
> +# checks whether a pool is (or would be) over its resource limits
> +#
> +# $changes is for checking limits for config/state changes like VM starts, if
> +# set, only the limits with changes are checked (see check_guest_pool_limit)
> +#
> +# return value indicates whether any limit was overstepped or not (if $noerr is set)
> +sub check_pool_limits {
> +    my ($usage, $limits, $noerr, $changes) = @_;
> +
> +    my $over = {};
> +    my $only_changed = defined($changes);
> +
> +    my $check_limit = sub {
> +	my ($key, $running, $limit, $change) = @_;
> +
> +	return if $only_changed && $change == 0;
> +
> +	my $kind = $running ? 'run' : 'config';
> +
> +	my $value = $usage->{$key}->{$kind};
> +	$value = int($value);
> +	$value += $change;
> +	$value = $value / (1024*1024) if $key eq 'mem';
> +	if ($limit < $value) {
> +	    $over->{$key}->{$kind}->{change} = $change if $change;
> +	    $over->{$key}->{$kind}->{over} = 1;
> +	}
> +    };
> +
> +    my $get_change = sub {
> +	my ($key, $running) = @_;
> +
> +	return 0 if !defined($changes);
> +
> +	my $check_running = defined($changes->{running}) && $changes->{running} ? 1 : 0;
> +
> +	if ($running == $check_running) {
> +	    return $changes->{$key} // 0;
> +	} else {
> +	    return 0;
> +	}
> +    };
> +
> +    if (my $limit = $limits->{"mem-run"}) {
> +	my $change = $get_change->('mem', 1);
> +	$check_limit->('mem', 1, $limit, $change);
> +    }
> +
> +    if (my $limit = $limits->{"mem-config"}) {
> +	my $change = $get_change->('mem', 0);
> +	$check_limit->('mem', 0, $limit, $change);
> +    }

...
Similarly this could then iterate over the list.

> +
> +    if (my $limit = $limits->{"cpu-run"}) {
> +	my $change = $get_change->('cpu', 1);
> +	$check_limit->('cpu', 1, $limit, $change);
> +    }
> +
> +    if (my $limit = $limits->{"cpu-config"}) {
> +	my $change = $get_change->('cpu', 0);
> +	$check_limit->('cpu', 0, $limit, $change);
> +    }
> +
> +    if (!$noerr) {
> +	my $msg = '';
> +	for my $key (keys $over->%*) {
> +	    for my $kind (keys $over->{$key}->%*) {
> +		my $value = $usage->{$key}->{$kind};
> +		$value = $value / (1024*1024) if $key eq 'mem';
> +		my $change = $over->{$key}->{$kind}->{change};
> +		if ($change) {
> +		    $change = $change / (1024*1024) if $key eq 'mem';
> +		    $value = "$value + $change" if $change;
> +		}
> +		my $limit = $limits->{"$key-$kind"};
> +		$msg .= "($kind) $key: $value over $limit, ";
> +	    }
> +	}
> +	if ($msg) {
> +	$msg =~ s/, $//;
> +	die "pool limits exhausted: $msg\n";
> +	}
> +    }
> +
> +    return $over->%* ? 1 : 0;
> +}
> +
> +# checks whether the given changes for a certain guest would overstep a pool limit
> +#
> +# $changes is an optional hash containing
> +# - absolute: flag whether changes are relative or absolute
> +# - running: flag whether the config or running limits should be checked
> +# - cpu: change value for cpu limit
> +# - mem: change value for mem limit
> +# all elements are optional
> +#
> +# if no $changes is provided, the limits are checked against the current usage
> +#
> +# $poolid allows overriding the guest's pool membership, for example in case it
> +# is not yet properly set when creating the guest
> +sub check_guest_pool_limit {
> +    my ($vmid, $changes, $poolid) = @_;
> +
> +    my $user_cfg = PVE::Cluster::cfs_read_file("user.cfg");
> +
> +    $poolid = $user_cfg->{vms}->{$vmid} if !defined($poolid);
> +    if ($poolid) {
> +	my $pool = $user_cfg->{pools}->{$poolid};
> +
> +	my $limits = $pool->{limits};
> +	return if !$limits;
> +
> +	my $skip = {};
> +	$skip->{$vmid} = 1 if $changes && $changes->{absolute};
> +	my $usage = PVE::Cluster::get_node_kv('pool-usage');
> +
> +	$usage = PVE::GuestHelpers::get_pool_usage($user_cfg->{pools}, $usage, $poolid, $skip);

^ This is already inside PVE::GuestHelpers ;-)

> +	check_pool_limits($usage->{$poolid}, $limits, 0, $changes);
> +    }
> +}
> +
>  1;
> -- 
> 2.39.2