From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 96E5E8B2A6 for ; Wed, 24 Aug 2022 13:34:47 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 88915195DB for ; Wed, 24 Aug 2022 13:34:47 +0200 (CEST) Received: from bastionodiso.odiso.net (bastionodiso.odiso.net [IPv6:2a0a:1580:2000::2d]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 24 Aug 2022 13:34:45 +0200 (CEST) Received: from kvmformation3.odiso.net (formationkvm3.odiso.net [10.3.94.12]) by bastionodiso.odiso.net (Postfix) with ESMTP id 36F20A8D2; Wed, 24 Aug 2022 13:34:45 +0200 (CEST) Received: by kvmformation3.odiso.net (Postfix, from userid 0) id 1F8DA1E539B; Wed, 24 Aug 2022 13:34:45 +0200 (CEST) From: Alexandre Derumier To: pve-devel@lists.proxmox.com Date: Wed, 24 Aug 2022 13:34:42 +0200 Message-Id: <20220824113443.936256-2-aderumier@odiso.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220824113443.936256-1-aderumier@odiso.com> References: <20220824113443.936256-1-aderumier@odiso.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.026 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% HEADER_FROM_DIFFERENT_DOMAINS 0.249 From and EnvelopeFrom 2nd level mail domains are different KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods NO_DNS_FOR_FROM 0.001 Envelope 
sender has no MX or A DNS records SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pve-devel] [PATCH V2 qemu-server 1/2] add virtio-mem support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 24 Aug 2022 11:34:47 -0000 This patch adds virtio-mem support through a new maxmemory option. 4GB of static memory is needed for DMA+boot memory, as this memory is almost never unpluggable. One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge. virtio-mem uses a fixed block size with at most 32k blocks, so the block size is computed as maxmemory/32000, with a minimum of 2MB to align with THP (a lower block size = more chance to unplug memory). Signed-off-by: Alexandre Derumier --- PVE/QemuServer.pm | 9 ++- PVE/QemuServer/Memory.pm | 133 ++++++++++++++++++++++++++++++--------- PVE/QemuServer/PCI.pm | 8 +++ 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index c706653..fb4b1c4 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -340,6 +340,13 @@ my $confdesc = { maximum => 262144, default => 'cgroup v1: 1024, cgroup v2: 100', }, + maxmemory => { + optional => 1, + type => 'integer', + description => "Max hotpluggable virtio-mem memory", + minimum => 4096, + default => undef, + }, memory => { optional => 1, type => 'integer', @@ -3815,7 +3822,7 @@ sub config_to_command { push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough); } - PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd); + PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type); push 
@$cmd, '-S' if $conf->{freeze}; diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm index a41f5ae..bd26002 100644 --- a/PVE/QemuServer/Memory.pm +++ b/PVE/QemuServer/Memory.pm @@ -8,11 +8,48 @@ use PVE::Exception qw(raise raise_param_exc); use PVE::QemuServer; use PVE::QemuServer::Monitor qw(mon_cmd); +use PVE::QemuServer::PCI qw(print_pci_addr); my $MAX_NUMA = 8; my $MAX_MEM = 4194304; my $STATICMEM = 1024; +my $compute_static_mem = sub { + my ($conf, $defaults) = @_; + + my $sockets = 1; + $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused + $sockets = $conf->{sockets} if $conf->{sockets}; + my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); + + my $static_memory = 0; + + if ($hotplug_features->{memory} || $conf->{maxmemory}) { + $static_memory = $STATICMEM; + $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); + $static_memory = 4096 if $conf->{maxmemory}; + } else { + $static_memory = $conf->{memory} || $defaults->{memory}; + } + + return $static_memory; +}; + +my $compute_virtiomem_block_size = sub { + my ($conf, $static_memory) = @_; + + my $maxmemory = $conf->{maxmemory}; + return undef if !$maxmemory; + + #virtiomem can map 32000 block size. try to use lowerst blocksize, lower = more chance to unplug memory. 
+ my $blocksize = ($maxmemory - $static_memory) / 32000; + #round next power of 2 + $blocksize = 2**(int(log($blocksize)/log(2))+1); + #2MB is the minimum to be aligned with THP + $blocksize = 2 if $blocksize < 2; + return $blocksize; +}; + sub get_numa_node_list { my ($conf) = @_; my @numa_map; @@ -104,6 +141,8 @@ sub foreach_reverse_dimm { } } + + sub qemu_memory_hotplug { my ($vmid, $conf, $defaults, $opt, $value) = @_; @@ -116,13 +155,47 @@ sub qemu_memory_hotplug { $value = $defaults->{memory} if !$value; return $value if $value == $memory; - my $static_memory = $STATICMEM; - $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); + my $static_memory = &$compute_static_mem($conf, $defaults); + my $maxmemory = $conf->{maxmemory} || $MAX_MEM; die "memory can't be lower than $static_memory MB" if $value < $static_memory; - die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM; + die "you cannot add more memory than $maxmemory MB!\n" if $value > $maxmemory; + + if ($conf->{maxmemory}) { + + die "memory size need to be multiple of 32MB when maxmemory is defined" if $value % 32 != 0; + + my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024; + + my $totalsize = $static_memory; + my $err = undef; + + for (my $i = 0; $i < $sockets; $i++) { + + my $id = "virtiomem$i"; + my $retry = 0; + mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size)); + + my $size = 0; + while (1) { + sleep 1; + $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size"); + $err = 1 if $retry > 5; + last if $size eq $requested_size || $retry > 5; + $retry++; + } + $totalsize += ($size / 1024 / 1024 ); + } - if($value > $memory) { + #update conf after each succesful module unplug + if($err) { + $conf->{memory} = $totalsize; + PVE::QemuConfig->write_config($vmid, $conf); + raise_param_exc({ 'memory' => "error modify virtio 
memory" }) if $err; + } + return $totalsize; + + } elsif($value > $memory) { my $numa_hostmap; @@ -217,32 +290,28 @@ sub qemu_dimm_list { } sub config { - my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_; + my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_; my $memory = $conf->{memory} || $defaults->{memory}; - my $static_memory = 0; + my $static_memory = &$compute_static_mem($conf, $defaults); + + if ($hotplug_features->{memory} || $conf->{maxmemory}) { + + my $maxmemory = $conf->{maxmemory} || $MAX_MEM; - if ($hotplug_features->{memory}) { die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa}; - die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM; + die "Total memory is bigger than ${maxmemory}MB\n" if $memory > $maxmemory; for (my $i = 0; $i < $MAX_NUMA; $i++) { die "cannot enable memory hotplugging with custom NUMA topology\n" if $conf->{"numa$i"}; } - - my $sockets = 1; - $sockets = $conf->{sockets} if $conf->{sockets}; - - $static_memory = $STATICMEM; - $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); - die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory); - push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M"; - + die "memory size need to be multiple of 32MB when maxmemory is defined" if $conf->{maxmemory} && ($memory % 32 != 0); + my $cmdstr = "size=${static_memory},maxmem=${maxmemory}M"; + $cmdstr .= ",slots=255" if !$conf->{maxmemory}; + push @$cmd, '-m', $cmdstr; } else { - - $static_memory = $memory; push @$cmd, '-m', $static_memory; } @@ -309,7 +378,21 @@ sub config { } } - if ($hotplug_features->{memory}) { + if ($conf->{maxmemory}) { + my $node_maxmem = ($conf->{maxmemory} - $static_memory) / $sockets; + my $node_mem = ($memory - $static_memory) / $sockets; + my $blocksize = &$compute_virtiomem_block_size($conf, $static_memory); 
+ + for (my $i = 0; $i < $sockets; $i++) { + + my $id = "virtiomem$i"; + my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type); + my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem); + + push @$cmd, "-object" , $mem_object; + push @$devices, "-device", "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr"; + } + } elsif ($hotplug_features->{memory}) { foreach_dimm($conf, $vmid, $memory, $sockets, sub { my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; @@ -435,20 +518,14 @@ sub hugepages_topology { my $defaults = PVE::QemuServer::load_defaults(); my $memory = $conf->{memory} || $defaults->{memory}; - my $static_memory = 0; + my $static_memory = &$compute_static_mem($conf, $defaults); + my $sockets = 1; $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused $sockets = $conf->{sockets} if $conf->{sockets}; my $numa_custom_topology = undef; my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? 
$conf->{hotplug} : '1'); - if ($hotplug_features->{memory}) { - $static_memory = $STATICMEM; - $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); - } else { - $static_memory = $memory; - } - #custom numa topology for (my $i = 0; $i < $MAX_NUMA; $i++) { next if !$conf->{"numa$i"}; diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm index 3d0e70e..9b59b91 100644 --- a/PVE/QemuServer/PCI.pm +++ b/PVE/QemuServer/PCI.pm @@ -249,6 +249,14 @@ sub get_pci_addr_map { 'scsihw2' => { bus => 4, addr => 1 }, 'scsihw3' => { bus => 4, addr => 2 }, 'scsihw4' => { bus => 4, addr => 3 }, + 'virtiomem0' => { bus => 4, addr => 4 }, + 'virtiomem1' => { bus => 4, addr => 5 }, + 'virtiomem2' => { bus => 4, addr => 6 }, + 'virtiomem3' => { bus => 4, addr => 7 }, + 'virtiomem4' => { bus => 4, addr => 8 }, + 'virtiomem5' => { bus => 4, addr => 9 }, + 'virtiomem6' => { bus => 4, addr => 10 }, + 'virtiomem7' => { bus => 4, addr => 11 }, } if !defined($pci_addr_map); return $pci_addr_map; } -- 2.30.2