From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id C95FEE72C for ; Fri, 9 Dec 2022 20:28:10 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 639CD2733D for ; Fri, 9 Dec 2022 20:27:40 +0100 (CET) Received: from bastionodiso.odiso.net (bastionodiso.odiso.net [IPv6:2a0a:1580:2000::2d]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 9 Dec 2022 20:27:37 +0100 (CET) Received: from kvmformation3.odiso.net (formationkvm3.odiso.net [10.3.94.12]) by bastionodiso.odiso.net (Postfix) with ESMTP id CC071748E; Fri, 9 Dec 2022 20:27:28 +0100 (CET) Received: by kvmformation3.odiso.net (Postfix, from userid 0) id CAC89FC1EB; Fri, 9 Dec 2022 20:27:28 +0100 (CET) From: Alexandre Derumier To: pve-devel@lists.proxmox.com, t.lamprecht@proxmox.com Date: Fri, 9 Dec 2022 20:27:24 +0100 Message-Id: <20221209192726.1499142-9-aderumier@odiso.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20221209192726.1499142-1-aderumier@odiso.com> References: <20221209192726.1499142-1-aderumier@odiso.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.029 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% HEADER_FROM_DIFFERENT_DOMAINS 0.25 From and EnvelopeFrom 2nd level mail domains are different KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods NO_DNS_FOR_FROM 0.001 Envelope sender has 
no MX or A DNS records SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record Subject: [pve-devel] [PATCH qemu-server 08/10] memory: add virtio-mem support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Dec 2022 19:28:10 -0000 A 4GB static memory is needed for DMA+boot memory, as this memory can almost never be unplugged. One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge. virtio-mem uses a fixed block size with 32000 blocks. The block size is computed as (maxmemory - 4096) / 32000, with a minimum of 2MB to map THP (a lower block size gives more chance to unplug memory). fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=931 https://bugzilla.proxmox.com/show_bug.cgi?id=2949 Signed-off-by: Alexandre Derumier --- PVE/QemuServer.pm | 8 +++- PVE/QemuServer/Memory.pm | 98 +++++++++++++++++++++++++++++++++++++--- PVE/QemuServer/PCI.pm | 8 ++++ 3 files changed, 106 insertions(+), 8 deletions(-) diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 0d5b550..43fab29 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -285,6 +285,12 @@ my $memory_fmt = { optional => 1, enum => [@max_memory_list], }, + virtio => { + description => "enable virtio-mem memory", + type => 'boolean', + optional => 1, + default => 0, + }, }; my $meta_info_fmt = { @@ -3898,7 +3904,7 @@ sub config_to_command { push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough); } - PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd); + PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type); push @$cmd, '-S' if $conf->{freeze}; diff --git a/PVE/QemuServer/Memory.pm 
b/PVE/QemuServer/Memory.pm index 8bbbf07..70ab65a 100644 --- a/PVE/QemuServer/Memory.pm +++ b/PVE/QemuServer/Memory.pm @@ -8,6 +8,8 @@ use PVE::Exception qw(raise raise_param_exc); use PVE::QemuServer; use PVE::QemuServer::Monitor qw(mon_cmd); +use PVE::QemuServer::PCI qw(print_pci_addr); + use base qw(Exporter); our @EXPORT_OK = qw( @@ -27,7 +29,9 @@ my sub get_static_mem { my $static_memory = 0; my $memory = PVE::QemuServer::parse_memory($conf->{memory}); - if($memory->{max}) { + if ($memory->{virtio}) { + $static_memory = 4096; + } elsif ($memory->{max}) { my $dimm_size = $memory->{max} / 64; #static mem can't be lower than 4G and lower than 1 dimmsize by socket $static_memory = $dimm_size * $sockets; @@ -102,6 +106,24 @@ my sub get_max_mem { return $cpu_max_mem; } +my sub get_virtiomem_block_size { + my ($conf) = @_; + + my $MAX_MEM = get_max_mem($conf); + my $static_memory = get_static_mem($conf); + my $memory = get_current_memory($conf); + #virtiomem can map 32000 block size. try to use lowerst blocksize, lower = more chance to unplug memory. 
+ my $blocksize = ($MAX_MEM - $static_memory) / 32000; + #round next power of 2 + $blocksize = 2**(int(log($blocksize)/log(2))+1); + #2MB is the minimum to be aligned with THP + $blocksize = 2 if $blocksize < 2; + + die "memory size need to be multiple of $blocksize MB when virtio-mem is enabled" if ($memory % $blocksize != 0); + + return $blocksize; +} + sub get_current_memory{ my ($conf) = @_; @@ -224,7 +246,41 @@ sub qemu_memory_hotplug { my $MAX_MEM = get_max_mem($conf); die "you cannot add more memory than max mem $MAX_MEM MB!\n" if $value > $MAX_MEM; - if ($value > $memory) { + my $confmem = PVE::QemuServer::parse_memory($conf->{memory}); + + if ($confmem->{virtio}) { + my $blocksize = get_virtiomem_block_size($conf); + my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024; + my $totalsize = $static_memory; + my $err = undef; + + for (my $i = 0; $i < $sockets; $i++) { + + my $id = "virtiomem$i"; + my $retry = 0; + mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size)); + + my $size = 0; + while (1) { + sleep 1; + $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size"); + $err = 1 if $retry > 5; + last if $size eq $requested_size || $retry > 5; + $retry++; + } + $totalsize += ($size / 1024 / 1024 ); + } + #update conf after each succesfull change + if($err) { + my $mem = { max => $MAX_MEM, virtio => 1}; + $mem->{current} = $totalsize; + $conf->{memory} = PVE::QemuServer::print_memory($mem); + PVE::QemuConfig->write_config($vmid, $conf); + raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err; + } + return $totalsize; + + } elsif ($value > $memory) { my $numa_hostmap; @@ -324,14 +380,15 @@ sub qemu_dimm_list { } sub config { - my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_; + my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_; my 
$memory = get_current_memory($conf); my $static_memory = get_static_mem($conf); + my $confmem = PVE::QemuServer::parse_memory($conf->{memory}); - if ($hotplug_features->{memory} || defined($confmem->{max})) { + if ($hotplug_features->{memory} || defined($confmem->{max}) || defined($confmem->{virtio})) { die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa}; my $MAX_MEM = get_max_mem($conf); die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM; @@ -342,8 +399,12 @@ sub config { } die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory); + + my $cmdstr = "size=${static_memory}"; my $slots = $confmem->{max} ? 64 : 255; - push @$cmd, '-m', "size=${static_memory},slots=$slots,maxmem=${MAX_MEM}M"; + $cmdstr .= ",slots=$slots" if !$confmem->{'virtio'}; + $cmdstr .= ",maxmem=${MAX_MEM}M"; + push @$cmd, '-m', $cmdstr; } else { push @$cmd, '-m', $static_memory; @@ -412,7 +473,26 @@ sub config { } } - if ($hotplug_features->{memory} || $confmem->{max}) { + if ($confmem->{'virtio'}) { + my $MAX_MEM = get_max_mem($conf); + my $node_maxmem = ($MAX_MEM - $static_memory) / $sockets; + my $node_mem = ($memory - $static_memory) / $sockets; + my $blocksize = get_virtiomem_block_size($conf); + + for (my $i = 0; $i < $sockets; $i++) { + + my $id = "virtiomem$i"; + my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem); + push @$cmd, "-object" , "$mem_object,reserve=off"; + + my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type); + my $mem_device = "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr"; + $mem_device .= ",prealloc=on" if $conf->{hugepages}; + push @$devices, "-device", $mem_device; + } + + } elsif ($hotplug_features->{memory} || $confmem->{max}) { + foreach_dimm($conf, $vmid, $memory, $sockets, sub { my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; @@ -430,12 +510,16 @@ sub config { sub print_mem_object { my 
($conf, $id, $size) = @_; + my $confmem = PVE::QemuServer::parse_memory($conf->{memory}); + if ($conf->{hugepages}) { my $hugepages_size = hugepages_size($conf, $size); my $path = hugepages_mount_path($hugepages_size); - return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes"; + my $object = "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on"; + $object .= ",prealloc=yes" if !$confmem->{virtio}; + return $object; } else { return "memory-backend-ram,id=$id,size=${size}M"; } diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm index a18b974..0187c74 100644 --- a/PVE/QemuServer/PCI.pm +++ b/PVE/QemuServer/PCI.pm @@ -249,6 +249,14 @@ sub get_pci_addr_map { 'scsihw2' => { bus => 4, addr => 1 }, 'scsihw3' => { bus => 4, addr => 2 }, 'scsihw4' => { bus => 4, addr => 3 }, + 'virtiomem0' => { bus => 4, addr => 4 }, + 'virtiomem1' => { bus => 4, addr => 5 }, + 'virtiomem2' => { bus => 4, addr => 6 }, + 'virtiomem3' => { bus => 4, addr => 7 }, + 'virtiomem4' => { bus => 4, addr => 8 }, + 'virtiomem5' => { bus => 4, addr => 9 }, + 'virtiomem6' => { bus => 4, addr => 10 }, + 'virtiomem7' => { bus => 4, addr => 11 }, } if !defined($pci_addr_map); return $pci_addr_map; } -- 2.30.2