public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Alexandre Derumier <aderumier@odiso.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH V2 qemu-server 1/2] add virtio-mem support
Date: Wed, 24 Aug 2022 13:34:42 +0200	[thread overview]
Message-ID: <20220824113443.936256-2-aderumier@odiso.com> (raw)
In-Reply-To: <20220824113443.936256-1-aderumier@odiso.com>

This patch adds virtio-mem support through a new maxmemory option.

4GB of static memory is needed for DMA+boot memory, as this memory
can almost never be unplugged.

One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge.

virtio-mem uses a fixed block size and can map at most 32000 blocks,
so the block size is computed from (maxmemory - static memory)/32000,
rounded up to a power of 2, with a minimum of 2MB to match THP.
(A lower block size gives a better chance to unplug memory.)

Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
 PVE/QemuServer.pm        |   9 ++-
 PVE/QemuServer/Memory.pm | 133 ++++++++++++++++++++++++++++++---------
 PVE/QemuServer/PCI.pm    |   8 +++
 3 files changed, 121 insertions(+), 29 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index c706653..fb4b1c4 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -340,6 +340,13 @@ my $confdesc = {
 	maximum => 262144,
 	default => 'cgroup v1: 1024, cgroup v2: 100',
     },
+    maxmemory => {
+	optional => 1,
+	type => 'integer',
+	description => "Max hotpluggable virtio-mem memory",
+	minimum => 4096,
+	default => undef,
+    },
     memory => {
 	optional => 1,
 	type => 'integer',
@@ -3815,7 +3822,7 @@ sub config_to_command {
 	push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
     }
 
-    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type);
 
     push @$cmd, '-S' if $conf->{freeze};
 
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index a41f5ae..bd26002 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -8,11 +8,48 @@ use PVE::Exception qw(raise raise_param_exc);
 
 use PVE::QemuServer;
 use PVE::QemuServer::Monitor qw(mon_cmd);
+use PVE::QemuServer::PCI qw(print_pci_addr);
 
 my $MAX_NUMA = 8;
 my $MAX_MEM = 4194304;
 my $STATICMEM = 1024;
 
+my $compute_static_mem = sub {
+    my ($conf, $defaults) = @_;
+    # returns the static memory size (the value used for '-m size=') for the VM config $conf
+    # socket count: 'sockets' wins over the old style 'smp' option, default is 1
+    my $nr_sockets = $conf->{sockets} || $conf->{smp} || 1;
+    my $hotplug_opt = defined($conf->{hotplug}) ? $conf->{hotplug} : '1';
+    my $features = PVE::QemuServer::parse_hotplug_features($hotplug_opt);
+
+    # neither memory hotplug nor maxmemory: the whole configured memory is static
+    if (!$features->{memory} && !$conf->{maxmemory}) {
+	return $conf->{memory} || $defaults->{memory};
+    }
+
+    # maxmemory set (virtio-mem): the static memory is always 4096
+    return 4096 if $conf->{maxmemory};
+
+    # memory hotplug only: $STATICMEM, multiplied by the socket count when hugepages is 1024
+    my $use_1g_hugepages = $conf->{hugepages} && $conf->{hugepages} == 1024;
+    return $use_1g_hugepages ? $STATICMEM * $nr_sockets : $STATICMEM;
+};
+
+my $compute_virtiomem_block_size = sub {
+    my ($conf, $static_memory) = @_;
+    # returns the virtio-mem block size in MB (a power of 2, at least 2), or undef if maxmemory is not set
+    my $maxmemory = $conf->{maxmemory};
+    return undef if !$maxmemory;
+    # only the memory above $static_memory is split into virtio-mem blocks
+    #virtiomem can map 32000 blocks. try to use the lowest blocksize, lower = more chance to unplug memory.
+    my $blocksize = ($maxmemory - $static_memory) / 32000;
+    #2MB is the minimum to be aligned with THP. checked before log(), which dies for a value <= 0 (maxmemory <= static memory)
+    return 2 if $blocksize < 2;
+    #round to the next power of 2
+    $blocksize = 2**(int(log($blocksize)/log(2))+1);
+    return $blocksize;
+};
+
 sub get_numa_node_list {
     my ($conf) = @_;
     my @numa_map;
@@ -104,6 +141,8 @@ sub foreach_reverse_dimm {
     }
 }
 
+
+
 sub qemu_memory_hotplug {
     my ($vmid, $conf, $defaults, $opt, $value) = @_;
 
@@ -116,13 +155,47 @@ sub qemu_memory_hotplug {
     $value = $defaults->{memory} if !$value;
     return $value if $value == $memory;
 
-    my $static_memory = $STATICMEM;
-    $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
+    my $static_memory = &$compute_static_mem($conf, $defaults);
+    my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
 
     die "memory can't be lower than $static_memory MB" if $value < $static_memory;
-    die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM;
+    die "you cannot add more memory than $maxmemory MB!\n" if $value > $maxmemory;
+
+    if ($conf->{maxmemory}) {
+
+	die "memory size need to be multiple of 32MB when maxmemory is defined" if $value % 32 != 0;
+
+	my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024;
+
+	my $totalsize = $static_memory;
+	my $err = undef;
+
+	for (my $i = 0; $i < $sockets; $i++)  {
+
+	    my $id = "virtiomem$i";
+	    my $retry = 0;
+	    mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size));
+
+	    my $size = 0;
+	    while (1) {
+		sleep 1;
+		$size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size");
+		$err = 1 if $retry > 5;
+		last if $size eq $requested_size || $retry > 5;
+		$retry++;
+	    }
+	    $totalsize += ($size / 1024 / 1024 );
+	}
 
-    if($value > $memory) {
+	#update conf after each succesful module unplug
+	if($err) {
+	    $conf->{memory} = $totalsize;
+	    PVE::QemuConfig->write_config($vmid, $conf);
+	    raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err;
+	}
+	return $totalsize;
+
+    } elsif($value > $memory) {
 
 	my $numa_hostmap;
 
@@ -217,32 +290,28 @@ sub qemu_dimm_list {
 }
 
 sub config {
-    my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+    my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_;
 
     my $memory = $conf->{memory} || $defaults->{memory};
-    my $static_memory = 0;
+    my $static_memory = &$compute_static_mem($conf, $defaults);
+
+    if ($hotplug_features->{memory} || $conf->{maxmemory}) {
+
+	my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
 
-    if ($hotplug_features->{memory}) {
 	die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
-	die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
+	die "Total memory is bigger than ${maxmemory}MB\n" if $memory > $maxmemory;
 
 	for (my $i = 0; $i < $MAX_NUMA; $i++) {
 	    die "cannot enable memory hotplugging with custom NUMA topology\n"
 		if $conf->{"numa$i"};
 	}
-
-	my $sockets = 1;
-	$sockets = $conf->{sockets} if $conf->{sockets};
-
-	$static_memory = $STATICMEM;
-	$static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
-
 	die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
-	push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";
-
+	die "memory size need to be multiple of 32MB when maxmemory is defined" if $conf->{maxmemory} && ($memory % 32 != 0);
+	my $cmdstr = "size=${static_memory},maxmem=${maxmemory}M";
+	$cmdstr .= ",slots=255" if !$conf->{maxmemory};
+	push @$cmd, '-m', $cmdstr;
     } else {
-
-	$static_memory = $memory;
 	push @$cmd, '-m', $static_memory;
     }
 
@@ -309,7 +378,21 @@ sub config {
 	}
     }
 
-    if ($hotplug_features->{memory}) {
+    if ($conf->{maxmemory}) {
+	my $node_maxmem = ($conf->{maxmemory} - $static_memory) / $sockets;
+	my $node_mem = ($memory - $static_memory) / $sockets;
+	my $blocksize = &$compute_virtiomem_block_size($conf, $static_memory);
+
+	for (my $i = 0; $i < $sockets; $i++)  {
+
+	    my $id = "virtiomem$i";
+	    my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type);
+	    my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem);
+
+	    push @$cmd, "-object" , $mem_object;
+	    push @$devices, "-device", "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr";
+	}
+    } elsif ($hotplug_features->{memory}) {
 	foreach_dimm($conf, $vmid, $memory, $sockets, sub {
 	    my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
 
@@ -435,20 +518,14 @@ sub hugepages_topology {
 
     my $defaults = PVE::QemuServer::load_defaults();
     my $memory = $conf->{memory} || $defaults->{memory};
-    my $static_memory = 0;
+    my $static_memory = &$compute_static_mem($conf, $defaults);
+
     my $sockets = 1;
     $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
     $sockets = $conf->{sockets} if $conf->{sockets};
     my $numa_custom_topology = undef;
     my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
 
-    if ($hotplug_features->{memory}) {
-	$static_memory = $STATICMEM;
-	$static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
-    } else {
-	$static_memory = $memory;
-    }
-
     #custom numa topology
     for (my $i = 0; $i < $MAX_NUMA; $i++) {
 	next if !$conf->{"numa$i"};
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index 3d0e70e..9b59b91 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -249,6 +249,14 @@ sub get_pci_addr_map {
 	'scsihw2' => { bus => 4, addr => 1 },
 	'scsihw3' => { bus => 4, addr => 2 },
 	'scsihw4' => { bus => 4, addr => 3 },
+	'virtiomem0' => { bus => 4, addr => 4 },
+	'virtiomem1' => { bus => 4, addr => 5 },
+	'virtiomem2' => { bus => 4, addr => 6 },
+	'virtiomem3' => { bus => 4, addr => 7 },
+	'virtiomem4' => { bus => 4, addr => 8 },
+	'virtiomem5' => { bus => 4, addr => 9 },
+	'virtiomem6' => { bus => 4, addr => 10 },
+	'virtiomem7' => { bus => 4, addr => 11 },
     } if !defined($pci_addr_map);
     return $pci_addr_map;
 }
-- 
2.30.2




  reply	other threads:[~2022-08-24 11:34 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-24 11:34 [pve-devel] [PATCH V2 qemu-server 0/2] " Alexandre Derumier
2022-08-24 11:34 ` Alexandre Derumier [this message]
2022-08-24 11:34 ` [pve-devel] [PATCH V2 qemu-server 2/2] tests: add virtio-mem tests Alexandre Derumier
2022-09-23 11:20 ` [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support DERUMIER, Alexandre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220824113443.936256-2-aderumier@odiso.com \
    --to=aderumier@odiso.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal