From: Alexandre Derumier <aderumier@odiso.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH v3 qemu-server 1/2] add virtio-mem support
Date: Fri, 18 Nov 2022 13:13:19 +0100 [thread overview]
Message-ID: <20221118121320.132283-2-aderumier@odiso.com> (raw)
In-Reply-To: <20221118121320.132283-1-aderumier@odiso.com>
This patch adds virtio-mem support through a new memory_max option.
4GB of static memory is needed for DMA and boot memory, as this memory
can almost never be unplugged.
One virtio-mem PCI device is set up for each NUMA node, on the pci.4 bridge.
virtio-mem uses a fixed block size and is limited to 32000 blocks,
so the block size is computed as (memory_max - static memory) / 32000,
rounded up to a power of 2, with a minimum of 2MB to match THP
(lower block size = more chance to unplug memory).
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
PVE/QemuServer.pm | 9 ++-
PVE/QemuServer/Memory.pm | 141 ++++++++++++++++++++++++++++++---------
PVE/QemuServer/PCI.pm | 8 +++
3 files changed, 126 insertions(+), 32 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 9a20647..d0f6d1f 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -338,6 +338,13 @@ my $confdesc = {
maximum => 262144,
default => 'cgroup v1: 1024, cgroup v2: 100',
},
+ 'memory_max' => {
+ optional => 1,
+ type => 'integer',
+ description => "Max hotpluggable virtio-mem memory",
+ minimum => 4096,
+ default => undef,
+ },
memory => {
optional => 1,
type => 'integer',
@@ -3858,7 +3865,7 @@ sub config_to_command {
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type);
push @$cmd, '-S' if $conf->{freeze};
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index 013917e..ed7eff6 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -8,9 +8,45 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer;
use PVE::QemuServer::Monitor qw(mon_cmd);
+use PVE::QemuServer::PCI qw(print_pci_addr);
my $MAX_NUMA = 8;
-my $STATICMEM = 1024;
+
+my sub get_static_mem {
+ my ($conf, $defaults) = @_;
+ # Returns the static memory size (MB) for $conf, i.e. the value passed to QEMU as '-m size=...'.
+ my $sockets = 1;
+ $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
+ $sockets = $conf->{sockets} if $conf->{sockets};
+ my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+
+ my $static_memory = 0;
+ # memory hotplug enabled or memory_max set: fixed base size; otherwise the whole configured memory is static
+ if ($hotplug_features->{memory} || $conf->{'memory_max'}) {
+ $static_memory = 1024;
+ $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); # 1024 per socket when hugepages == 1024
+ $static_memory = 4096 if $conf->{'memory_max'}; # memory_max (virtio-mem) overrides both values above with 4096
+ } else {
+ $static_memory = $conf->{memory} || $defaults->{memory};
+ }
+
+ return $static_memory;
+}
+
+my sub get_virtiomem_block_size {
+    my ($conf, $static_memory) = @_;
+    # Returns the virtio-mem block size in MB (never below 2), or undef when memory_max is not set.
+    my $maxmemory = $conf->{'memory_max'};
+    return undef if !$maxmemory;
+
+    # virtio-mem can map 32000 blocks; try to use the lowest block size, lower = more chance to unplug memory.
+    my $blocksize = ($maxmemory - $static_memory) / 32000;
+    # 2MB is the minimum to be aligned with THP; checked first so log() never sees a value <= 0 (log(0) is fatal)
+    return 2 if $blocksize < 2;
+    # round up to a power of 2 (formula unchanged)
+    $blocksize = 2**(int(log($blocksize)/log(2))+1);
+    return $blocksize;
+}
my $_host_bits;
my sub get_host_phys_address_bits {
@@ -58,7 +94,14 @@ my sub get_max_mem {
# remove 20 bits to get MB and half that as QEMU needs some overhead
my $bits_to_max_mem = int(1 << ($bits - 21));
- return $bits_to_max_mem > 4*1024*1024 ? 4*1024*1024 : $bits_to_max_mem;
+ my $max_mem = $bits_to_max_mem > 4*1024*1024 ? 4*1024*1024 : $bits_to_max_mem;
+
+ if($conf->{'memory_max'}) {
+ die "memory_max can't be bigger than $max_mem MB" if $conf->{'memory_max'} > $max_mem;
+ return $conf->{'memory_max'};
+ }
+
+ return $max_mem;
}
sub get_numa_node_list {
@@ -152,6 +195,8 @@ sub foreach_reverse_dimm {
}
}
+
+
sub qemu_memory_hotplug {
my ($vmid, $conf, $defaults, $opt, $value) = @_;
@@ -164,14 +209,46 @@ sub qemu_memory_hotplug {
$value = $defaults->{memory} if !$value;
return $value if $value == $memory;
- my $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
+ my $static_memory = get_static_mem($conf, $defaults);
+ my $max_mem = get_max_mem($conf);
die "memory can't be lower than $static_memory MB" if $value < $static_memory;
- my $MAX_MEM = get_max_mem($conf);
- die "you cannot add more memory than max mem $MAX_MEM MB!\n" if $memory > $MAX_MEM;
+ die "you cannot add more memory than max mem $max_mem MB!\n" if $value > $max_mem;
+
+ if ($conf->{'memory_max'}) {
+
+ die "memory size need to be multiple of 32MB when memory_max is defined" if $value % 32 != 0;
+
+ my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024;
- if ($value > $memory) {
+ my $totalsize = $static_memory;
+ my $err = undef;
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $retry = 0;
+ mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size));
+
+ my $size = 0;
+ while (1) {
+ sleep 1;
+ $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size");
+ $err = 1 if $retry > 5;
+ last if $size eq $requested_size || $retry > 5;
+ $retry++;
+ }
+ $totalsize += ($size / 1024 / 1024 );
+ }
+ #update conf after each succesful module unplug
+ if($err) {
+ $conf->{memory} = $totalsize;
+ PVE::QemuConfig->write_config($vmid, $conf);
+ raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err;
+ }
+ return $totalsize;
+
+ } elsif($value > $memory) {
my $numa_hostmap;
@@ -266,33 +343,27 @@ sub qemu_dimm_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_;
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = get_static_mem($conf, $defaults);
- if ($hotplug_features->{memory}) {
+ if ($hotplug_features->{memory} || $conf->{'memory_max'}) {
die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
- my $MAX_MEM = get_max_mem($conf);
- die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
+ my $max_mem = get_max_mem($conf);
+ die "Total memory is bigger than ${max_mem}MB\n" if $memory > $max_mem;
for (my $i = 0; $i < $MAX_NUMA; $i++) {
die "cannot enable memory hotplugging with custom NUMA topology\n"
if $conf->{"numa$i"};
}
- my $sockets = 1;
- $sockets = $conf->{sockets} if $conf->{sockets};
-
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
-
- die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
- push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";
+ die "memory size need to be multiple of 32MB when memory_max is defined" if $conf->{'memory_max'} && ($memory % 32 != 0);
+ my $cmdstr = "size=${static_memory},maxmem=${max_mem}M";
+ $cmdstr .= ",slots=255" if !$conf->{'memory_max'};
+ push @$cmd, '-m', $cmdstr;
} else {
-
- $static_memory = $memory;
push @$cmd, '-m', $static_memory;
}
@@ -359,7 +430,21 @@ sub config {
}
}
- if ($hotplug_features->{memory}) {
+ if ($conf->{'memory_max'}) {
+ my $node_maxmem = ($conf->{'memory_max'} - $static_memory) / $sockets;
+ my $node_mem = ($memory - $static_memory) / $sockets;
+ my $blocksize = get_virtiomem_block_size($conf, $static_memory);
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type);
+ my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem);
+
+ push @$cmd, "-object" , $mem_object;
+ push @$devices, "-device", "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr";
+ }
+ } elsif ($hotplug_features->{memory}) {
foreach_dimm($conf, $vmid, $memory, $sockets, sub {
my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
@@ -485,20 +570,14 @@ sub hugepages_topology {
my $defaults = PVE::QemuServer::load_defaults();
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = get_static_mem($conf, $defaults);
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
my $numa_custom_topology = undef;
my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
- if ($hotplug_features->{memory}) {
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
- } else {
- $static_memory = $memory;
- }
-
#custom numa topology
for (my $i = 0; $i < $MAX_NUMA; $i++) {
next if !$conf->{"numa$i"};
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index a18b974..0187c74 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -249,6 +249,14 @@ sub get_pci_addr_map {
'scsihw2' => { bus => 4, addr => 1 },
'scsihw3' => { bus => 4, addr => 2 },
'scsihw4' => { bus => 4, addr => 3 },
+ 'virtiomem0' => { bus => 4, addr => 4 },
+ 'virtiomem1' => { bus => 4, addr => 5 },
+ 'virtiomem2' => { bus => 4, addr => 6 },
+ 'virtiomem3' => { bus => 4, addr => 7 },
+ 'virtiomem4' => { bus => 4, addr => 8 },
+ 'virtiomem5' => { bus => 4, addr => 9 },
+ 'virtiomem6' => { bus => 4, addr => 10 },
+ 'virtiomem7' => { bus => 4, addr => 11 },
} if !defined($pci_addr_map);
return $pci_addr_map;
}
--
2.30.2
next prev parent reply other threads:[~2022-11-18 12:14 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-18 12:13 [pve-devel] [PATCH v3 qemu-server 0/2] " Alexandre Derumier
2022-11-18 12:13 ` Alexandre Derumier [this message]
2022-11-18 12:13 ` [pve-devel] [PATCH v3 qemu-server 2/2] tests: add virtio-mem tests Alexandre Derumier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221118121320.132283-2-aderumier@odiso.com \
--to=aderumier@odiso.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.