From: Alexandre Derumier <aderumier@odiso.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH V2 qemu-server 1/2] add virtio-mem support
Date: Wed, 24 Aug 2022 13:34:42 +0200 [thread overview]
Message-ID: <20220824113443.936256-2-aderumier@odiso.com> (raw)
In-Reply-To: <20220824113443.936256-1-aderumier@odiso.com>
This patch adds virtio-mem support through a new maxmemory option.
4GB of static memory is needed for DMA + boot memory, as this memory
can almost never be unplugged.
One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge.
virtio-mem uses a fixed block size and can map at most 32000 blocks,
so the block size is computed from the hotpluggable memory / 32000, with a
minimum of 2MB to align with THP.
(lower blocksize = more chance to unplug memory).
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
PVE/QemuServer.pm | 9 ++-
PVE/QemuServer/Memory.pm | 133 ++++++++++++++++++++++++++++++---------
PVE/QemuServer/PCI.pm | 8 +++
3 files changed, 121 insertions(+), 29 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index c706653..fb4b1c4 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -340,6 +340,13 @@ my $confdesc = {
maximum => 262144,
default => 'cgroup v1: 1024, cgroup v2: 100',
},
+ maxmemory => {
+ optional => 1,
+ type => 'integer',
+ description => "Max hotpluggable virtio-mem memory",
+ minimum => 4096,
+ default => undef,
+ },
memory => {
optional => 1,
type => 'integer',
@@ -3815,7 +3822,7 @@ sub config_to_command {
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type);
push @$cmd, '-S' if $conf->{freeze};
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index a41f5ae..bd26002 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -8,11 +8,48 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer;
use PVE::QemuServer::Monitor qw(mon_cmd);
+use PVE::QemuServer::PCI qw(print_pci_addr);
my $MAX_NUMA = 8;
my $MAX_MEM = 4194304;
my $STATICMEM = 1024;
+my $compute_static_mem = sub { # returns the static (boot-time) memory size in MB for $conf
+ my ($conf, $defaults) = @_;
+
+ my $sockets = 1;
+ $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
+ $sockets = $conf->{sockets} if $conf->{sockets};
+ my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); # '1' is used when hotplug is unset
+
+ my $static_memory = 0;
+
+ if ($hotplug_features->{memory} || $conf->{maxmemory}) { # memory hotplug or virtio-mem: only a fixed part is static
+ $static_memory = $STATICMEM;
+ $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024); # 1GB hugepages: scale by socket count
+ $static_memory = 4096 if $conf->{maxmemory}; # virtio-mem: always 4096, overriding the values above
+ } else {
+ $static_memory = $conf->{memory} || $defaults->{memory}; # no hotplug: all configured memory is static
+ }
+
+ return $static_memory;
+};
+
+my $compute_virtiomem_block_size = sub { # returns the virtio-mem block size in MB (a power of 2, >= 2), or undef without maxmemory
+ my ($conf, $static_memory) = @_;
+
+ my $maxmemory = $conf->{maxmemory};
+ return undef if !$maxmemory;
+
+ # virtio-mem can map at most 32000 blocks. try to use the lowest blocksize, lower = more chance to unplug memory.
+ my $blocksize = ($maxmemory - $static_memory) / 32000;
+ # 2MB is the minimum to be aligned with THP; starting there also avoids log(0) dying when maxmemory == static memory
+ my $rounded = 2;
+ # round up to the next power of 2 with integer math (a value that already is a power of 2 is kept, not doubled)
+ $rounded *= 2 while $rounded < $blocksize;
+ return $rounded;
+};
+
sub get_numa_node_list {
my ($conf) = @_;
my @numa_map;
@@ -104,6 +141,8 @@ sub foreach_reverse_dimm {
}
}
+
+
sub qemu_memory_hotplug {
my ($vmid, $conf, $defaults, $opt, $value) = @_;
@@ -116,13 +155,47 @@ sub qemu_memory_hotplug {
$value = $defaults->{memory} if !$value;
return $value if $value == $memory;
- my $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+ my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
die "memory can't be lower than $static_memory MB" if $value < $static_memory;
- die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM;
+ die "you cannot add more memory than $maxmemory MB!\n" if $value > $maxmemory;
+
+ if ($conf->{maxmemory}) {
+
+ die "memory size need to be multiple of 32MB when maxmemory is defined" if $value % 32 != 0;
+
+ my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024;
+
+ my $totalsize = $static_memory;
+ my $err = undef;
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $retry = 0;
+ mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size));
+
+ my $size = 0;
+ while (1) {
+ sleep 1;
+ $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size");
+ $err = 1 if $retry > 5;
+ last if $size eq $requested_size || $retry > 5;
+ $retry++;
+ }
+ $totalsize += ($size / 1024 / 1024 );
+ }
- if($value > $memory) {
+ #update conf after each succesful module unplug
+ if($err) {
+ $conf->{memory} = $totalsize;
+ PVE::QemuConfig->write_config($vmid, $conf);
+ raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err;
+ }
+ return $totalsize;
+
+ } elsif($value > $memory) {
my $numa_hostmap;
@@ -217,32 +290,28 @@ sub qemu_dimm_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_;
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+
+ if ($hotplug_features->{memory} || $conf->{maxmemory}) {
+
+ my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
- if ($hotplug_features->{memory}) {
die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
- die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
+ die "Total memory is bigger than ${maxmemory}MB\n" if $memory > $maxmemory;
for (my $i = 0; $i < $MAX_NUMA; $i++) {
die "cannot enable memory hotplugging with custom NUMA topology\n"
if $conf->{"numa$i"};
}
-
- my $sockets = 1;
- $sockets = $conf->{sockets} if $conf->{sockets};
-
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
-
die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
- push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";
-
+ die "memory size need to be multiple of 32MB when maxmemory is defined" if $conf->{maxmemory} && ($memory % 32 != 0);
+ my $cmdstr = "size=${static_memory},maxmem=${maxmemory}M";
+ $cmdstr .= ",slots=255" if !$conf->{maxmemory};
+ push @$cmd, '-m', $cmdstr;
} else {
-
- $static_memory = $memory;
push @$cmd, '-m', $static_memory;
}
@@ -309,7 +378,21 @@ sub config {
}
}
- if ($hotplug_features->{memory}) {
+ if ($conf->{maxmemory}) {
+ my $node_maxmem = ($conf->{maxmemory} - $static_memory) / $sockets;
+ my $node_mem = ($memory - $static_memory) / $sockets;
+ my $blocksize = &$compute_virtiomem_block_size($conf, $static_memory);
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type);
+ my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem);
+
+ push @$cmd, "-object" , $mem_object;
+ push @$devices, "-device", "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr";
+ }
+ } elsif ($hotplug_features->{memory}) {
foreach_dimm($conf, $vmid, $memory, $sockets, sub {
my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
@@ -435,20 +518,14 @@ sub hugepages_topology {
my $defaults = PVE::QemuServer::load_defaults();
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
my $numa_custom_topology = undef;
my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
- if ($hotplug_features->{memory}) {
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
- } else {
- $static_memory = $memory;
- }
-
#custom numa topology
for (my $i = 0; $i < $MAX_NUMA; $i++) {
next if !$conf->{"numa$i"};
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index 3d0e70e..9b59b91 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -249,6 +249,14 @@ sub get_pci_addr_map {
'scsihw2' => { bus => 4, addr => 1 },
'scsihw3' => { bus => 4, addr => 2 },
'scsihw4' => { bus => 4, addr => 3 },
+ 'virtiomem0' => { bus => 4, addr => 4 },
+ 'virtiomem1' => { bus => 4, addr => 5 },
+ 'virtiomem2' => { bus => 4, addr => 6 },
+ 'virtiomem3' => { bus => 4, addr => 7 },
+ 'virtiomem4' => { bus => 4, addr => 8 },
+ 'virtiomem5' => { bus => 4, addr => 9 },
+ 'virtiomem6' => { bus => 4, addr => 10 },
+ 'virtiomem7' => { bus => 4, addr => 11 },
} if !defined($pci_addr_map);
return $pci_addr_map;
}
--
2.30.2
next prev parent reply other threads:[~2022-08-24 11:34 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-24 11:34 [pve-devel] [PATCH V2 qemu-server 0/2] " Alexandre Derumier
2022-08-24 11:34 ` Alexandre Derumier [this message]
2022-08-24 11:34 ` [pve-devel] [PATCH V2 qemu-server 2/2] tests: add virtio-mem tests Alexandre Derumier
2022-09-23 11:20 ` [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support DERUMIER, Alexandre
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220824113443.936256-2-aderumier@odiso.com \
--to=aderumier@odiso.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.