* [pve-devel] [PATCH V2 qemu-server 1/2] add virtio-mem support
2022-08-24 11:34 [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support Alexandre Derumier
@ 2022-08-24 11:34 ` Alexandre Derumier
2022-08-24 11:34 ` [pve-devel] [PATCH V2 qemu-server 2/2] tests: add virtio-mem tests Alexandre Derumier
2022-09-23 11:20 ` [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support DERUMIER, Alexandre
2 siblings, 0 replies; 4+ messages in thread
From: Alexandre Derumier @ 2022-08-24 11:34 UTC (permalink / raw)
To: pve-devel
This patch adds virtio-mem support through a new maxmemory option.
4GB of static memory is needed for DMA+boot memory, as this memory
can almost never be unplugged.
One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge.
virtio-mem uses a fixed block size and can map at most 32k blocks,
so the block size is computed from (maxmemory - static memory)/32000,
rounded up to a power of 2, with a minimum of 2MB to stay aligned with THP.
(A lower block size means a better chance of unplugging memory.)
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
PVE/QemuServer.pm | 9 ++-
PVE/QemuServer/Memory.pm | 133 ++++++++++++++++++++++++++++++---------
PVE/QemuServer/PCI.pm | 8 +++
3 files changed, 121 insertions(+), 29 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index c706653..fb4b1c4 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -340,6 +340,13 @@ my $confdesc = {
maximum => 262144,
default => 'cgroup v1: 1024, cgroup v2: 100',
},
+ maxmemory => {
+ optional => 1,
+ type => 'integer',
+ description => "Max hotpluggable virtio-mem memory",
+ minimum => 4096,
+ default => undef,
+ },
memory => {
optional => 1,
type => 'integer',
@@ -3815,7 +3822,7 @@ sub config_to_command {
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type);
push @$cmd, '-S' if $conf->{freeze};
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index a41f5ae..bd26002 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -8,11 +8,48 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer;
use PVE::QemuServer::Monitor qw(mon_cmd);
+use PVE::QemuServer::PCI qw(print_pci_addr);
my $MAX_NUMA = 8;
my $MAX_MEM = 4194304;
my $STATICMEM = 1024;
+my $compute_static_mem = sub {
+ my ($conf, $defaults) = @_;
+
+ my $sockets = 1;
+ $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
+ $sockets = $conf->{sockets} if $conf->{sockets};
+ my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+
+ my $static_memory = 0;
+
+ if ($hotplug_features->{memory} || $conf->{maxmemory}) {
+ $static_memory = $STATICMEM;
+ $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
+ $static_memory = 4096 if $conf->{maxmemory};
+ } else {
+ $static_memory = $conf->{memory} || $defaults->{memory};
+ }
+
+ return $static_memory;
+};
+
+my $compute_virtiomem_block_size = sub {
+ my ($conf, $static_memory) = @_;
+
+ my $maxmemory = $conf->{maxmemory};
+ return undef if !$maxmemory;
+
+ #virtiomem can map 32000 block size. try to use lowerst blocksize, lower = more chance to unplug memory.
+ my $blocksize = ($maxmemory - $static_memory) / 32000;
+ #round next power of 2
+ $blocksize = 2**(int(log($blocksize)/log(2))+1);
+ #2MB is the minimum to be aligned with THP
+ $blocksize = 2 if $blocksize < 2;
+ return $blocksize;
+};
+
sub get_numa_node_list {
my ($conf) = @_;
my @numa_map;
@@ -104,6 +141,8 @@ sub foreach_reverse_dimm {
}
}
+
+
sub qemu_memory_hotplug {
my ($vmid, $conf, $defaults, $opt, $value) = @_;
@@ -116,13 +155,47 @@ sub qemu_memory_hotplug {
$value = $defaults->{memory} if !$value;
return $value if $value == $memory;
- my $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+ my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
die "memory can't be lower than $static_memory MB" if $value < $static_memory;
- die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM;
+ die "you cannot add more memory than $maxmemory MB!\n" if $value > $maxmemory;
+
+ if ($conf->{maxmemory}) {
+
+ die "memory size need to be multiple of 32MB when maxmemory is defined" if $value % 32 != 0;
+
+ my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024;
+
+ my $totalsize = $static_memory;
+ my $err = undef;
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $retry = 0;
+ mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size));
+
+ my $size = 0;
+ while (1) {
+ sleep 1;
+ $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size");
+ $err = 1 if $retry > 5;
+ last if $size eq $requested_size || $retry > 5;
+ $retry++;
+ }
+ $totalsize += ($size / 1024 / 1024 );
+ }
- if($value > $memory) {
+ #update conf after each succesful module unplug
+ if($err) {
+ $conf->{memory} = $totalsize;
+ PVE::QemuConfig->write_config($vmid, $conf);
+ raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err;
+ }
+ return $totalsize;
+
+ } elsif($value > $memory) {
my $numa_hostmap;
@@ -217,32 +290,28 @@ sub qemu_dimm_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_;
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+
+ if ($hotplug_features->{memory} || $conf->{maxmemory}) {
+
+ my $maxmemory = $conf->{maxmemory} || $MAX_MEM;
- if ($hotplug_features->{memory}) {
die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
- die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
+ die "Total memory is bigger than ${maxmemory}MB\n" if $memory > $maxmemory;
for (my $i = 0; $i < $MAX_NUMA; $i++) {
die "cannot enable memory hotplugging with custom NUMA topology\n"
if $conf->{"numa$i"};
}
-
- my $sockets = 1;
- $sockets = $conf->{sockets} if $conf->{sockets};
-
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
-
die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
- push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";
-
+ die "memory size need to be multiple of 32MB when maxmemory is defined" if $conf->{maxmemory} && ($memory % 32 != 0);
+ my $cmdstr = "size=${static_memory},maxmem=${maxmemory}M";
+ $cmdstr .= ",slots=255" if !$conf->{maxmemory};
+ push @$cmd, '-m', $cmdstr;
} else {
-
- $static_memory = $memory;
push @$cmd, '-m', $static_memory;
}
@@ -309,7 +378,21 @@ sub config {
}
}
- if ($hotplug_features->{memory}) {
+ if ($conf->{maxmemory}) {
+ my $node_maxmem = ($conf->{maxmemory} - $static_memory) / $sockets;
+ my $node_mem = ($memory - $static_memory) / $sockets;
+ my $blocksize = &$compute_virtiomem_block_size($conf, $static_memory);
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type);
+ my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem);
+
+ push @$cmd, "-object" , $mem_object;
+ push @$devices, "-device", "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr";
+ }
+ } elsif ($hotplug_features->{memory}) {
foreach_dimm($conf, $vmid, $memory, $sockets, sub {
my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
@@ -435,20 +518,14 @@ sub hugepages_topology {
my $defaults = PVE::QemuServer::load_defaults();
my $memory = $conf->{memory} || $defaults->{memory};
- my $static_memory = 0;
+ my $static_memory = &$compute_static_mem($conf, $defaults);
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
my $numa_custom_topology = undef;
my $hotplug_features = PVE::QemuServer::parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
- if ($hotplug_features->{memory}) {
- $static_memory = $STATICMEM;
- $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
- } else {
- $static_memory = $memory;
- }
-
#custom numa topology
for (my $i = 0; $i < $MAX_NUMA; $i++) {
next if !$conf->{"numa$i"};
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index 3d0e70e..9b59b91 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -249,6 +249,14 @@ sub get_pci_addr_map {
'scsihw2' => { bus => 4, addr => 1 },
'scsihw3' => { bus => 4, addr => 2 },
'scsihw4' => { bus => 4, addr => 3 },
+ 'virtiomem0' => { bus => 4, addr => 4 },
+ 'virtiomem1' => { bus => 4, addr => 5 },
+ 'virtiomem2' => { bus => 4, addr => 6 },
+ 'virtiomem3' => { bus => 4, addr => 7 },
+ 'virtiomem4' => { bus => 4, addr => 8 },
+ 'virtiomem5' => { bus => 4, addr => 9 },
+ 'virtiomem6' => { bus => 4, addr => 10 },
+ 'virtiomem7' => { bus => 4, addr => 11 },
} if !defined($pci_addr_map);
return $pci_addr_map;
}
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [pve-devel] [PATCH V2 qemu-server 2/2] tests: add virtio-mem tests
2022-08-24 11:34 [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support Alexandre Derumier
2022-08-24 11:34 ` [pve-devel] [PATCH V2 qemu-server 1/2] " Alexandre Derumier
@ 2022-08-24 11:34 ` Alexandre Derumier
2022-09-23 11:20 ` [pve-devel] [PATCH V2 qemu-server 0/2] add virtio-mem support DERUMIER, Alexandre
2 siblings, 0 replies; 4+ messages in thread
From: Alexandre Derumier @ 2022-08-24 11:34 UTC (permalink / raw)
To: pve-devel
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
test/cfg2cmd/simple-virtio-mem-big.conf | 12 +++++
test/cfg2cmd/simple-virtio-mem-big.conf.cmd | 59 +++++++++++++++++++++
test/cfg2cmd/simple-virtio-mem.conf | 13 +++++
test/cfg2cmd/simple-virtio-mem.conf.cmd | 31 +++++++++++
4 files changed, 115 insertions(+)
create mode 100644 test/cfg2cmd/simple-virtio-mem-big.conf
create mode 100644 test/cfg2cmd/simple-virtio-mem-big.conf.cmd
create mode 100644 test/cfg2cmd/simple-virtio-mem.conf
create mode 100644 test/cfg2cmd/simple-virtio-mem.conf.cmd
diff --git a/test/cfg2cmd/simple-virtio-mem-big.conf b/test/cfg2cmd/simple-virtio-mem-big.conf
new file mode 100644
index 0000000..936da4b
--- /dev/null
+++ b/test/cfg2cmd/simple-virtio-mem-big.conf
@@ -0,0 +1,12 @@
+# TEST: virtio-mem with 128GB ram && 8 numa nodes
+maxmemory: 131072
+bootdisk: scsi0
+cores: 1
+memory: 8192
+name: simple
+numa: 1
+ostype: l26
+scsihw: virtio-scsi-pci
+smbios1: uuid=7b10d7af-b932-4c66-b2c3-3996152ec465
+sockets: 8
+vmgenid: c773c261-d800-4348-1010-1010add53cf8
diff --git a/test/cfg2cmd/simple-virtio-mem-big.conf.cmd b/test/cfg2cmd/simple-virtio-mem-big.conf.cmd
new file mode 100644
index 0000000..a09c750
--- /dev/null
+++ b/test/cfg2cmd/simple-virtio-mem-big.conf.cmd
@@ -0,0 +1,59 @@
+/usr/bin/kvm \
+ -id 8006 \
+ -name 'simple,debug-threads=on' \
+ -no-shutdown \
+ -chardev 'socket,id=qmp,path=/var/run/qemu-server/8006.qmp,server=on,wait=off' \
+ -mon 'chardev=qmp,mode=control' \
+ -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' \
+ -mon 'chardev=qmp-event,mode=control' \
+ -pidfile /var/run/qemu-server/8006.pid \
+ -daemonize \
+ -smbios 'type=1,uuid=7b10d7af-b932-4c66-b2c3-3996152ec465' \
+ -smp '8,sockets=8,cores=1,maxcpus=8' \
+ -nodefaults \
+ -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' \
+ -vnc 'unix:/var/run/qemu-server/8006.vnc,password=on' \
+ -cpu kvm64,enforce,+kvm_pv_eoi,+kvm_pv_unhalt,+lahf_lm,+sep \
+ -m 'size=4096,maxmem=131072M' \
+ -object 'memory-backend-ram,id=ram-node0,size=512M' \
+ -numa 'node,nodeid=0,cpus=0,memdev=ram-node0' \
+ -object 'memory-backend-ram,id=ram-node1,size=512M' \
+ -numa 'node,nodeid=1,cpus=1,memdev=ram-node1' \
+ -object 'memory-backend-ram,id=ram-node2,size=512M' \
+ -numa 'node,nodeid=2,cpus=2,memdev=ram-node2' \
+ -object 'memory-backend-ram,id=ram-node3,size=512M' \
+ -numa 'node,nodeid=3,cpus=3,memdev=ram-node3' \
+ -object 'memory-backend-ram,id=ram-node4,size=512M' \
+ -numa 'node,nodeid=4,cpus=4,memdev=ram-node4' \
+ -object 'memory-backend-ram,id=ram-node5,size=512M' \
+ -numa 'node,nodeid=5,cpus=5,memdev=ram-node5' \
+ -object 'memory-backend-ram,id=ram-node6,size=512M' \
+ -numa 'node,nodeid=6,cpus=6,memdev=ram-node6' \
+ -object 'memory-backend-ram,id=ram-node7,size=512M' \
+ -numa 'node,nodeid=7,cpus=7,memdev=ram-node7' \
+ -object 'memory-backend-ram,id=mem-virtiomem0,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem1,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem2,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem3,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem4,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem5,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem6,size=15872M' \
+ -object 'memory-backend-ram,id=mem-virtiomem7,size=15872M' \
+ -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' \
+ -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' \
+ -device 'pci-bridge,id=pci.4,chassis_nr=4,bus=pci.1,addr=0x1c' \
+ -device 'vmgenid,guid=c773c261-d800-4348-1010-1010add53cf8' \
+ -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' \
+ -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' \
+ -device 'VGA,id=vga,bus=pci.0,addr=0x2' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem0,memdev=mem-virtiomem0,node=0,bus=pci.4,addr=0x4' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem1,memdev=mem-virtiomem1,node=1,bus=pci.4,addr=0x5' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem2,memdev=mem-virtiomem2,node=2,bus=pci.4,addr=0x6' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem3,memdev=mem-virtiomem3,node=3,bus=pci.4,addr=0x7' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem4,memdev=mem-virtiomem4,node=4,bus=pci.4,addr=0x8' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem5,memdev=mem-virtiomem5,node=5,bus=pci.4,addr=0x9' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem6,memdev=mem-virtiomem6,node=6,bus=pci.4,addr=0xa' \
+ -device 'virtio-mem-pci,block-size=4M,requested-size=512M,id=virtiomem7,memdev=mem-virtiomem7,node=7,bus=pci.4,addr=0xb' \
+ -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' \
+ -iscsi 'initiator-name=iqn.1993-08.org.debian:01:aabbccddeeff' \
+ -machine 'type=pc+pve0'
diff --git a/test/cfg2cmd/simple-virtio-mem.conf b/test/cfg2cmd/simple-virtio-mem.conf
new file mode 100644
index 0000000..a8832d9
--- /dev/null
+++ b/test/cfg2cmd/simple-virtio-mem.conf
@@ -0,0 +1,13 @@
+# TEST: Simple test for virtio-mem maxmemory
+bootdisk: scsi0
+cores: 2
+memory: 8192
+name: simple
+numa: 1
+sockets: 1
+maxmemory: 16384
+ostype: l26
+scsihw: virtio-scsi-pci
+smbios1: uuid=7b10d7af-b932-4c66-b2c3-3996152ec465
+sockets: 1
+vmgenid: c773c261-d800-4348-1010-1010add53cf8
diff --git a/test/cfg2cmd/simple-virtio-mem.conf.cmd b/test/cfg2cmd/simple-virtio-mem.conf.cmd
new file mode 100644
index 0000000..36d6a24
--- /dev/null
+++ b/test/cfg2cmd/simple-virtio-mem.conf.cmd
@@ -0,0 +1,31 @@
+/usr/bin/kvm \
+ -id 8006 \
+ -name 'simple,debug-threads=on' \
+ -no-shutdown \
+ -chardev 'socket,id=qmp,path=/var/run/qemu-server/8006.qmp,server=on,wait=off' \
+ -mon 'chardev=qmp,mode=control' \
+ -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' \
+ -mon 'chardev=qmp-event,mode=control' \
+ -pidfile /var/run/qemu-server/8006.pid \
+ -daemonize \
+ -smbios 'type=1,uuid=7b10d7af-b932-4c66-b2c3-3996152ec465' \
+ -smp '2,sockets=1,cores=2,maxcpus=2' \
+ -nodefaults \
+ -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' \
+ -vnc 'unix:/var/run/qemu-server/8006.vnc,password=on' \
+ -cpu kvm64,enforce,+kvm_pv_eoi,+kvm_pv_unhalt,+lahf_lm,+sep \
+ -m 'size=4096,maxmem=16384M' \
+ -object 'memory-backend-ram,id=ram-node0,size=4096M' \
+ -numa 'node,nodeid=0,cpus=0-1,memdev=ram-node0' \
+ -object 'memory-backend-ram,id=mem-virtiomem0,size=12288M' \
+ -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' \
+ -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' \
+ -device 'pci-bridge,id=pci.4,chassis_nr=4,bus=pci.1,addr=0x1c' \
+ -device 'vmgenid,guid=c773c261-d800-4348-1010-1010add53cf8' \
+ -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' \
+ -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' \
+ -device 'VGA,id=vga,bus=pci.0,addr=0x2' \
+ -device 'virtio-mem-pci,block-size=2M,requested-size=4096M,id=virtiomem0,memdev=mem-virtiomem0,node=0,bus=pci.4,addr=0x4' \
+ -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' \
+ -iscsi 'initiator-name=iqn.1993-08.org.debian:01:aabbccddeeff' \
+ -machine 'type=pc+pve0'
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread