From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <m.frank@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
key-exchange X25519 server-signature RSA-PSS (2048 bits))
(No client certificate requested)
by lists.proxmox.com (Postfix) with ESMTPS id 9044A957A4
for <pve-devel@lists.proxmox.com>; Wed, 18 Jan 2023 14:58:22 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
by firstgate.proxmox.com (Proxmox) with ESMTP id 365EB20F03
for <pve-devel@lists.proxmox.com>; Wed, 18 Jan 2023 14:58:22 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
[94.136.29.106])
(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
key-exchange X25519 server-signature RSA-PSS (2048 bits))
(No client certificate requested)
by firstgate.proxmox.com (Proxmox) with ESMTPS
for <pve-devel@lists.proxmox.com>; Wed, 18 Jan 2023 14:58:20 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 096BF4474A
for <pve-devel@lists.proxmox.com>; Wed, 18 Jan 2023 14:58:20 +0100 (CET)
From: Markus Frank <m.frank@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Wed, 18 Jan 2023 14:57:57 +0100
Message-Id: <20230118135800.131382-3-m.frank@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20230118135800.131382-1-m.frank@proxmox.com>
References: <20230118135800.131382-1-m.frank@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results: 0
AWL -0.035 Adjusted score from AWL reputation of From: address
BAYES_00 -1.9 Bayes spam probability is 0 to 1%
KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record
SPF_PASS -0.001 SPF: sender matches SPF record
Subject: [pve-devel] [PATCH qemu-server v5 2/5] feature #3784: Parameter for
guest vIOMMU & machine as property-string
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>,
<mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>,
<mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 18 Jan 2023 13:58:22 -0000
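A guest vIOMMU makes it possible to pass through PCI devices to L2 VMs
running inside L1 VMs via nested virtualisation.
To carry the new option, the 'machine' config entry becomes a property
string: the machine type is the default key and 'viommu' is a boolean flag.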
QEMU parameters (see the following references):
https://www.qemu.org/docs/master/system/qemu-manpage.html
https://wiki.qemu.org/Features/VT-d
-machine ...,kernel-irqchip=split:
"split" is needed because of intremap=on (see below).
-device intel-iommu:
* caching-mode=on:
"It is required for -device vfio-pci to work with the VT-d device, because host
assigned devices requires to setup the DMA mapping on the host before guest DMA
starts."
* intremap=on:
"This enables interrupt remapping feature. It's required to enable complete
x2apic. Currently it only supports kvm kernel-irqchip modes off or split, while
full kernel-irqchip is not yet supported."
Signed-off-by: Markus Frank <m.frank@proxmox.com>
---
PVE/API2/Qemu.pm | 23 ++++++++++++++---
PVE/QemuConfig.pm | 3 ++-
PVE/QemuServer.pm | 52 ++++++++++++++++++++++++++++++++++++---
PVE/QemuServer/Machine.pm | 6 +++--
4 files changed, 75 insertions(+), 9 deletions(-)
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index c87602d..4e089f6 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -979,13 +979,21 @@ __PACKAGE__->register_method({
$conf->{vmgenid} = PVE::QemuServer::generate_uuid();
}
- my $machine = $conf->{machine};
+ my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+ my $machine = $machine_conf->{type};
if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
# always pin Windows' machine version on create, they get to easily confused
if (PVE::QemuServer::Helpers::windows_version($conf->{ostype})) {
- $conf->{machine} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
+ $machine_conf->{type} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
+ $conf->{machine} = PVE::QemuServer::print_machine($machine_conf);
}
}
+ my $q35 = $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
+ my $kvm = $conf->{kvm};
+ $kvm //= 1 if PVE::QemuServer::is_native($arch);
+ if ($machine_conf->{viommu} && (!$kvm || !$q35)) {
+ die "to use vIOMMU please enable kvm and set the machine type to q35\n";
+ }
PVE::QemuConfig->write_config($vmid, $conf);
@@ -1770,7 +1778,16 @@ my $update_vm_api = sub {
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
$conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
- } else {
+ } elsif ($opt eq 'machine') {
+ my $machine_conf = PVE::QemuServer::parse_machine($param->{$opt});
+ my $q35 = $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
+ my $kvm = $conf->{kvm};
+ $kvm //= 1 if PVE::QemuServer::is_native($arch);
+ if ($machine_conf->{viommu} && (!$kvm || !$q35)) {
+ die "to use vIOMMU please enable kvm and set the machine type to q35\n";
+ }
+ $conf->{pending}->{$opt} = $param->{$opt};
+ }else {
$conf->{pending}->{$opt} = $param->{$opt};
if ($opt eq 'boot') {
diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 051382c..7c998ef 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -433,7 +433,8 @@ sub __snapshot_rollback_hook {
} else {
# Note: old code did not store 'machine', so we try to be smart
# and guess the snapshot was generated with kvm 1.4 (pc-i440fx-1.4).
- $data->{forcemachine} = $conf->{machine} || 'pc-i440fx-1.4';
+ my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+ $data->{forcemachine} = $machine_conf->{type} || 'pc-i440fx-1.4';
# we remove the 'machine' configuration if not explicitly specified
# in the original config.
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 987908d..55c11d5 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -124,6 +124,19 @@ PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
optional => 1,
});
+my $machine_fmt = {
+ type => get_standard_option('pve-qemu-machine', {
+ default_key => 1,
+ format_description => "pve-qemu-machine-type",
+ }),
+ viommu => {
+ type => 'boolean',
+ description => "enable guest vIOMMU (needs kvm to be enabled and q35 to be set as machine)",
+ default => 0,
+ optional => 1,
+ },
+};
+
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;
sub nodename {
@@ -626,7 +639,12 @@ EODESCR
pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
format_description => 'QEMU -cpu parameter'
},
- machine => get_standard_option('pve-qemu-machine'),
+ machine => {
+ description => "Specifies the Qemu machine type.",
+ type => 'string',
+ optional => 1,
+ format => $machine_fmt,
+ },
arch => {
description => "Virtual processor architecture. Defaults to the host.",
optional => 1,
@@ -2134,6 +2152,21 @@ sub parse_watchdog {
return $res;
}
+sub parse_machine {
+ my ($value) = @_;
+
+ return if !$value;
+
+ my $res = eval { parse_property_string($machine_fmt, $value) };
+ die $@ if $@;
+ return $res;
+}
+
+sub print_machine {
+ my ($machine_conf) = @_;
+ return PVE::JSONSchema::print_property_string($machine_conf, $machine_fmt);
+}
+
sub parse_guest_agent {
my ($conf) = @_;
@@ -2205,8 +2238,9 @@ sub qemu_created_version_fixups {
# check if we need to apply some handling for VMs that always use the latest machine version but
# had a machine version transition happen that affected HW such that, e.g., an OS config change
# would be required (we do not want to pin machine version for non-windows OS type)
+ my $machine_conf = parse_machine($conf->{machine});
if (
- (!defined($conf->{machine}) || $conf->{machine} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
+ (!defined($machine_conf->{type}) || $machine_conf->{type} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
&& (!defined($meta->{'creation-qemu'}) || !min_version($meta->{'creation-qemu'}, 6, 1)) # created before 6.1
&& (!$forced_vers || min_version($forced_vers, 6, 1)) # handle snapshot-rollback/migrations
&& min_version($kvmver, 6, 1) # only need to apply the change since 6.1
@@ -3335,7 +3369,8 @@ sub windows_get_pinned_machine_version {
sub get_vm_machine {
my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;
- my $machine = $forcemachine || $conf->{machine};
+ my $machine_conf = parse_machine($conf->{machine});
+ my $machine = $forcemachine || $machine_conf->{type};
if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
$kvmversion //= kvm_user_version();
@@ -3575,6 +3610,8 @@ sub config_to_command {
my $kvm = $conf->{kvm};
my $nodename = nodename();
+ my $machine_conf = parse_machine($conf->{machine});
+
my $arch = get_vm_arch($conf);
my $kvm_binary = get_command_for_arch($arch);
my $kvmver = kvm_user_version($kvm_binary);
@@ -4137,6 +4174,15 @@ sub config_to_command {
}
push @$machineFlags, "type=${machine_type_min}";
+ if ($machine_conf->{viommu} && (!$kvm || !$q35)) {
+ die "to use vIOMMU please enable kvm and set the machine type to q35\n";
+ }
+
+ if ($machine_conf->{viommu}) {
+ unshift @$devices, '-device', "intel-iommu,intremap=on,caching-mode=on";
+ push @$machineFlags, 'kernel-irqchip=split';
+ }
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
diff --git a/PVE/QemuServer/Machine.pm b/PVE/QemuServer/Machine.pm
index d9429ed..bfbde59 100644
--- a/PVE/QemuServer/Machine.pm
+++ b/PVE/QemuServer/Machine.pm
@@ -15,7 +15,8 @@ our $PVE_MACHINE_VERSION = {
sub machine_type_is_q35 {
my ($conf) = @_;
- return $conf->{machine} && ($conf->{machine} =~ m/q35/) ? 1 : 0;
+ my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+ return $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
}
sub current_from_query_machines {
@@ -120,7 +121,8 @@ sub qemu_machine_pxe {
my $machine = get_current_qemu_machine($vmid);
- if ($conf->{machine} && $conf->{machine} =~ m/\.pxe$/) {
+ my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+ if ($machine_conf->{type} && $machine_conf->{type} =~ m/\.pxe$/) {
$machine .= '.pxe';
}
--
2.30.2