From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id AE37D1FF13C for ; Thu, 25 Jun 2026 14:53:56 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 6AA82112B5; Thu, 25 Jun 2026 14:53:56 +0200 (CEST) Message-ID: <8074a513-a9b1-4d54-a435-22f039145a7f@proxmox.com> Date: Thu, 25 Jun 2026 14:53:21 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [pve-devel] [PATCH qemu-server] pci: try to detect large memory region preconditions To: Proxmox VE development discussion , Dominik Csapak References: <20260119142319.2631206-1-d.csapak@proxmox.com> Content-Language: en-US From: Fiona Ebner In-Reply-To: <20260119142319.2631206-1-d.csapak@proxmox.com> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1782391997215 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.009 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: 7ZHYE7JCWQDYKR3M2UW6RCSGE4K2EFHW X-Message-ID-Hash: 7ZHYE7JCWQDYKR3M2UW6RCSGE4K2EFHW X-MailFrom: f.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Could use a "close #7711" prefix now ;) Needs a (small) rebase, more comments below: Am 19.01.26 um 3:22 PM schrieb Dominik Csapak: > Also I'd like to move the section that exlains this, from the wiki[0] > to our reference docs. If nobody objects, I'd send a patch for that. > > 0: https://pve.proxmox.com/wiki/PCI_Passthrough#%22BAR0_is_0M%22_error_or_Windows_Code_12_Error Sounds good to me. > @@ -5652,6 +5655,13 @@ sub vm_start_nolock { > } > } > push @$cmd, '-uuid', $uuid if defined($uuid); > + > + if ($warn_mmio_size) { > + log_warn("A PCI device with a large memory region detected (e.g. VRAM), but VM" > + . " is not configured for a big enough MMIO size for OMVF. Consider enabling CPU" > + . " type 'host' or setting 'phys-bits' and possibly adding the 'pdpe1gb' flag." Can we also suggest which value is required for 'phys-bits' while at it? Would simply require tracking the largest size. And we know that we additionally need the pdpe1gb flag if (and only if) > 40. Like that, the warning could be more precise and better guide the user. > + ); > + } > }; > if (my $err = $@) { > eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); }; > diff --git a/src/PVE/QemuServer/PCI.pm b/src/PVE/QemuServer/PCI.pm > index c9cf8de0..265b3a51 100644 > --- a/src/PVE/QemuServer/PCI.pm > +++ b/src/PVE/QemuServer/PCI.pm > @@ -10,9 +10,13 @@ use PVE::Mapping::PCI; > use PVE::SysFSTools; > use PVE::Tools; > > +use PVE::QemuServer::CPUConfig; > use PVE::QemuServer::Helpers; > use PVE::QemuServer::Machine; > > +# avoid warning when parsing long hex values with hex() > +no warnings 'portable'; # Support for 64-bit ints required Please limit this in scope to the code block where it is required. > + > use base 'Exporter'; > > our @EXPORT_OK = qw( > @@ -903,4 +907,82 @@ sub reserve_pci_usage { > die $@ if $@; > } > > +# Returns the size of biggest memory region for a PCI device in bytes > +# This can be used to check if the config is correct for having an MMIO size that is large enough. > +sub get_biggest_memory_region { Nit: Could be private. > + my ($pciid) = @_; Style nit: I'd find $pci_id more readable. Same for is_mmio_size_ok() below. > + > + $pciid = PVE::SysFSTools::normalize_pci_id($pciid); > + > + # read resource regions from sysfs > + my $resource_file = "/sys/bus/pci/devices/$pciid/resource"; > + my $regions = PVE::Tools::file_get_contents($resource_file); > + > + # for each line parse start/end/flags. > + my $size = 0; Nit: I'd prefer calling this ${biggest,max}_size and the inner one $size. > + for my $line (split('\n', $regions)) { > + if ($line =~ m/^0x([a-f0-9]{16})\s0x([a-f0-9]{16})\s0x([a-f0-9]{16})$/) { > + my $start = hex($1); > + my $end = hex($2); > + my $flags = hex($3); > + > + # find largest memory region with 'IORESOURCE_MEM' flag (see include/linux/ioport.h in kernel source) > + if (($flags & 0x200) != 0) { > + my $cur_size = $end - $start + 1; Maybe add a short comment that the end is inclusive and that's why the +1 is required. > + if ($cur_size > ($size // 0)) { Nit: No need to fallback for $size, you already initialized it to 0 above. > + $size = $cur_size; > + } > + } > + } > + } > + > + return $size; > +} > + > +# returns 1 if the vm is configured so that the MMIO size is enough. > +# For PCI devices with memory regions >= 16G, the vm either has to: > +# * boot with seabios > +# * use 'host' type cpu > +# * use high enough 'phys-bits' value (or 'host') and (possibly) 'pdpe1gb' > +# > +# return 0 if vm config does not have either > +sub is_mmio_size_ok { > + my ($conf, $pciid) = @_; > + > + my $size = get_biggest_memory_region($pciid); > + > + return 1 if $size <= 16 * 1024 * 1024 * 1024; > + > + return 1 if ($conf->{bios} // 'seabios') eq 'seabios'; > + > + return 0 if !defined($conf->{cpu}); > + > + my $cpu = PVE::JSONSchema::parse_property_string('pve-vm-cpu-conf', $conf->{cpu}) > + or die "Cannot parse cpu description: $conf->{cpu}\n"; > + > + return 1 if ($cpu->{cputype} // '') eq 'host'; > + > + if (my $phys_bits = $cpu->{'phys-bits'}) { > + return 1 if $phys_bits eq 'host'; > + # if it's not 'host' it must be a number between 8 and 64 > + > + # edk2 limits the phys bits to 40 in case of no 1gb pages > + # and limits the mmio space to a quarter of the overall space. > + # > + # see edk2 source code: OvmfPkg/Library/PlatformInitLib/MemDetect.c > + > + if (($cpu->{flags} // '') !~ m/\+pdpe1gb/) { Should we limit the check to qemu64 and kvm64 models like in check_phys_bits_above_40_compat()? To be precise, we would need the CPU model expansion, but that is not cheap to get right now. Since models of most (and in particular all modern) CPU's have the pdpe1gb flag already, the check here would lead to false positives. > + $phys_bits = 40 if $phys_bits > 40; > + } > + > + my $mmio_bits = $phys_bits - 3; It's mmio_bytes? > + > + my $mmio_space = 2**$mmio_bits; > + > + return 1 if $mmio_space > $size; > + } > + > + return 0; > +} > + > 1; > diff --git a/src/test/Makefile b/src/test/Makefile > index 2ef9073a..1a5aaa95 100644 > --- a/src/test/Makefile > +++ b/src/test/Makefile > @@ -15,6 +15,9 @@ test_qemu_img_convert: run_qemu_img_convert_tests.pl > test_pci_addr_conflicts: run_pci_addr_checks.pl > ./run_pci_addr_checks.pl > > +test_pci_memory_detection: run_pci_memory_detection_tests.pl > + ./run_pci_memory_detection_tests.pl > + > test_pci_reservation: run_pci_reservation_tests.pl > ./run_pci_reservation_tests.pl > > diff --git a/src/test/run_pci_memory_detection_tests.pl b/src/test/run_pci_memory_detection_tests.pl > new file mode 100755 > index 00000000..7b30ab7e > --- /dev/null > +++ b/src/test/run_pci_memory_detection_tests.pl > @@ -0,0 +1,158 @@ > +#!/usr/bin/perl > + > +use strict; > +use warnings; Please use v5.36 and signatures for new modules. > + > +use lib qw(..); > + > +use JSON; > +use Test::More; > +use Test::MockModule; > + > +use PVE::JSONSchema; > +use PVE::QemuServer::CPUConfig; > +use PVE::QemuServer::PCI; > + > +my $tools_module; > +$tools_module = Test::MockModule->new('PVE::Tools'); > +$tools_module->mock( > + 'file_get_contents' => sub { > + my ($path) = @_; > + > + if ($path =~ m/01:00.0/) { > + # 0 B region > + return < +0x0000000000000000 0x0000000000000000 0x0000000000000000 > +EOF > + } elsif ($path =~ m/01:01.0/) { > + # 16 G region > + return < +0x0000017000000000 0x00000173ffffffff 0x000000000014220c > +EOF > + } elsif ($path =~ m/02:00.0/) { > + # 32 G region > + return < +0x0000017000000000 0x00000177ffffffff 0x000000000014220c > +EOF > + } elsif ($path =~ m/03:00.0/) { > + # 512G region > + return < +0x0000000000000000 0x0000008000000000 0x0000000000000200 It's 512G + 1 ;) > +EOF > + } > + }, > +); > +