From: Dominik Csapak <d.csapak@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH qemu-server 1/2] pci: move mdev related code to own module
Date: Thu, 5 Mar 2026 10:16:54 +0100 [thread overview]
Message-ID: <20260305091711.1221589-11-d.csapak@proxmox.com> (raw)
In-Reply-To: <20260305091711.1221589-1-d.csapak@proxmox.com>
Some of the code comes from PVE::QemuServer::PCI, but some also from
PVE::SysFSTools, since it makes much more sense to have it here.
Use the current PVE::File module instead of the legacy calls to
PVE::Tools, and modernize the code with Perl's v5.36 signature syntax.
While at it, change some old uses of the SysFSTools generate_mdev_uuid to the
local one.
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
src/PVE/QemuServer.pm | 5 +-
src/PVE/QemuServer/Makefile | 1 +
src/PVE/QemuServer/PCI.pm | 58 +++----------
src/PVE/QemuServer/PCI/Makefile | 9 ++
src/PVE/QemuServer/PCI/Mdev.pm | 145 ++++++++++++++++++++++++++++++++
5 files changed, 171 insertions(+), 47 deletions(-)
create mode 100644 src/PVE/QemuServer/PCI/Makefile
create mode 100644 src/PVE/QemuServer/PCI/Mdev.pm
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index dbcd8841..b407a1ed 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -87,6 +87,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::Network;
use PVE::QemuServer::OVMF;
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
+use PVE::QemuServer::PCI::Mdev;
use PVE::QemuServer::QemuImage;
use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
use PVE::QemuServer::QSD;
@@ -5665,7 +5666,7 @@ sub vm_start_nolock {
my $smbios_conf = parse_smbios1($conf->{smbios1});
$uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
}
- $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index)
+ $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index)
if !defined($uuid);
}
}
@@ -6095,7 +6096,7 @@ sub cleanup_pci_devices {
foreach my $key (keys %$conf) {
next if $key !~ m/^hostpci(\d+)$/;
my $hostpciindex = $1;
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);
+ my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $hostpciindex);
my $d = parse_hostpci($conf->{$key});
if ($d->{mdev}) {
# NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
diff --git a/src/PVE/QemuServer/Makefile b/src/PVE/QemuServer/Makefile
index 7e48c388..821556ef 100644
--- a/src/PVE/QemuServer/Makefile
+++ b/src/PVE/QemuServer/Makefile
@@ -35,3 +35,4 @@ SOURCES=Agent.pm \
install: $(SOURCES)
for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/$$i; done
$(MAKE) -C Cfg2Cmd install
+ $(MAKE) -C PCI install
diff --git a/src/PVE/QemuServer/PCI.pm b/src/PVE/QemuServer/PCI.pm
index c9cf8de0..0b67943c 100644
--- a/src/PVE/QemuServer/PCI.pm
+++ b/src/PVE/QemuServer/PCI.pm
@@ -12,6 +12,7 @@ use PVE::Tools;
use PVE::QemuServer::Helpers;
use PVE::QemuServer::Machine;
+use PVE::QemuServer::PCI::Mdev;
use base 'Exporter';
@@ -282,11 +283,6 @@ sub get_pci_addr_map {
return $pci_addr_map;
}
-sub generate_mdev_uuid {
- my ($vmid, $index) = @_;
- return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
-}
-
my $get_addr_mapping_from_id = sub {
my ($map, $id) = @_;
@@ -543,41 +539,6 @@ sub parse_hostpci_devices {
return $parsed_devices;
}
-# set vgpu type of a vf of an nvidia gpu with kernel 6.8 or newer
-my sub create_nvidia_device {
- my ($id, $model) = @_;
-
- $id = PVE::SysFSTools::normalize_pci_id($id);
-
- my $creation = "/sys/bus/pci/devices/$id/nvidia/current_vgpu_type";
-
- die "no nvidia sysfs api for '$id'\n" if !-f $creation;
-
- my $current = PVE::Tools::file_read_firstline($creation);
- if ($current ne "0") {
- return 1 if $current eq $model;
- # reset vgpu type so we can see all available and set the real device
- die "unable to reset vgpu type for '$id'\n" if !PVE::SysFSTools::file_write($creation, "0");
- }
-
- my $types = PVE::SysFSTools::get_mdev_types($id);
- my $selected;
- for my $type_definition ($types->@*) {
- next if $type_definition->{type} ne "nvidia-$model";
- $selected = $type_definition;
- }
-
- if (!defined($selected) || $selected->{available} < 1) {
- die "vgpu type '$model' not available for '$id'\n";
- }
-
- if (!PVE::SysFSTools::file_write($creation, $model)) {
- die "could not set vgpu type to '$model' for '$id'\n";
- }
-
- return 1;
-}
-
# takes the hash returned by parse_hostpci_devices and for all non mdev gpus,
# selects one of the given alternatives by trying to reserve it
#
@@ -612,7 +573,10 @@ sub choose_hostpci_devices {
$add_used_device->($device->{ids});
if ($device->{nvidia} && !$dry_run) {
reserve_pci_usage($device->{ids}->[0]->{id}, $vmid, 10, undef);
- create_nvidia_device($device->{ids}->[0]->{id}, $device->{nvidia});
+ PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+ $device->{ids}->[0]->{id},
+ $device->{nvidia},
+ );
}
next;
}
@@ -628,7 +592,11 @@ sub choose_hostpci_devices {
}
if ($device->{nvidia} && !$dry_run) {
- eval { create_nvidia_device($ids->[0], $device->{nvidia}) };
+ eval {
+ PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+ $ids->[0], $device->{nvidia},
+ );
+ };
if (my $err = $@) {
warn $err;
remove_pci_reservation($vmid, $ids);
@@ -696,7 +664,7 @@ sub print_hostpci_devices {
my $sysfspath;
if ($d->{mdev}) {
- my $uuid = generate_mdev_uuid($vmid, $i);
+ my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $i);
$sysfspath = "/sys/bus/mdev/devices/$uuid";
}
@@ -748,8 +716,8 @@ sub prepare_pci_device {
if ($device->{nvidia} || $driver eq "keep") {
# nothing to do
} elsif (my $mdev = $device->{mdev}) {
- my $uuid = generate_mdev_uuid($vmid, $index);
- PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $mdev);
+ my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index);
+ PVE::QemuServer::PCI::Mdev::pci_create_mdev_device($pciid, $uuid, $mdev);
} else {
die "can't unbind/bind PCI group to VFIO '$pciid'\n"
if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
diff --git a/src/PVE/QemuServer/PCI/Makefile b/src/PVE/QemuServer/PCI/Makefile
new file mode 100644
index 00000000..ecf37411
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Makefile
@@ -0,0 +1,9 @@
+DESTDIR=
+PREFIX=/usr
+PERLDIR=$(PREFIX)/share/perl5
+
+SOURCES=Mdev.pm
+
+.PHONY: install
+install: $(SOURCES)
+ for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/PCI/$$i; done
diff --git a/src/PVE/QemuServer/PCI/Mdev.pm b/src/PVE/QemuServer/PCI/Mdev.pm
new file mode 100644
index 00000000..3b42ce2d
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Mdev.pm
@@ -0,0 +1,145 @@
+package PVE::QemuServer::PCI::Mdev;
+
+use v5.36;
+
+use PVE::SysFSTools;
+use PVE::File qw(file_read_first_line dir_glob_foreach file_get_contents);
+
+my $pcisysfs = "/sys/bus/pci";
+
+# Derive the deterministic UUID of the mediated device that belongs to hostpci
+# slot $index of guest $vmid: the zero-padded index fills the first group and
+# the zero-padded vmid the last one.
+sub generate_mdev_uuid($vmid, $index) {
+    my $uuid_format = "%08d-0000-0000-0000-%012d";
+    return sprintf($uuid_format, $index, $vmid);
+}
+
+# Collect the mediated-device / vGPU types offered by a PCI device.
+#
+# $id is a PCI id; it is normalized with PVE::SysFSTools::normalize_pci_id first.
+#
+# Two sysfs layouts below the device directory are handled:
+#  * mdev_supported_types/        - one sub-directory per type
+#  * nvidia/creatable_vgpu_types  - a "<id> : <name>" table with a header line
+# If neither exists, a reference to an empty array is returned.
+#
+# return format:
+# [
+#     {
+#         type => 'FooType_1',
+#         description => "a longer description with custom format\nand newlines",
+#         available => 5,
+#         name => 'Foo Type', # only set when a name could be read
+#     },
+#     ...
+# ]
+#
+sub get_mdev_types($id) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $types = [];
+
+    my $dev_path = "$pcisysfs/devices/$id";
+    my $mdev_path = "$dev_path/mdev_supported_types";
+    my $nvidia_path = "$dev_path/nvidia/creatable_vgpu_types";
+    if (-d $mdev_path) {
+        # every entry not starting with a dot is one supported type
+        dir_glob_foreach(
+            $mdev_path,
+            '[^\.].*',
+            sub {
+                my ($type) = @_;
+
+                my $type_path = "$mdev_path/$type";
+
+                my $available = int(file_read_first_line("$type_path/available_instances"));
+                my $description = file_get_contents("$type_path/description");
+
+                my $entry = {
+                    type => $type,
+                    description => $description,
+                    available => $available,
+                };
+
+                # the name is optional, only add the key when there is one
+                my $name = file_read_first_line("$type_path/name");
+                $entry->{name} = $name if defined($name);
+
+                push $types->@*, $entry;
+            },
+        );
+    } elsif (-f $nvidia_path) {
+        # use the imported PVE::File helper like the branch above; this module
+        # does not load PVE::Tools itself
+        my $creatable = file_get_contents($nvidia_path);
+        for my $line (split("\n", $creatable)) {
+            next if $line =~ m/^ID/; # header
+            next if $line !~ m/^(.*?)\s*:\s*(.*)$/;
+            # distinct variable name, so the normalized PCI id in $id is not shadowed
+            my ($vgpu_id, $name) = ($1, $2);
+
+            push $types->@*, {
+                type => "nvidia-$vgpu_id", # backwards compatibility
+                description => "", # TODO, read from xml/nvidia-smi ?
+                available => 1,
+                name => $name,
+            };
+        }
+    }
+
+    return $types;
+}
+
+# Create the mediated device instance $uuid of type $type on PCI device $pciid
+# by writing the UUID to the 'create' attribute of that type in sysfs.
+#
+# An already existing instance with the same UUID is reused (with a warning)
+# when it has the requested type.
+#
+# Dies if the device has no mdev support, does not offer $type, has no
+# available instance left, an existing instance has a different type, or the
+# write to sysfs fails. Returns undef otherwise.
+sub pci_create_mdev_device($pciid, $uuid, $type) {
+    $pciid = PVE::SysFSTools::normalize_pci_id($pciid);
+
+    my $basedir = "$pcisysfs/devices/$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+        if !-d $mdev_dir;
+
+    die "pci device '$pciid' has no type '$type'\n"
+        if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+        # it already exists, checking type
+        # readlink returns undef on failure; treat an unreadable or unparsable
+        # link as a type mismatch instead of comparing against undef
+        my $typelink = readlink("$basedir/$uuid/mdev_type") // '';
+        my ($existingtype) = $typelink =~ m|/([^/]+)$|;
+        die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+            if !defined($existingtype) || $type ne $existingtype;
+
+        # instance exists, so use it but warn the user
+        warn "mdev instance '$uuid' already existed, using it.\n";
+        return undef;
+    }
+
+    # a missing or non-numeric value counts as "no instance available"
+    my $instances = file_read_first_line("$mdev_dir/$type/available_instances") // '';
+    my ($avail) = $instances =~ m/^(\d+)$/;
+    die "pci device '$pciid' has no available instances of '$type'\n"
+        if !defined($avail) || $avail < 1;
+
+    die "could not create '$type' for pci devices '$pciid'\n"
+        if !PVE::SysFSTools::file_write("$mdev_dir/$type/create", $uuid);
+
+    return undef;
+}
+
+# Set the vGPU type of a VF of an nvidia GPU (kernel 6.8 or newer) through the
+# 'nvidia/current_vgpu_type' sysfs attribute of the device.
+#
+# Returns 1 once $model is the selected type (also when it already was).
+# Dies if the attribute is missing, the current type cannot be reset, $model
+# is not among the available types, or the new type cannot be written.
+sub create_nvidia_device($id, $model) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $vgpu_type_file = "$pcisysfs/devices/$id/nvidia/current_vgpu_type";
+    if (!-f $vgpu_type_file) {
+        die "no nvidia sysfs api for '$id'\n";
+    }
+
+    my $active_type = file_read_first_line($vgpu_type_file);
+    if ($active_type ne "0") {
+        # requested type is already active, nothing left to do
+        return 1 if $active_type eq $model;
+
+        # reset vgpu type so we can see all available and set the real device
+        PVE::SysFSTools::file_write($vgpu_type_file, "0")
+            or die "unable to reset vgpu type for '$id'\n";
+    }
+
+    my $wanted_type = "nvidia-$model";
+    my $match;
+    for my $candidate (get_mdev_types($id)->@*) {
+        # no early exit: with duplicate entries the last one is used
+        $match = $candidate if $candidate->{type} eq $wanted_type;
+    }
+
+    die "vgpu type '$model' not available for '$id'\n"
+        if !defined($match) || $match->{available} < 1;
+
+    PVE::SysFSTools::file_write($vgpu_type_file, $model)
+        or die "could not set vgpu type to '$model' for '$id'\n";
+
+    return 1;
+}
+
+1;
--
2.47.3
next prev parent reply other threads:[~2026-03-05 9:16 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-05 9:16 [PATCH common/debcargo-conf/manager/proxmox-perl-rs/qemu-server 00/13] use NVML for vGPU info querying Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 1/8] nvml-wrapper-sys: Update to 0.9.0 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 2/8] nvml-wrapper-sys: release 0.9.0-1 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 3/8] nvml-wrapper: Update to 0.11.0 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 4/8] nvml-wrapper: release 0.11.0-1 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 5/8] nvml-wrapper: Update to 0.12.0 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 6/8] nvml-wrapper: add patch for vgpu ids Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 7/8] backport nvml-wrapper-sys 0.9.0-1 Dominik Csapak
2026-03-05 9:16 ` [PATCH debcargo-conf 8/8] backport nvml-wrapper 0.12.0-1 Dominik Csapak
2026-03-05 9:16 ` [PATCH proxmox-perl-rs 1/1] pve: add binding for accessing vgpu info Dominik Csapak
2026-03-05 9:16 ` Dominik Csapak [this message]
2026-03-05 9:16 ` [PATCH qemu-server 2/2] pci: mdev: use PVE::RS::NVML for nvidia mdev information Dominik Csapak
2026-03-05 9:16 ` [PATCH manager 1/1] api: hardware: pci: use NVML for querying " Dominik Csapak
2026-03-05 9:16 ` [PATCH common 1/1] sysfs tools: remove moved code Dominik Csapak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260305091711.1221589-11-d.csapak@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox