all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH qemu-server 1/2] pci: move mdev related code to own module
Date: Thu,  5 Mar 2026 10:16:54 +0100	[thread overview]
Message-ID: <20260305091711.1221589-11-d.csapak@proxmox.com> (raw)
In-Reply-To: <20260305091711.1221589-1-d.csapak@proxmox.com>

Some of the code comes from PVE::QemuServer::PCI, but some also from
PVE::SysFSTools, since it makes much more sense to have it here.

Use the current PVE::File module instead of the legacy calls to
PVE::Tools, and modernize the code with Perl's v5.36 signature syntax.

While at it, change some old uses of SysFSTools' generate_mdev_uuid to
the local one.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/PVE/QemuServer.pm           |   5 +-
 src/PVE/QemuServer/Makefile     |   1 +
 src/PVE/QemuServer/PCI.pm       |  58 +++----------
 src/PVE/QemuServer/PCI/Makefile |   9 ++
 src/PVE/QemuServer/PCI/Mdev.pm  | 145 ++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 47 deletions(-)
 create mode 100644 src/PVE/QemuServer/PCI/Makefile
 create mode 100644 src/PVE/QemuServer/PCI/Mdev.pm

diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index dbcd8841..b407a1ed 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -87,6 +87,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::Network;
 use PVE::QemuServer::OVMF;
 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
+use PVE::QemuServer::PCI::Mdev;
 use PVE::QemuServer::QemuImage;
 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
 use PVE::QemuServer::QSD;
@@ -5665,7 +5666,7 @@ sub vm_start_nolock {
                     my $smbios_conf = parse_smbios1($conf->{smbios1});
                     $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
                 }
-                $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index)
+                $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index)
                     if !defined($uuid);
             }
         }
@@ -6095,7 +6096,7 @@ sub cleanup_pci_devices {
     foreach my $key (keys %$conf) {
         next if $key !~ m/^hostpci(\d+)$/;
         my $hostpciindex = $1;
-        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);
+        my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $hostpciindex);
         my $d = parse_hostpci($conf->{$key});
         if ($d->{mdev}) {
             # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
diff --git a/src/PVE/QemuServer/Makefile b/src/PVE/QemuServer/Makefile
index 7e48c388..821556ef 100644
--- a/src/PVE/QemuServer/Makefile
+++ b/src/PVE/QemuServer/Makefile
@@ -35,3 +35,4 @@ SOURCES=Agent.pm	\
 install: $(SOURCES)
 	for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/$$i; done
 	$(MAKE) -C Cfg2Cmd install
+	$(MAKE) -C PCI install
diff --git a/src/PVE/QemuServer/PCI.pm b/src/PVE/QemuServer/PCI.pm
index c9cf8de0..0b67943c 100644
--- a/src/PVE/QemuServer/PCI.pm
+++ b/src/PVE/QemuServer/PCI.pm
@@ -12,6 +12,7 @@ use PVE::Tools;
 
 use PVE::QemuServer::Helpers;
 use PVE::QemuServer::Machine;
+use PVE::QemuServer::PCI::Mdev;
 
 use base 'Exporter';
 
@@ -282,11 +283,6 @@ sub get_pci_addr_map {
     return $pci_addr_map;
 }
 
-sub generate_mdev_uuid {
-    my ($vmid, $index) = @_;
-    return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
-}
-
 my $get_addr_mapping_from_id = sub {
     my ($map, $id) = @_;
 
@@ -543,41 +539,6 @@ sub parse_hostpci_devices {
     return $parsed_devices;
 }
 
-# set vgpu type of a vf of an nvidia gpu with kernel 6.8 or newer
-my sub create_nvidia_device {
-    my ($id, $model) = @_;
-
-    $id = PVE::SysFSTools::normalize_pci_id($id);
-
-    my $creation = "/sys/bus/pci/devices/$id/nvidia/current_vgpu_type";
-
-    die "no nvidia sysfs api for '$id'\n" if !-f $creation;
-
-    my $current = PVE::Tools::file_read_firstline($creation);
-    if ($current ne "0") {
-        return 1 if $current eq $model;
-        # reset vgpu type so we can see all available and set the real device
-        die "unable to reset vgpu type for '$id'\n" if !PVE::SysFSTools::file_write($creation, "0");
-    }
-
-    my $types = PVE::SysFSTools::get_mdev_types($id);
-    my $selected;
-    for my $type_definition ($types->@*) {
-        next if $type_definition->{type} ne "nvidia-$model";
-        $selected = $type_definition;
-    }
-
-    if (!defined($selected) || $selected->{available} < 1) {
-        die "vgpu type '$model' not available for '$id'\n";
-    }
-
-    if (!PVE::SysFSTools::file_write($creation, $model)) {
-        die "could not set vgpu type to '$model' for '$id'\n";
-    }
-
-    return 1;
-}
-
 # takes the hash returned by parse_hostpci_devices and for all non mdev gpus,
 # selects one of the given alternatives by trying to reserve it
 #
@@ -612,7 +573,10 @@ sub choose_hostpci_devices {
             $add_used_device->($device->{ids});
             if ($device->{nvidia} && !$dry_run) {
                 reserve_pci_usage($device->{ids}->[0]->{id}, $vmid, 10, undef);
-                create_nvidia_device($device->{ids}->[0]->{id}, $device->{nvidia});
+                PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+                    $device->{ids}->[0]->{id},
+                    $device->{nvidia},
+                );
             }
             next;
         }
@@ -628,7 +592,11 @@ sub choose_hostpci_devices {
             }
 
             if ($device->{nvidia} && !$dry_run) {
-                eval { create_nvidia_device($ids->[0], $device->{nvidia}) };
+                eval {
+                    PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+                        $ids->[0], $device->{nvidia},
+                    );
+                };
                 if (my $err = $@) {
                     warn $err;
                     remove_pci_reservation($vmid, $ids);
@@ -696,7 +664,7 @@ sub print_hostpci_devices {
 
         my $sysfspath;
         if ($d->{mdev}) {
-            my $uuid = generate_mdev_uuid($vmid, $i);
+            my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $i);
             $sysfspath = "/sys/bus/mdev/devices/$uuid";
         }
 
@@ -748,8 +716,8 @@ sub prepare_pci_device {
     if ($device->{nvidia} || $driver eq "keep") {
         # nothing to do
     } elsif (my $mdev = $device->{mdev}) {
-        my $uuid = generate_mdev_uuid($vmid, $index);
-        PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $mdev);
+        my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index);
+        PVE::QemuServer::PCI::Mdev::pci_create_mdev_device($pciid, $uuid, $mdev);
     } else {
         die "can't unbind/bind PCI group to VFIO '$pciid'\n"
             if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
diff --git a/src/PVE/QemuServer/PCI/Makefile b/src/PVE/QemuServer/PCI/Makefile
new file mode 100644
index 00000000..ecf37411
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Makefile
@@ -0,0 +1,9 @@
+DESTDIR=
+PREFIX=/usr
+PERLDIR=$(PREFIX)/share/perl5
+
+SOURCES=Mdev.pm
+
+.PHONY: install
+install: $(SOURCES)
+	for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/PCI/$$i; done
diff --git a/src/PVE/QemuServer/PCI/Mdev.pm b/src/PVE/QemuServer/PCI/Mdev.pm
new file mode 100644
index 00000000..3b42ce2d
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Mdev.pm
@@ -0,0 +1,145 @@
+package PVE::QemuServer::PCI::Mdev;
+
+use v5.36;
+
+use PVE::SysFSTools;
+use PVE::File qw(file_read_first_line dir_glob_foreach file_get_contents);
+
+my $pcisysfs = "/sys/bus/pci";
+
+sub generate_mdev_uuid($vmid, $index) {
+    return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
+}
+
+#
+# return format:
+# [
+#     {
+#         type => 'FooType_1',
+#         description => "a longer description with custom format\nand newlines",
+#         available => 5,
+#     },
+#     ...
+# ]
+#
+sub get_mdev_types($id) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $types = [];
+
+    my $dev_path = "$pcisysfs/devices/$id";
+    my $mdev_path = "$dev_path/mdev_supported_types";
+    my $nvidia_path = "$dev_path/nvidia/creatable_vgpu_types";
+    if (-d $mdev_path) {
+        dir_glob_foreach(
+            $mdev_path,
+            '[^\.].*',
+            sub {
+                my ($type) = @_;
+
+                my $type_path = "$mdev_path/$type";
+
+                my $available = int(file_read_first_line("$type_path/available_instances"));
+                my $description = file_get_contents("$type_path/description");
+
+                my $entry = {
+                    type => $type,
+                    description => $description,
+                    available => $available,
+                };
+
+                my $name = file_read_first_line("$type_path/name");
+                $entry->{name} = $name if defined($name);
+
+                push @$types, $entry;
+            },
+        );
+    } elsif (-f $nvidia_path) {
+        my $creatable = PVE::Tools::file_get_contents($nvidia_path);
+        for my $line (split("\n", $creatable)) {
+            next if $line =~ m/^ID/; # header
+            next if $line !~ m/^(.*?)\s*:\s*(.*)$/;
+            my $id = $1;
+            my $name = $2;
+
+            push $types->@*, {
+                type => "nvidia-$id", # backwards compatibility
+                description => "", # TODO, read from xml/nvidia-smi ?
+                available => 1,
+                name => $name,
+            };
+        }
+    }
+
+    return $types;
+}
+
+sub pci_create_mdev_device($pciid, $uuid, $type) {
+    $pciid = PVE::SysFSTools::normalize_pci_id($pciid);
+
+    my $basedir = "$pcisysfs/devices/$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+        if !-d $mdev_dir;
+
+    die "pci device '$pciid' has no type '$type'\n"
+        if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+        # it already exists, checking type
+        my $typelink = readlink("$basedir/$uuid/mdev_type");
+        my ($existingtype) = $typelink =~ m|/([^/]+)$|;
+        die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+            if $type ne $existingtype;
+
+        # instance exists, so use it but warn the user
+        warn "mdev instance '$uuid' already existed, using it.\n";
+        return undef;
+    }
+
+    my $instances = file_read_first_line("$mdev_dir/$type/available_instances");
+    my ($avail) = $instances =~ m/^(\d+)$/;
+    die "pci device '$pciid' has no available instances of '$type'\n"
+        if $avail < 1;
+
+    die "could not create '$type' for pci devices '$pciid'\n"
+        if !PVE::SysFSTools::file_write("$mdev_dir/$type/create", $uuid);
+
+    return undef;
+}
+
+# set vgpu type of a vf of an nvidia gpu with kernel 6.8 or newer
+sub create_nvidia_device($id, $model) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $creation = "$pcisysfs/devices/$id/nvidia/current_vgpu_type";
+
+    die "no nvidia sysfs api for '$id'\n" if !-f $creation;
+
+    my $current = file_read_first_line($creation);
+    if ($current ne "0") {
+        return 1 if $current eq $model;
+        # reset vgpu type so we can see all available and set the real device
+        die "unable to reset vgpu type for '$id'\n" if !PVE::SysFSTools::file_write($creation, "0");
+    }
+
+    my $types = get_mdev_types($id);
+    my $selected;
+    for my $type_definition ($types->@*) {
+        next if $type_definition->{type} ne "nvidia-$model";
+        $selected = $type_definition;
+    }
+
+    if (!defined($selected) || $selected->{available} < 1) {
+        die "vgpu type '$model' not available for '$id'\n";
+    }
+
+    if (!PVE::SysFSTools::file_write($creation, $model)) {
+        die "could not set vgpu type to '$model' for '$id'\n";
+    }
+
+    return 1;
+}
+
+1;
-- 
2.47.3





  parent reply	other threads:[~2026-03-05  9:16 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-05  9:16 [PATCH common/debcargo-conf/manager/proxmox-perl-rs/qemu-server 00/13] use NVML for vGPU info querying Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 1/8] nvml-wrapper-sys: Update to 0.9.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 2/8] nvml-wrapper-sys: release 0.9.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 3/8] nvml-wrapper: Update to 0.11.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 4/8] nvml-wrapper: release 0.11.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 5/8] nvml-wrapper: Update to 0.12.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 6/8] nvml-wrapper: add patch for vgpu ids Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 7/8] backport nvml-wrapper-sys 0.9.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 8/8] backport nvml-wrapper 0.12.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH proxmox-perl-rs 1/1] pve: add binding for accessing vgpu info Dominik Csapak
2026-03-05  9:16 ` Dominik Csapak [this message]
2026-03-05  9:16 ` [PATCH qemu-server 2/2] pci: mdev: use PVE::RS::NVML for nvidia mdev information Dominik Csapak
2026-03-05  9:16 ` [PATCH manager 1/1] api: hardware: pci: use NVML for querying " Dominik Csapak
2026-03-05  9:16 ` [PATCH common 1/1] sysfs tools: remove moved code Dominik Csapak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260305091711.1221589-11-d.csapak@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal