public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH qemu-server 1/2] pci: move mdev related code to own module
Date: Thu,  5 Mar 2026 10:16:54 +0100	[thread overview]
Message-ID: <20260305091711.1221589-11-d.csapak@proxmox.com> (raw)
In-Reply-To: <20260305091711.1221589-1-d.csapak@proxmox.com>

Move some code from PVE::QemuServer::PCI, but also from PVE::SysFSTools,
since it makes much more sense to have it here.

Use the current PVE::File module instead of the legacy calls to
PVE::Tools, and modernize the code with Perl's v5.36 signature syntax.

While at it, change some old uses of SysFSTools' generate_mdev_uuid to
the local one.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/PVE/QemuServer.pm           |   5 +-
 src/PVE/QemuServer/Makefile     |   1 +
 src/PVE/QemuServer/PCI.pm       |  58 +++----------
 src/PVE/QemuServer/PCI/Makefile |   9 ++
 src/PVE/QemuServer/PCI/Mdev.pm  | 145 ++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 47 deletions(-)
 create mode 100644 src/PVE/QemuServer/PCI/Makefile
 create mode 100644 src/PVE/QemuServer/PCI/Mdev.pm

diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index dbcd8841..b407a1ed 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -87,6 +87,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::Network;
 use PVE::QemuServer::OVMF;
 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
+use PVE::QemuServer::PCI::Mdev;
 use PVE::QemuServer::QemuImage;
 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
 use PVE::QemuServer::QSD;
@@ -5665,7 +5666,7 @@ sub vm_start_nolock {
                     my $smbios_conf = parse_smbios1($conf->{smbios1});
                     $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
                 }
-                $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index)
+                $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index)
                     if !defined($uuid);
             }
         }
@@ -6095,7 +6096,7 @@ sub cleanup_pci_devices {
     foreach my $key (keys %$conf) {
         next if $key !~ m/^hostpci(\d+)$/;
         my $hostpciindex = $1;
-        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);
+        my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $hostpciindex);
         my $d = parse_hostpci($conf->{$key});
         if ($d->{mdev}) {
             # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
diff --git a/src/PVE/QemuServer/Makefile b/src/PVE/QemuServer/Makefile
index 7e48c388..821556ef 100644
--- a/src/PVE/QemuServer/Makefile
+++ b/src/PVE/QemuServer/Makefile
@@ -35,3 +35,4 @@ SOURCES=Agent.pm	\
 install: $(SOURCES)
 	for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/$$i; done
 	$(MAKE) -C Cfg2Cmd install
+	$(MAKE) -C PCI install
diff --git a/src/PVE/QemuServer/PCI.pm b/src/PVE/QemuServer/PCI.pm
index c9cf8de0..0b67943c 100644
--- a/src/PVE/QemuServer/PCI.pm
+++ b/src/PVE/QemuServer/PCI.pm
@@ -12,6 +12,7 @@ use PVE::Tools;
 
 use PVE::QemuServer::Helpers;
 use PVE::QemuServer::Machine;
+use PVE::QemuServer::PCI::Mdev;
 
 use base 'Exporter';
 
@@ -282,11 +283,6 @@ sub get_pci_addr_map {
     return $pci_addr_map;
 }
 
-sub generate_mdev_uuid {
-    my ($vmid, $index) = @_;
-    return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
-}
-
 my $get_addr_mapping_from_id = sub {
     my ($map, $id) = @_;
 
@@ -543,41 +539,6 @@ sub parse_hostpci_devices {
     return $parsed_devices;
 }
 
-# set vgpu type of a vf of an nvidia gpu with kernel 6.8 or newer
-my sub create_nvidia_device {
-    my ($id, $model) = @_;
-
-    $id = PVE::SysFSTools::normalize_pci_id($id);
-
-    my $creation = "/sys/bus/pci/devices/$id/nvidia/current_vgpu_type";
-
-    die "no nvidia sysfs api for '$id'\n" if !-f $creation;
-
-    my $current = PVE::Tools::file_read_firstline($creation);
-    if ($current ne "0") {
-        return 1 if $current eq $model;
-        # reset vgpu type so we can see all available and set the real device
-        die "unable to reset vgpu type for '$id'\n" if !PVE::SysFSTools::file_write($creation, "0");
-    }
-
-    my $types = PVE::SysFSTools::get_mdev_types($id);
-    my $selected;
-    for my $type_definition ($types->@*) {
-        next if $type_definition->{type} ne "nvidia-$model";
-        $selected = $type_definition;
-    }
-
-    if (!defined($selected) || $selected->{available} < 1) {
-        die "vgpu type '$model' not available for '$id'\n";
-    }
-
-    if (!PVE::SysFSTools::file_write($creation, $model)) {
-        die "could not set vgpu type to '$model' for '$id'\n";
-    }
-
-    return 1;
-}
-
 # takes the hash returned by parse_hostpci_devices and for all non mdev gpus,
 # selects one of the given alternatives by trying to reserve it
 #
@@ -612,7 +573,10 @@ sub choose_hostpci_devices {
             $add_used_device->($device->{ids});
             if ($device->{nvidia} && !$dry_run) {
                 reserve_pci_usage($device->{ids}->[0]->{id}, $vmid, 10, undef);
-                create_nvidia_device($device->{ids}->[0]->{id}, $device->{nvidia});
+                PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+                    $device->{ids}->[0]->{id},
+                    $device->{nvidia},
+                );
             }
             next;
         }
@@ -628,7 +592,11 @@ sub choose_hostpci_devices {
             }
 
             if ($device->{nvidia} && !$dry_run) {
-                eval { create_nvidia_device($ids->[0], $device->{nvidia}) };
+                eval {
+                    PVE::QemuServer::PCI::Mdev::create_nvidia_device(
+                        $ids->[0], $device->{nvidia},
+                    );
+                };
                 if (my $err = $@) {
                     warn $err;
                     remove_pci_reservation($vmid, $ids);
@@ -696,7 +664,7 @@ sub print_hostpci_devices {
 
         my $sysfspath;
         if ($d->{mdev}) {
-            my $uuid = generate_mdev_uuid($vmid, $i);
+            my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $i);
             $sysfspath = "/sys/bus/mdev/devices/$uuid";
         }
 
@@ -748,8 +716,8 @@ sub prepare_pci_device {
     if ($device->{nvidia} || $driver eq "keep") {
         # nothing to do
     } elsif (my $mdev = $device->{mdev}) {
-        my $uuid = generate_mdev_uuid($vmid, $index);
-        PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $mdev);
+        my $uuid = PVE::QemuServer::PCI::Mdev::generate_mdev_uuid($vmid, $index);
+        PVE::QemuServer::PCI::Mdev::pci_create_mdev_device($pciid, $uuid, $mdev);
     } else {
         die "can't unbind/bind PCI group to VFIO '$pciid'\n"
             if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
diff --git a/src/PVE/QemuServer/PCI/Makefile b/src/PVE/QemuServer/PCI/Makefile
new file mode 100644
index 00000000..ecf37411
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Makefile
@@ -0,0 +1,9 @@
+DESTDIR=
+PREFIX=/usr
+PERLDIR=$(PREFIX)/share/perl5
+
+SOURCES=Mdev.pm
+
+.PHONY: install
+install: $(SOURCES)
+	for i in $(SOURCES); do install -D -m 0644 $$i $(DESTDIR)$(PERLDIR)/PVE/QemuServer/PCI/$$i; done
diff --git a/src/PVE/QemuServer/PCI/Mdev.pm b/src/PVE/QemuServer/PCI/Mdev.pm
new file mode 100644
index 00000000..3b42ce2d
--- /dev/null
+++ b/src/PVE/QemuServer/PCI/Mdev.pm
@@ -0,0 +1,145 @@
+package PVE::QemuServer::PCI::Mdev;
+
+use v5.36;
+
+use PVE::SysFSTools;
+use PVE::File qw(file_read_first_line dir_glob_foreach file_get_contents);
+
+my $pcisysfs = "/sys/bus/pci";
+
+sub generate_mdev_uuid($vmid, $index) {
+    return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
+}
+
+#
+# return format:
+# [
+#     {
+#         type => 'FooType_1',
+#         description => "a longer description with custom format\nand newlines",
+#         available => 5,
+#     },
+#     ...
+# ]
+#
+sub get_mdev_types($id) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $types = [];
+
+    my $dev_path = "$pcisysfs/devices/$id";
+    my $mdev_path = "$dev_path/mdev_supported_types";
+    my $nvidia_path = "$dev_path/nvidia/creatable_vgpu_types";
+    if (-d $mdev_path) {
+        dir_glob_foreach(
+            $mdev_path,
+            '[^\.].*',
+            sub {
+                my ($type) = @_;
+
+                my $type_path = "$mdev_path/$type";
+
+                my $available = int(file_read_first_line("$type_path/available_instances"));
+                my $description = file_get_contents("$type_path/description");
+
+                my $entry = {
+                    type => $type,
+                    description => $description,
+                    available => $available,
+                };
+
+                my $name = file_read_first_line("$type_path/name");
+                $entry->{name} = $name if defined($name);
+
+                push @$types, $entry;
+            },
+        );
+    } elsif (-f $nvidia_path) {
+        my $creatable = file_get_contents($nvidia_path);
+        for my $line (split("\n", $creatable)) {
+            next if $line =~ m/^ID/; # header
+            next if $line !~ m/^(.*?)\s*:\s*(.*)$/;
+            my $id = $1;
+            my $name = $2;
+
+            push $types->@*, {
+                type => "nvidia-$id", # backwards compatibility
+                description => "", # TODO, read from xml/nvidia-smi ?
+                available => 1,
+                name => $name,
+            };
+        }
+    }
+
+    return $types;
+}
+
+sub pci_create_mdev_device($pciid, $uuid, $type) {
+    $pciid = PVE::SysFSTools::normalize_pci_id($pciid);
+
+    my $basedir = "$pcisysfs/devices/$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+        if !-d $mdev_dir;
+
+    die "pci device '$pciid' has no type '$type'\n"
+        if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+        # it already exists, checking type
+        my $typelink = readlink("$basedir/$uuid/mdev_type");
+        my ($existingtype) = $typelink =~ m|/([^/]+)$|;
+        die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+            if $type ne $existingtype;
+
+        # instance exists, so use it but warn the user
+        warn "mdev instance '$uuid' already existed, using it.\n";
+        return undef;
+    }
+
+    my $instances = file_read_first_line("$mdev_dir/$type/available_instances");
+    my ($avail) = $instances =~ m/^(\d+)$/;
+    die "pci device '$pciid' has no available instances of '$type'\n"
+        if $avail < 1;
+
+    die "could not create '$type' for pci devices '$pciid'\n"
+        if !PVE::SysFSTools::file_write("$mdev_dir/$type/create", $uuid);
+
+    return undef;
+}
+
+# set vgpu type of a vf of an nvidia gpu with kernel 6.8 or newer
+sub create_nvidia_device($id, $model) {
+    $id = PVE::SysFSTools::normalize_pci_id($id);
+
+    my $creation = "$pcisysfs/devices/$id/nvidia/current_vgpu_type";
+
+    die "no nvidia sysfs api for '$id'\n" if !-f $creation;
+
+    my $current = file_read_first_line($creation);
+    if ($current ne "0") {
+        return 1 if $current eq $model;
+        # reset vgpu type so we can see all available and set the real device
+        die "unable to reset vgpu type for '$id'\n" if !PVE::SysFSTools::file_write($creation, "0");
+    }
+
+    my $types = get_mdev_types($id);
+    my $selected;
+    for my $type_definition ($types->@*) {
+        next if $type_definition->{type} ne "nvidia-$model";
+        $selected = $type_definition;
+    }
+
+    if (!defined($selected) || $selected->{available} < 1) {
+        die "vgpu type '$model' not available for '$id'\n";
+    }
+
+    if (!PVE::SysFSTools::file_write($creation, $model)) {
+        die "could not set vgpu type to '$model' for '$id'\n";
+    }
+
+    return 1;
+}
+
+1;
-- 
2.47.3





  parent reply	other threads:[~2026-03-05  9:16 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-05  9:16 [PATCH common/debcargo-conf/manager/proxmox-perl-rs/qemu-server 00/13] use NVML for vGPU info querying Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 1/8] nvml-wrapper-sys: Update to 0.9.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 2/8] nvml-wrapper-sys: release 0.9.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 3/8] nvml-wrapper: Update to 0.11.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 4/8] nvml-wrapper: release 0.11.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 5/8] nvml-wrapper: Update to 0.12.0 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 6/8] nvml-wrapper: add patch for vgpu ids Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 7/8] backport nvml-wrapper-sys 0.9.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH debcargo-conf 8/8] backport nvml-wrapper 0.12.0-1 Dominik Csapak
2026-03-05  9:16 ` [PATCH proxmox-perl-rs 1/1] pve: add binding for accessing vgpu info Dominik Csapak
2026-03-05  9:16 ` Dominik Csapak [this message]
2026-03-05  9:16 ` [PATCH qemu-server 2/2] pci: mdev: use PVE::RS::NVML for nvidia mdev information Dominik Csapak
2026-03-05  9:16 ` [PATCH manager 1/1] api: hardware: pci: use NVML for querying " Dominik Csapak
2026-03-05  9:16 ` [PATCH common 1/1] sysfs tools: remove moved code Dominik Csapak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260305091711.1221589-11-d.csapak@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal