From: Christoph Heiss <c.heiss@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH proxmox-ve-rs 2/4] vfio: add rust-native interface for accessing NVIDIA vGPU info
Date: Tue, 20 Jan 2026 14:13:10 +0100
Message-ID: <20260120131319.949986-3-c.heiss@proxmox.com>
In-Reply-To: <20260120131319.949986-1-c.heiss@proxmox.com>

Add a "rusty" interface on top of the raw NVML bindings for retrieving
information about creatable vGPU. Will be used to e.g. show a proper
description for each creatable vGPU type.

Signed-off-by: Christoph Heiss <c.heiss@proxmox.com>
---
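
For illustration, here is a minimal sketch (not part of this patch) of the
property string that VgpuTypeInfo::description() produces. All field values
below are made up; the real ones are retrieved from NVML at runtime:

    use proxmox_ve_vfio::nvidia::VgpuTypeInfo;

    fn main() {
        // Hypothetical example values, for demonstration only.
        let info = VgpuTypeInfo {
            id: 11,
            name: "GRID M60-2Q".into(),
            class_name: "Quadro".into(),
            max_instances: 4,
            max_instances_per_vm: 1,
            framebuffer_size: 2 * 1024 * 1024 * 1024,
            num_heads: 4,
            max_resolution: (4096, 2160),
            license: "GRID-Virtual-PC,2.0".into(),
            fps_limit: Some(60),
        };

        // Prints:
        // class=Quadro,max-instances=4,max-instances-per-vm=1,framebuffer-size=2048MiB,num-heads=4,max-resolution=4096x2160,license=GRID-Virtual-PC,2.0
        println!("{}", info.description());
    }
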
 .../examples/nv_list_creatable_vgpus.rs       |  15 ++
 proxmox-ve-vfio/src/nvidia/mod.rs             | 123 ++++++++++
 proxmox-ve-vfio/src/nvidia/nvml/mod.rs        | 224 ++++++++++++++++++
 3 files changed, 362 insertions(+)
 create mode 100644 proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs

diff --git a/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs
new file mode 100644
index 0000000..b2f276a
--- /dev/null
+++ b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs
@@ -0,0 +1,15 @@
+use std::env;
+
+use proxmox_ve_vfio::nvidia::creatable_vgpu_types_for_dev;
+
+fn main() {
+    let bus_id = env::args()
+        .nth(1)
+        .expect("vGPU bus id expected as first argument, e.g. 00:01.0");
+
+    let types = creatable_vgpu_types_for_dev(&bus_id).expect("failed to retrieve vGPU info");
+
+    for t in types {
+        println!("{}", t.description());
+    }
+}
diff --git a/proxmox-ve-vfio/src/nvidia/mod.rs b/proxmox-ve-vfio/src/nvidia/mod.rs
index 08a414c..bc2ef17 100644
--- a/proxmox-ve-vfio/src/nvidia/mod.rs
+++ b/proxmox-ve-vfio/src/nvidia/mod.rs
@@ -1,3 +1,126 @@
 //! Provides access to the state of NVIDIA (v)GPU devices connected to the system.
 
+use anyhow::Result;
+use serde::Serialize;
+
 mod nvml;
+
+use nvml::bindings::{nvmlDevice_t, nvmlVgpuTypeId_t};
+
+/// A single vGPU type that is supported and/or currently creatable
+/// for a given GPU.
+#[derive(Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct VgpuTypeInfo {
+    /// Unique vGPU type ID.
+    pub id: u32,
+    /// An alphanumeric string that denotes a particular vGPU, e.g. GRID M60-2Q.
+    pub name: String,
+    /// Class of the vGPU, e.g. Quadro.
+    pub class_name: String,
+    /// Maximum number of vGPU instances creatable of this vGPU type.
+    pub max_instances: u32,
+    /// Maximum number of vGPU instances supported per VM for this vGPU type.
+    pub max_instances_per_vm: u32,
+    /// vGPU framebuffer size in bytes.
+    pub framebuffer_size: u64,
+    /// Number of supported display heads by this vGPU type.
+    pub num_heads: u32,
+    /// Maximum resolution of a single head available across all display heads
+    /// supported by this vGPU type.
+    pub max_resolution: (u32, u32),
+    /// License types and versions required to run this specified vGPU type,
+    /// each in the form "\<license name\>,\<version\>", for example
+    /// "GRID-Virtual-PC,2.0".
+    /// A vGPU type might also be runnable with more than one type of license,
+    /// in which cases each license is separated by a semicolon.
+    pub license: String,
+    /// Static frame limit for this vGPU, if the frame limiter is enabled for
+    /// this vGPU type.
+    pub fps_limit: Option<u32>,
+}
+
+impl VgpuTypeInfo {
+    fn get_with(nvml: &nvml::Nvml, dev: nvmlDevice_t, type_id: nvmlVgpuTypeId_t) -> Result<Self> {
+        let num_heads = nvml.vgpu_type_num_display_heads(type_id)?;
+
+        // Take the best resolution among all available display heads
+        let max_resolution = (0..num_heads)
+            .filter_map(|i| nvml.vgpu_type_max_resolution(type_id, i).ok())
+            .max()
+            .unwrap_or((0, 0));
+
+        Ok(VgpuTypeInfo {
+            id: type_id,
+            name: nvml.vgpu_type_name(type_id)?,
+            class_name: nvml.vgpu_type_class_name(type_id)?,
+            max_instances: nvml.vgpu_type_max_instances(dev, type_id)?,
+            max_instances_per_vm: nvml.vgpu_type_max_instances_per_vm(type_id)?,
+            framebuffer_size: nvml.vgpu_type_framebuffer_size(type_id)?,
+            num_heads,
+            max_resolution,
+            license: nvml.vgpu_type_license(type_id)?,
+            fps_limit: nvml.vgpu_type_frame_rate_limit(type_id)?,
+        })
+    }
+
+    /// Formats the descriptive fields of the vGPU type information as a property string.
+    pub fn description(&self) -> String {
+        let VgpuTypeInfo {
+            class_name,
+            max_instances,
+            max_instances_per_vm,
+            framebuffer_size,
+            num_heads,
+            max_resolution,
+            license,
+            ..
+        } = self;
+
+        let framebuffer_size = framebuffer_size / 1024 / 1024;
+        let (max_res_x, max_res_y) = max_resolution;
+
+        format!(
+            "class={class_name}\
+            ,max-instances={max_instances}\
+            ,max-instances-per-vm={max_instances_per_vm}\
+            ,framebuffer-size={framebuffer_size}MiB\
+            ,num-heads={num_heads}\
+            ,max-resolution={max_res_x}x{max_res_y}\
+            ,license={license}"
+        )
+    }
+}
+
+/// Given a concrete GPU device, enumerates all *creatable* vGPU types for this
+/// device.
+fn enumerate_creatable_vgpu_types_by_dev(
+    nvml: &nvml::Nvml,
+    dev: nvmlDevice_t,
+) -> Result<Vec<VgpuTypeInfo>> {
+    let mut vgpu_info = vec![];
+    let type_ids = nvml.device_get_creatable_vgpus(dev)?;
+
+    for type_id in type_ids {
+        vgpu_info.push(VgpuTypeInfo::get_with(nvml, dev, type_id)?);
+    }
+
+    Ok(vgpu_info)
+}
+
+/// Retrieves a list of *creatable* vGPU types for the specified GPU by bus id.
+///
+/// The `bus_id` must be of the format "\<domain\>:\<bus\>:\<device\>.\<function\>", e.g.
+/// "0000:01:01.0".
+/// \<domain\> is optional and can be left out if there is only one domain.
+///
+/// # See also
+///
+/// [`nvmlDeviceGetHandleByPciBusId_v2()`]: <https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gea7484bb9eac412c28e8a73842254c05>
+/// [`struct nvmlPciInfo_t`]: <https://docs.nvidia.com/deploy/nvml-api/structnvmlPciInfo__t.html#structnvmlPciInfo__t_1a4d54ad9b596d7cab96ecc34613adbe4>
+pub fn creatable_vgpu_types_for_dev(bus_id: &str) -> Result<Vec<VgpuTypeInfo>> {
+    let nvml = nvml::Nvml::new()?;
+    let handle = nvml.device_handle_by_bus_id(bus_id)?;
+
+    enumerate_creatable_vgpu_types_by_dev(&nvml, handle)
+}
diff --git a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
index 10ad3c9..1259095 100644
--- a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
+++ b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
@@ -3,6 +3,13 @@
 //!
 //! [NVML]: <https://developer.nvidia.com/management-library-nvml>
 
+use anyhow::{bail, Result};
+use std::{
+    borrow::Cow,
+    ffi::{c_uint, c_ulonglong, CStr, CString},
+    ptr,
+};
+
 #[allow(
     dead_code,
     non_camel_case_types,
@@ -11,3 +18,220 @@
     unused_imports
 )]
 pub mod bindings;
+
+use bindings::{
+    nvmlDevice_t, nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE,
+    nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED, nvmlReturn_enum_NVML_SUCCESS, nvmlReturn_t,
+    nvmlVgpuTypeId_t, NvmlLib, NVML_DEVICE_NAME_BUFFER_SIZE, NVML_GRID_LICENSE_BUFFER_SIZE,
+};
+
+/// SONAME/filename of the native NVML library; pin it explicitly to SOVERSION 1 to be safe.
+const NVML_LIB_NAME: &str = "libnvidia-ml.so.1";
+
+pub struct Nvml(NvmlLib);
+
+impl Nvml {
+    pub fn new() -> Result<Self> {
+        let lib = unsafe {
+            let lib = Self(NvmlLib::new(NVML_LIB_NAME)?);
+            lib.to_err(lib.0.nvmlInit_v2())?;
+            lib
+        };
+
+        Ok(lib)
+    }
+
+    pub fn device_handle_by_bus_id(&self, bus_id: &str) -> Result<nvmlDevice_t> {
+        // NVML expects a NUL-terminated C string, so convert the Rust &str first.
+        let bus_id = CString::new(bus_id)?;
+        let mut handle: nvmlDevice_t = ptr::null_mut();
+        unsafe {
+            self.to_err(
+                self.0
+                    .nvmlDeviceGetHandleByPciBusId_v2(bus_id.as_ptr() as *const i8, &mut handle),
+            )?;
+        }
+
+        Ok(handle)
+    }
+
+    /// Retrieves a list of vGPU types that can currently be created on the given device.
+    ///
+    /// # See also
+    ///
+    /// <https://docs.nvidia.com/deploy/nvml-api/group__nvmlVgpu.html#group__nvmlVgpu>
+    pub fn device_get_creatable_vgpus(&self, dev: nvmlDevice_t) -> Result<Vec<nvmlVgpuTypeId_t>> {
+        let mut count: c_uint = 0;
+        let mut ids = vec![];
+
+        unsafe {
+            // First retrieve the number of creatable vGPUs by passing count == 0,
+            // which will set `count` to the actual number.
+            let result = self
+                .0
+                .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr());
+
+            #[allow(non_upper_case_globals)]
+            if !matches!(
+                result,
+                nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE
+            ) {
+                self.to_err(result)?;
+            }
+
+            ids.resize(count as usize, 0);
+            self.to_err(
+                self.0
+                    .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr()),
+            )?;
+        }
+
+        Ok(ids)
+    }
+
+    pub fn vgpu_type_class_name(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize];
+        let mut buffer_size = buffer.len() as u32;
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetClass(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                &mut buffer_size,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_license(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_GRID_LICENSE_BUFFER_SIZE as usize];
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetLicense(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                buffer.len() as u32,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_name(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize];
+        let mut buffer_size = buffer.len() as u32;
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetName(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                &mut buffer_size,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_max_instances(
+        &self,
+        dev: nvmlDevice_t,
+        type_id: nvmlVgpuTypeId_t,
+    ) -> Result<u32> {
+        let mut count: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetMaxInstances(dev, type_id, &mut count))?;
+        }
+
+        Ok(count)
+    }
+
+    pub fn vgpu_type_max_instances_per_vm(&self, type_id: nvmlVgpuTypeId_t) -> Result<u32> {
+        let mut count: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetMaxInstancesPerVm(type_id, &mut count))?;
+        }
+
+        Ok(count)
+    }
+
+    pub fn vgpu_type_framebuffer_size(&self, type_id: nvmlVgpuTypeId_t) -> Result<u64> {
+        let mut size: c_ulonglong = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetFramebufferSize(type_id, &mut size))?;
+        }
+
+        Ok(size)
+    }
+
+    pub fn vgpu_type_num_display_heads(&self, type_id: nvmlVgpuTypeId_t) -> Result<u32> {
+        let mut num: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetNumDisplayHeads(type_id, &mut num))?;
+        }
+
+        Ok(num)
+    }
+
+    pub fn vgpu_type_max_resolution(
+        &self,
+        type_id: nvmlVgpuTypeId_t,
+        head: u32,
+    ) -> Result<(u32, u32)> {
+        let (mut x, mut y): (c_uint, c_uint) = (0, 0);
+        unsafe {
+            self.to_err(
+                self.0
+                    .nvmlVgpuTypeGetResolution(type_id, head, &mut x, &mut y),
+            )?;
+        }
+
+        Ok((x, y))
+    }
+
+    pub fn vgpu_type_frame_rate_limit(&self, type_id: nvmlVgpuTypeId_t) -> Result<Option<u32>> {
+        let mut limit: c_uint = 0;
+        let result = unsafe { self.0.nvmlVgpuTypeGetFrameRateLimit(type_id, &mut limit) };
+
+        if Self::err_is_unsupported(result) {
+            Ok(None)
+        } else {
+            self.to_err(result)?;
+            Ok(Some(limit))
+        }
+    }
+
+    fn to_err(&self, result: nvmlReturn_t) -> Result<()> {
+        if result == nvmlReturn_enum_NVML_SUCCESS {
+            Ok(())
+        } else {
+            bail!("{}", self.error_str(result))
+        }
+    }
+
+    fn err_is_unsupported(result: nvmlReturn_t) -> bool {
+        result == nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED
+    }
+
+    fn error_str(&self, err_code: nvmlReturn_t) -> Cow<'_, str> {
+        let cstr = unsafe {
+            let raw = self.0.nvmlErrorString(err_code);
+            CStr::from_ptr(raw)
+        };
+
+        cstr.to_string_lossy()
+    }
+}
+
+impl Drop for Nvml {
+    fn drop(&mut self) {
+        if let Ok(sym) = self.0.nvmlShutdown.as_ref() {
+            // Although nvmlShutdown() returns a status code indicating whether
+            // the operation was successful, at this point there isn't really
+            // anything we can do about a failure anymore.
+            unsafe { sym() };
+        }
+    }
+}
+
+fn slice_to_string(s: &[u8]) -> Result<String> {
+    Ok(CStr::from_bytes_until_nul(s)?.to_str()?.into())
+}
-- 
2.52.0


