From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 4C3F11FF13E for ; Fri, 06 Feb 2026 14:38:40 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2091E353F; Fri, 6 Feb 2026 14:39:12 +0100 (CET) Message-ID: <009e9fc4-5c43-444b-a744-835b8e609806@proxmox.com> Date: Fri, 6 Feb 2026 14:38:35 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Beta Subject: Re: [pve-devel] [PATCH proxmox-ve-rs 2/4] vfio: add rust-native interface for accessing NVIDIA vGPU info To: Proxmox VE development discussion , Christoph Heiss References: <20260120131319.949986-1-c.heiss@proxmox.com> <20260120131319.949986-3-c.heiss@proxmox.com> Content-Language: en-US From: Dominik Csapak In-Reply-To: <20260120131319.949986-3-c.heiss@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1770385036991 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.031 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. 
SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: QNBZX4G5GUF3TJOSZ2LQERJIBKP2ZUZF X-Message-ID-Hash: QNBZX4G5GUF3TJOSZ2LQERJIBKP2ZUZF X-MailFrom: d.csapak@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: resending to the list, since I misclicked 'reply' the first time^^: a bit of a high-level comment: when we already go to the length of adding a custom 'rusty' wrapper around it, I'd probably like it to be even more 'rusty'. for example: instead of having a VgpuTypeInfo type that is returned with 'get_with' and the nvml and device_t handles, why not reverse that and have, e.g., a struct NvmlDevice that is returned from an nvml instance, which can return a list of vGPU types and so on. e.g. I'd imagine the resulting code looking like this (note, just pseudo code) ``` let nvml = Nvml::new()?; let device: NvmlDevice = nvml.get_device("01:00.0")?; let vgpu_types: Vec = device.get_vgpu_types(Kind::Creatable)?; ``` there are probably some reference shenanigans going on since the device handle needs a reference to the nvml handle and so on, but I'd think this would be much cleaner than using: ``` let nvml = Nvml::new()?; let dev: nvmlDevice_t = nvml.device_handle_by_bus_id("01:00.0")?; let mut vgpu_info = vec![]; let type_ids: Vec Add a "rusty" interface on top of the raw NVML bindings for retrieving > information about creatable vGPU. Will be used to e.g. show a proper > description for each creatable vGPU type. 
> > Signed-off-by: Christoph Heiss > --- > .../examples/nv_list_creatable_vgpus.rs | 15 ++ > proxmox-ve-vfio/src/nvidia/mod.rs | 123 ++++++++++ > proxmox-ve-vfio/src/nvidia/nvml/mod.rs | 224 ++++++++++++++++++ > 3 files changed, 362 insertions(+) > create mode 100644 proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs > > diff --git a/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs > new file mode 100644 > index 0000000..b2f276a > --- /dev/null > +++ b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs > @@ -0,0 +1,15 @@ > +use std::env; > + > +use proxmox_ve_vfio::nvidia::creatable_vgpu_types_for_dev; > + > +fn main() { > + let bus_id = env::args() > + .nth(1) > + .expect("vGPU bus id expected as first argument, e.g. 00:01.0"); > + > + let types = creatable_vgpu_types_for_dev(&bus_id).expect("failed to retrieve vGPU info"); > + > + for t in types { > + println!("{}", t.description()); > + } > +} > diff --git a/proxmox-ve-vfio/src/nvidia/mod.rs b/proxmox-ve-vfio/src/nvidia/mod.rs > index 08a414c..bc2ef17 100644 > --- a/proxmox-ve-vfio/src/nvidia/mod.rs > +++ b/proxmox-ve-vfio/src/nvidia/mod.rs > @@ -1,3 +1,126 @@ > //! Provides access to the state of NVIDIA (v)GPU devices connected to the system. > > +use anyhow::Result; > +use serde::Serialize; > + > mod nvml; > + > +use nvml::bindings::{nvmlDevice_t, nvmlVgpuTypeId_t}; > + > +/// A single vGPU type that is either supported and/or currently creatable > +/// for a given GPU. > +#[derive(Serialize)] > +#[serde(rename_all = "kebab-case")] > +pub struct VgpuTypeInfo { > + /// Unique vGPU type ID. > + pub id: u32, > + /// An alphanumeric string that denotes a particular vGPU, e.g. GRID M60-2Q. > + pub name: String, > + /// Class of the vGPU, e.g. Quadro. > + pub class_name: String, > + /// Maximum number of vGPU instances creatable of this vGPU type. > + pub max_instances: u32, > + /// Maximum number of vGPU instances supported per VM for this vGPU type. 
> + pub max_instances_per_vm: u32, > + /// vGPU framebuffer size in bytes. > + pub framebuffer_size: u64, > + /// Number of supported display heads by this vGPU type. > + pub num_heads: u32, > + /// Maximum resolution of a single head available across all display heads > + /// supported by this vGPU type. > + pub max_resolution: (u32, u32), > + /// License types and versions required to run this specified vGPU type, > + /// each in the form "\,\", for example > + /// "GRID-Virtual-PC,2.0". > + /// A vGPU type might also be runnable with more than one type of license, > + /// in which cases each license is separated by a semicolon. > + pub license: String, > + /// Static frame limit for this vGPU, if the frame limiter is enabled for > + /// this vGPU type. > + pub fps_limit: Option, > +} > + > +impl VgpuTypeInfo { > + fn get_with(nvml: &nvml::Nvml, dev: nvmlDevice_t, type_id: nvmlVgpuTypeId_t) -> Result { > + let num_heads = nvml.vgpu_type_num_display_heads(type_id)?; > + > + // Take the best resolution among all available display heads > + let max_resolution = (0..num_heads) > + .filter_map(|i| nvml.vgpu_type_max_resolution(type_id, i).ok()) > + .max() > + .unwrap_or((0, 0)); > + > + Ok(VgpuTypeInfo { > + id: type_id, > + name: nvml.vgpu_type_name(type_id)?, > + class_name: nvml.vgpu_type_class_name(type_id)?, > + max_instances: nvml.vgpu_type_max_instances(dev, type_id)?, > + max_instances_per_vm: nvml.vgpu_type_max_instances_per_vm(type_id)?, > + framebuffer_size: nvml.vgpu_type_framebuffer_size(type_id)?, > + num_heads, > + max_resolution, > + license: nvml.vgpu_type_license(type_id)?, > + fps_limit: nvml.vgpu_type_frame_rate_limit(type_id)?, > + }) > + } > + > + /// Formats the descriptive fields of the vGPU type information as a property string. > + pub fn description(&self) -> String { > + let VgpuTypeInfo { > + class_name, > + max_instances, > + max_instances_per_vm, > + framebuffer_size, > + num_heads, > + max_resolution, > + license, > + .. 
> + } = self; > + > + let framebuffer_size = framebuffer_size / 1024 / 1024; > + let (max_res_x, max_res_y) = max_resolution; > + > + format!( > + "class={class_name}\ > + ,max-instances={max_instances}\ > + ,max-instances-per-vm={max_instances_per_vm}\ > + ,framebuffer-size={framebuffer_size}MiB\ > + ,num-heads={num_heads}\ > + ,max-resolution={max_res_x}x{max_res_y}\ > + ,license={license}" > + ) > + } > +} > + > +/// Given a concrete GPU device, enumerates all *creatable* vGPU types for this > +/// device. > +fn enumerate_creatable_vgpu_types_by_dev( > + nvml: &nvml::Nvml, > + dev: nvmlDevice_t, > +) -> Result> { > + let mut vgpu_info = vec![]; > + let type_ids = nvml.device_get_creatable_vgpus(dev)?; > + > + for type_id in type_ids { > + vgpu_info.push(VgpuTypeInfo::get_with(nvml, dev, type_id)?); > + } > + > + Ok(vgpu_info) > +} > + > +/// Retrieves a list of *creatable* vGPU types for the specified GPU by bus id. > +/// > +/// The `bus_id` must be of format "\:\:\.\", e.g. > +/// "0000:01:01.0". > +/// \ is optional and can be left out if there is only one. > +/// > +/// # See also > +/// > +/// [`nvmlDeviceGetHandleByPciBusId_v2()`]: > +/// [`struct nvmlPciInto_t`]: > +pub fn creatable_vgpu_types_for_dev(bus_id: &str) -> Result> { > + let nvml = nvml::Nvml::new()?; > + let handle = nvml.device_handle_by_bus_id(bus_id)?; > + > + enumerate_creatable_vgpu_types_by_dev(&nvml, handle) > +} > diff --git a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs > index 10ad3c9..1259095 100644 > --- a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs > +++ b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs > @@ -3,6 +3,13 @@ > //! > //! 
[NVML]: > > +use anyhow::{bail, Result}; > +use std::{ > + borrow::Cow, > + ffi::{c_uint, c_ulonglong, CStr}, > + ptr, > +}; > + > #[allow( > dead_code, > non_camel_case_types, > @@ -11,3 +18,220 @@ > unused_imports > )] > pub mod bindings; > + > +use bindings::{ > + nvmlDevice_t, nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE, > + nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED, nvmlReturn_enum_NVML_SUCCESS, nvmlReturn_t, > + nvmlVgpuTypeId_t, NvmlLib, NVML_DEVICE_NAME_BUFFER_SIZE, NVML_GRID_LICENSE_BUFFER_SIZE, > +}; > + > +/// SONAME/filename of the native NVML, pin it to SOVERSION 1 explicitly to be sure. > +const NVML_LIB_NAME: &str = "libnvidia-ml.so.1"; > + > +pub struct Nvml(NvmlLib); > + > +impl Nvml { > + pub fn new() -> Result { > + let lib = unsafe { > + let lib = Self(NvmlLib::new(NVML_LIB_NAME)?); > + lib.to_err(lib.0.nvmlInit_v2())?; > + lib > + }; > + > + Ok(lib) > + } > + > + pub fn device_handle_by_bus_id(&self, bus_id: &str) -> Result { > + let mut handle: nvmlDevice_t = ptr::null_mut(); > + unsafe { > + self.to_err( > + self.0 > + .nvmlDeviceGetHandleByPciBusId_v2(bus_id.as_ptr() as *const i8, &mut handle), > + )?; > + } > + > + Ok(handle) > + } > + > + /// Retrieves a list of vGPU types supported by the given device. > + /// > + /// # See also > + /// > + /// > + pub fn device_get_creatable_vgpus(&self, dev: nvmlDevice_t) -> Result> { > + let mut count: c_uint = 0; > + let mut ids = vec![]; > + > + unsafe { > + // First retrieve the number of supported vGPUs by passing count == 0, > + // which will set `count` to the actual number. 
> + let result = self > + .0 > + .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr()); > + > + #[allow(non_upper_case_globals)] > + if !matches!( > + result, > + nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE > + ) { > + self.to_err(result)?; > + } > + > + ids.resize(count as usize, 0); > + self.to_err( > + self.0 > + .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr()), > + )?; > + } > + > + Ok(ids) > + } > + > + pub fn vgpu_type_class_name(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut buffer: Vec = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize]; > + let mut buffer_size = buffer.len() as u32; > + > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetClass( > + type_id, > + buffer.as_mut_ptr() as *mut i8, > + &mut buffer_size, > + ))?; > + } > + > + slice_to_string(&buffer) > + } > + > + pub fn vgpu_type_license(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut buffer: Vec = vec![0; NVML_GRID_LICENSE_BUFFER_SIZE as usize]; > + > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetLicense( > + type_id, > + buffer.as_mut_ptr() as *mut i8, > + buffer.len() as u32, > + ))?; > + } > + > + slice_to_string(&buffer) > + } > + > + pub fn vgpu_type_name(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut buffer: Vec = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize]; > + let mut buffer_size = buffer.len() as u32; > + > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetName( > + type_id, > + buffer.as_mut_ptr() as *mut i8, > + &mut buffer_size, > + ))?; > + } > + > + slice_to_string(&buffer) > + } > + > + pub fn vgpu_type_max_instances( > + &self, > + dev: nvmlDevice_t, > + type_id: nvmlVgpuTypeId_t, > + ) -> Result { > + let mut count: c_uint = 0; > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetMaxInstances(dev, type_id, &mut count))?; > + } > + > + Ok(count) > + } > + > + pub fn vgpu_type_max_instances_per_vm(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut count: c_uint = 0; > + unsafe { > + 
self.to_err(self.0.nvmlVgpuTypeGetMaxInstancesPerVm(type_id, &mut count))?; > + } > + > + Ok(count) > + } > + > + pub fn vgpu_type_framebuffer_size(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut size: c_ulonglong = 0; > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetFramebufferSize(type_id, &mut size))?; > + } > + > + Ok(size) > + } > + > + pub fn vgpu_type_num_display_heads(&self, type_id: nvmlVgpuTypeId_t) -> Result { > + let mut num: c_uint = 0; > + unsafe { > + self.to_err(self.0.nvmlVgpuTypeGetNumDisplayHeads(type_id, &mut num))?; > + } > + > + Ok(num) > + } > + > + pub fn vgpu_type_max_resolution( > + &self, > + type_id: nvmlVgpuTypeId_t, > + head: u32, > + ) -> Result<(u32, u32)> { > + let (mut x, mut y): (c_uint, c_uint) = (0, 0); > + unsafe { > + self.to_err( > + self.0 > + .nvmlVgpuTypeGetResolution(type_id, head, &mut x, &mut y), > + )?; > + } > + > + Ok((x, y)) > + } > + > + pub fn vgpu_type_frame_rate_limit(&self, type_id: nvmlVgpuTypeId_t) -> Result> { > + let mut limit: c_uint = 0; > + let result = unsafe { self.0.nvmlVgpuTypeGetFrameRateLimit(type_id, &mut limit) }; > + > + if !Self::err_is_unsupported(result) { > + Ok(None) > + } else { > + self.to_err(result)?; > + Ok(Some(limit)) > + } > + } > + > + fn to_err(&self, result: nvmlReturn_t) -> Result<()> { > + if result == nvmlReturn_enum_NVML_SUCCESS { > + Ok(()) > + } else { > + bail!("{}", self.error_str(result)) > + } > + } > + > + fn err_is_unsupported(result: nvmlReturn_t) -> bool { > + result == nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED > + } > + > + fn error_str(&self, err_code: nvmlReturn_t) -> Cow<'_, str> { > + let cstr = unsafe { > + let raw = self.0.nvmlErrorString(err_code); > + CStr::from_ptr(raw) > + }; > + > + cstr.to_string_lossy() > + } > +} > + > +impl Drop for Nvml { > + fn drop(&mut self) { > + if let Ok(sym) = self.0.nvmlShutdown.as_ref() { > + // Although nvmlShutdown() provides a return code (or error) indicating > + // whether the operation was successful, at 
this point there isn't > + // really anything we can do if it throws an error. > + unsafe { sym() }; > + } > + } > +} > + > +fn slice_to_string(s: &[u8]) -> Result { > + Ok(CStr::from_bytes_until_nul(s)?.to_str()?.into()) > +}