From: Ryosuke Nakayama <ryosuke.nakayama@ryskn.com>
To: pve-devel@lists.proxmox.com
Cc: unixtech <ryosuke_666@icloud.com>
Subject: [RFC PATCH qemu-server 1/2] qemu: add VPP vhost-user dataplane support
Date: Tue, 17 Mar 2026 20:14:02 +0900 [thread overview]
Message-ID: <20260317111404.37254-2-ryosuke.nakayama@ryskn.com> (raw)
In-Reply-To: <20260317111404.37254-1-ryosuke.nakayama@ryskn.com>
From: unixtech <ryosuke_666@icloud.com>
- generate vhost-user netdev/chardev for VPP bridge interfaces
- add vpp_connect_vhost_nets() to connect VPP vhost-user server sockets
before QEMU starts (VPP server mode, QEMU client mode)
- add memfd shared memory backend in Memory.pm for vhost-user without
hugepages (has_vpp_bridge detection)
- support hotplug of VPP vhost-user interfaces via QMP
Signed-off-by: Ryosuke Nakayama <ryosuke.nakayama@ryskn.com>
Signed-off-by: unixtech <ryosuke.nakayama@ryskn.com>
---
src/PVE/QemuServer.pm | 159 +++++++++++++++++++++++++++--------
src/PVE/QemuServer/Memory.pm | 16 +++-
2 files changed, 137 insertions(+), 38 deletions(-)
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index 09e7a19b..cf1c9e9f 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -95,7 +95,6 @@ use PVE::QemuServer::RunState;
use PVE::QemuServer::StateFile;
use PVE::QemuServer::USB;
use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
-use PVE::QemuServer::VolumeChain;
use PVE::QemuServer::DBusVMState;
my $have_ha_config;
@@ -316,8 +315,7 @@ my $confdesc = {
optional => 1,
type => 'integer',
description =>
- "Amount of target RAM for the VM in MiB. The balloon driver is enabled by default,"
- . " unless it is explicitly disabled by setting the value to zero.",
+ "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
minimum => 0,
},
shares => {
@@ -639,7 +637,12 @@ EODESCR
. ' This is used internally for snapshots.',
},
machine => get_standard_option('pve-qemu-machine'),
- arch => get_standard_option('pve-qm-cpu-arch', { optional => 1 }),
+ arch => {
+ description => "Virtual processor architecture. Defaults to the host.",
+ optional => 1,
+ type => 'string',
+ enum => [qw(x86_64 aarch64)],
+ },
smbios1 => {
description => "Specify SMBIOS type 1 fields.",
type => 'string',
@@ -1442,7 +1445,10 @@ sub print_netdev_full {
my $netdev = "";
my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";
- if ($net->{bridge}) {
+ if ($net->{bridge} && $net->{bridge} =~ /^vppbr\d+$/) {
+ # VPP bridge: use vhost-user socket instead of tap
+ $netdev = "type=vhost-user,id=$netid,chardev=vhost-user-${netid}";
+ } elsif ($net->{bridge}) {
$netdev = "type=tap,id=$netid,ifname=${ifname},script=/usr/libexec/qemu-server/$script"
. ",downscript=/usr/libexec/qemu-server/pve-bridgedown$vhostparam";
} else {
@@ -2586,8 +2592,7 @@ sub vmstatus {
$d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});
my $cgroup = PVE::QemuServer::CGroup->new($vmid);
- my $cgroup_mem = eval { $cgroup->get_memory_stat() } // {};
- warn "unable to get memory stat for $vmid - $@" if $@;
+ my $cgroup_mem = $cgroup->get_memory_stat();
$d->{memhost} = $cgroup_mem->{mem} // 0;
$d->{mem} = $d->{memhost}; # default to cgroup, balloon info can override this below
@@ -2713,7 +2718,7 @@ sub vmstatus {
$qmpclient->queue_cmd($qmp_peer, $blockstatscb, 'query-blockstats');
$qmpclient->queue_cmd($qmp_peer, $machinecb, 'query-machines');
$qmpclient->queue_cmd($qmp_peer, $versioncb, 'query-version');
- # this fails if balloon driver is not loaded, so this must be
+ # this fails if balloon driver is not loaded, so this must be
# the last command (following command are aborted if this fails).
$qmpclient->queue_cmd($qmp_peer, $ballooncb, 'query-balloon');
@@ -2936,13 +2941,17 @@ sub vga_conf_has_spice {
sub query_supported_cpu_flags {
my ($arch) = @_;
- my $host_arch = get_host_arch();
- $arch //= $host_arch;
+ $arch //= get_host_arch();
my $default_machine = PVE::QemuServer::Machine::default_machine_for_arch($arch);
my $flags = {};
- my $kvm_supported = defined(kvm_version()) && $arch eq $host_arch;
+ # FIXME: Once this is merged, the code below should work for ARM as well:
+ # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
+ die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n"
+ if $arch eq "aarch64";
+
+ my $kvm_supported = defined(kvm_version());
my $qemu_cmd = PVE::QemuServer::Helpers::get_command_for_arch($arch);
my $fakevmid = -1;
my $pidfile = PVE::QemuServer::Helpers::vm_pidfile_name($fakevmid);
@@ -2970,8 +2979,6 @@ sub query_supported_cpu_flags {
if (!$kvm) {
push @$cmd, '-accel', 'tcg';
- } else {
- push @$cmd, '-cpu', 'host';
}
my $rc = run_command($cmd, noerr => 1, quiet => 0);
@@ -2982,7 +2989,7 @@ sub query_supported_cpu_flags {
$fakevmid,
'query-cpu-model-expansion',
type => 'full',
- model => { name => $kvm ? 'host' : 'max' },
+ model => { name => 'host' },
);
my $props = $cmd_result->{model}->{props};
@@ -3125,7 +3132,7 @@ sub config_to_command {
die "Detected old QEMU binary ('$kvmver', at least 6.0 is required)\n";
}
- my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf, $forcemachine);
+ my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf, $forcemachine, $arch);
my $machine_version = extract_version($machine_type, $kvmver);
$kvm //= 1 if is_native_arch($arch);
@@ -3661,6 +3668,11 @@ sub config_to_command {
$d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
+ if ($d->{bridge} && $d->{bridge} =~ /^vppbr\d+$/) {
+ my $socket = "/var/run/vpp/qemu-${vmid}-${netname}.sock";
+ push @$devices, '-chardev',
+ "socket,id=vhost-user-${netname},path=${socket}";
+ }
push @$devices, '-netdev', $netdevfull;
# force +pve1 if machine version 10.0, for host_mtu differentiation
@@ -3730,7 +3742,7 @@ sub config_to_command {
push @$machineFlags, 'accel=tcg';
}
my $power_state_flags =
- PVE::QemuServer::Machine::get_power_state_flags($machine_conf, $arch, $version_guard);
+ PVE::QemuServer::Machine::get_power_state_flags($machine_conf, $version_guard);
push $cmd->@*, $power_state_flags->@* if defined($power_state_flags);
push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
@@ -4129,7 +4141,7 @@ sub qemu_devicedelverify {
sleep 1;
}
- die "error on hot-unplugging device '$deviceid' - still busy in guest?\n";
+ die "error on hot-unplugging device '$deviceid'\n";
}
sub qemu_findorcreatescsihw {
@@ -4217,6 +4229,29 @@ sub qemu_netdevadd {
my ($vmid, $conf, $arch, $device, $deviceid) = @_;
my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
+
+ # For VPP bridges, add chardev first then netdev via QMP
+ if ($device->{bridge} && $device->{bridge} =~ /^vppbr\d+$/) {
+ my $socket = "/var/run/vpp/qemu-${vmid}-${deviceid}.sock";
+ mon_cmd(
+ $vmid, "chardev-add",
+ id => "vhost-user-${deviceid}",
+ backend => {
+ type => 'socket',
+ data => {
+ addr => { type => 'unix', data => { path => $socket } },
+ server => JSON::true,
+ wait => JSON::false,
+ },
+ },
+ );
+ my %options = split(/[=,]/, $netdev);
+ mon_cmd($vmid, "netdev_add", %options);
+ # Connect VPP side
+ vpp_connect_vhost_nets($conf, $vmid);
+ return 1;
+ }
+
my %options = split(/[=,]/, $netdev);
if (defined(my $vhost = $options{vhost})) {
@@ -4356,7 +4391,7 @@ sub qemu_volume_snapshot {
print "external qemu snapshot\n";
my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
my $parent_snap = $snapshots->{'current'}->{parent};
- PVE::QemuServer::VolumeChain::blockdev_external_snapshot(
+ PVE::QemuServer::Blockdev::blockdev_external_snapshot(
$storecfg, $vmid, $machine_version, $deviceid, $drive, $snap, $parent_snap,
);
} elsif ($do_snapshots_type eq 'storage') {
@@ -4414,7 +4449,7 @@ sub qemu_volume_snapshot_delete {
# improve-me: if firstsnap > child : commit, if firstsnap < child do a stream.
if (!$parentsnap) {
print "delete first snapshot $snap\n";
- PVE::QemuServer::VolumeChain::blockdev_commit(
+ PVE::QemuServer::Blockdev::blockdev_commit(
$storecfg,
$vmid,
$machine_version,
@@ -4426,7 +4461,7 @@ sub qemu_volume_snapshot_delete {
PVE::Storage::rename_snapshot($storecfg, $volid, $snap, $childsnap);
- PVE::QemuServer::VolumeChain::blockdev_replace(
+ PVE::QemuServer::Blockdev::blockdev_replace(
$storecfg,
$vmid,
$machine_version,
@@ -4439,7 +4474,7 @@ sub qemu_volume_snapshot_delete {
} else {
#intermediate snapshot, we always stream the snapshot to child snapshot
print "stream intermediate snapshot $snap to $childsnap\n";
- PVE::QemuServer::VolumeChain::blockdev_stream(
+ PVE::QemuServer::Blockdev::blockdev_stream(
$storecfg,
$vmid,
$machine_version,
@@ -4556,7 +4591,7 @@ sub vmconfig_hotplug_pending {
my $defaults = load_defaults();
my $arch = PVE::QemuServer::Helpers::get_vm_arch($conf);
- my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf);
+ my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf, undef, $arch);
# commit values which do not have any impact on running VM first
# Note: those option cannot raise errors, we we do not care about
@@ -4759,7 +4794,7 @@ sub vmconfig_hotplug_pending {
die "skip\n" if !$hotplug_features->{cpu};
qemu_cpu_hotplug($vmid, $conf, $value);
} elsif ($opt eq 'balloon') {
- # enable/disable ballooning device is not hotpluggable
+ # enable/disable ballooning device is not hotpluggable
my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
my $new_balloon_enabled =
!!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
@@ -4971,7 +5006,6 @@ sub vmconfig_apply_pending {
$old_drive,
$new_drive,
);
- $conf->{pending}->{$opt} = print_drive($new_drive);
}
} elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) {
my $new_net = PVE::QemuServer::Network::parse_net($conf->{pending}->{$opt});
@@ -5138,6 +5172,49 @@ sub vmconfig_update_net {
}
}
+# Set up the VPP side of every vhost-user NIC of a VM.
+#
+# For each netN entry in $conf whose bridge is named 'vppbr<N>', this runs
+# vppctl to create a vhost-user interface in server mode on the socket
+# /var/run/vpp/qemu-<vmid>-<netN>.sock, sets that interface up and adds it
+# to L2 bridge-domain <N>.
+#
+# Parameters:
+#   $conf - VM configuration hash; only the netN keys are looked at
+#   $vmid - VM ID, used to build the socket path
+#
+# Returns nothing useful. The function is a no-op when /usr/bin/vppctl is
+# not executable. Failures are deliberately non-fatal: each one is reported
+# with warn() for the affected NIC and the remaining NICs are still handled.
+sub vpp_connect_vhost_nets {
+    my ($conf, $vmid) = @_;
+
+    my $vppctl = '/usr/bin/vppctl';
+    return if !-x $vppctl;
+
+    # Sort the keys so the interfaces are created in a stable order instead
+    # of Perl's randomized hash order.
+    for my $opt (sort keys %$conf) {
+        next if $opt !~ m/^net\d+$/;
+        my $net = PVE::QemuServer::Network::parse_net($conf->{$opt});
+        next if !$net || !$net->{bridge};
+
+        # Capture the bridge-domain ID into a lexical right away rather than
+        # relying on $1 surviving a negated match inside a longer condition.
+        my ($bd_id) = $net->{bridge} =~ /^vppbr(\d+)$/;
+        next if !defined($bd_id);
+
+        my $socket = "/var/run/vpp/qemu-${vmid}-${opt}.sock";
+
+        eval {
+            my @output;
+            PVE::Tools::run_command(
+                [$vppctl, 'create', 'vhost-user', 'socket', $socket, 'server'],
+                outfunc => sub { push @output, $_[0]; },
+                timeout => 10,
+            );
+
+            # Keep the output chunks separate and use the first one that is
+            # a single token. Gluing everything together would turn any
+            # additional output into part of the interface name.
+            # NOTE(review): assumes outfunc is invoked once per output line
+            # and that vppctl prints the bare interface name - confirm.
+            my ($iface_name) = grep { /^\S+$/ } map { s/^\s+|\s+$//gr } @output;
+            die "vppctl did not return interface name\n" if !$iface_name;
+
+            PVE::Tools::run_command(
+                [$vppctl, 'set', 'interface', 'state', $iface_name, 'up'],
+                timeout => 5,
+            );
+            PVE::Tools::run_command(
+                [$vppctl, 'set', 'interface', 'l2', 'bridge', $iface_name, $bd_id],
+                timeout => 5,
+            );
+            print "VPP: connected $iface_name to bridge-domain $bd_id via $socket\n";
+        };
+        # best effort: report the problem and carry on with the next NIC
+        warn "VPP vhost-user setup failed for $opt: $@" if $@;
+    }
+}
+
sub vmconfig_update_agent {
my ($conf, $opt, $value) = @_;
@@ -5402,18 +5479,16 @@ my sub check_efi_vars {
return if PVE::QemuConfig->is_template($conf);
return if !$conf->{efidisk0};
+ return if !$conf->{ostype};
+ return if $conf->{ostype} ne 'win10' && $conf->{ostype} ne 'win11';
my $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
if (PVE::QemuServer::OVMF::should_enroll_ms_2023_cert($efidisk)) {
# TODO: make the first print a log_warn with PVE 9.2 to make it more noticeable!
- print "EFI disk without 'ms-cert=2023k' option, suggesting that not all UEFI 2023\n";
- print "certificates from Microsoft are enrolled yet. The UEFI 2011 certificates expire\n";
- print
- "in June 2026! The new certificates are required for secure boot update for Windows\n";
- print "and common Linux distributions. Use 'Disk Action > Enroll Updated Certificates'\n";
- print "in the UI or, while the VM is shut down, run 'qm enroll-efi-keys $vmid' to enroll\n";
- print "the new certificates.\n\n";
- print "For Windows with BitLocker, run the following command inside Powershell:\n";
+ print "EFI disk without 'ms-cert=2023w' option, suggesting that the Microsoft UEFI 2023"
+ . " certificate is not enrolled yet. The UEFI 2011 certificate expires in June 2026!\n";
+ print "While the VM is shut down, run 'qm enroll-efi-keys $vmid' to enroll it.\n";
+ print "If the VM uses BitLocker, run the following command inside Windows Powershell:\n";
print " manage-bde -protectors -disable <drive>\n";
print "for each drive with BitLocker (for example, <drive> could be 'C:').\n";
}
@@ -5564,6 +5639,9 @@ sub vm_start_nolock {
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
+ # VPP bridges require shared guest memory for vhost-user to work (hugepages, or the memfd backend set up in Memory.pm)
+
+
my $forcemachine = $params->{forcemachine};
my $forcecpu = $params->{forcecpu};
my $nets_host_mtu = $params->{'nets-host-mtu'};
@@ -5756,6 +5834,7 @@ sub vm_start_nolock {
}
}
+ vpp_connect_vhost_nets($conf, $vmid);
my $exitcode = run_command($cmd, %run_params);
eval { PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); };
log_warn("closing virtiofs sockets failed - $@") if $@;
@@ -7246,7 +7325,7 @@ sub pbs_live_restore {
$live_restore_backing->{$confname} = { name => $pbs_name };
# add blockdev information
- my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf);
+ my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf, undef, $conf->{arch});
my $machine_version = PVE::QemuServer::Machine::extract_version(
$machine_type,
PVE::QemuServer::Helpers::kvm_user_version(),
@@ -7297,7 +7376,9 @@ sub pbs_live_restore {
}
mon_cmd($vmid, 'cont');
- PVE::QemuServer::BlockJob::monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+ PVE::QemuServer::BlockJob::qemu_drive_mirror_monitor(
+ $vmid, undef, $jobs, 'auto', 0, 'stream',
+ );
print "restore-drive jobs finished successfully, removing all tracking block devices"
. " to disconnect from Proxmox Backup Server\n";
@@ -7353,7 +7434,7 @@ sub live_import_from_files {
$live_restore_backing->{$dev} = { name => "drive-$dev-restore" };
- my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf);
+ my $machine_type = PVE::QemuServer::Machine::get_vm_machine($conf, undef, $conf->{arch});
my $machine_version = PVE::QemuServer::Machine::extract_version(
$machine_type,
PVE::QemuServer::Helpers::kvm_user_version(),
@@ -7416,7 +7497,9 @@ sub live_import_from_files {
}
mon_cmd($vmid, 'cont');
- PVE::QemuServer::BlockJob::monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+ PVE::QemuServer::BlockJob::qemu_drive_mirror_monitor(
+ $vmid, undef, $jobs, 'auto', 0, 'stream',
+ );
print "restore-drive jobs finished successfully, removing all tracking block devices\n";
@@ -7924,7 +8007,9 @@ sub clone_disk {
# if this is the case, we have to complete any block-jobs still there from
# previous drive-mirrors
if (($completion && $completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
- PVE::QemuServer::BlockJob::monitor($vmid, $newvmid, $jobs, $completion, $qga);
+ PVE::QemuServer::BlockJob::qemu_drive_mirror_monitor(
+ $vmid, $newvmid, $jobs, $completion, $qga,
+ );
}
goto no_data_clone;
}
diff --git a/src/PVE/QemuServer/Memory.pm b/src/PVE/QemuServer/Memory.pm
index 7ebfc545..2765751e 100644
--- a/src/PVE/QemuServer/Memory.pm
+++ b/src/PVE/QemuServer/Memory.pm
@@ -476,6 +476,10 @@ sub config {
push @$cmd, '-object',
'memory-backend-memfd,id=virtiofs-mem' . ",size=$conf->{memory}M,share=on";
push @$machine_flags, 'memory-backend=virtiofs-mem';
+ } elsif (has_vpp_bridge($conf)) {
+ push @$cmd, '-object',
+ 'memory-backend-memfd,id=vpp-mem' . ",size=$conf->{memory}M,share=on";
+ push @$machine_flags, 'memory-backend=vpp-mem';
}
if ($hotplug) {
@@ -499,6 +503,16 @@ sub config {
}
}
+# Tell whether the VM configuration has at least one network device that is
+# attached to a VPP bridge, i.e. a netN entry whose bridge is 'vppbr<N>'.
+#
+# Parameters:
+#   $conf - VM configuration hash; only the netN keys are inspected
+#
+# Returns 1 as soon as a matching NIC is found, 0 otherwise.
+sub has_vpp_bridge {
+    my ($conf) = @_;
+
+    foreach my $key (keys %$conf) {
+        if ($key =~ m/^net\d+$/) {
+            my $parsed = PVE::QemuServer::Network::parse_net($conf->{$key});
+            # entries that do not parse cannot be on a VPP bridge
+            next if !$parsed;
+
+            my $bridge = $parsed->{bridge};
+            return 1 if $bridge && $bridge =~ /^vppbr\d+$/;
+        }
+    }
+
+    return 0;
+}
+
sub print_mem_object {
my ($conf, $id, $size) = @_;
@@ -508,7 +522,7 @@ sub print_mem_object {
my $path = hugepages_mount_path($hugepages_size);
return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
- } elsif ($id =~ m/^virtiofs-mem/) {
+ } elsif ($id =~ m/^(?:virtiofs|vpp)-mem/) {
return "memory-backend-memfd,id=$id,size=${size}M,share=on";
} else {
return "memory-backend-ram,id=$id,size=${size}M";
--
2.50.1 (Apple Git-155)
next prev parent reply other threads:[~2026-03-17 11:14 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-16 22:28 [RFC PATCH 0/2] network: add VPP (fd.io) as alternative dataplane Ryosuke Nakayama
2026-03-16 22:28 ` [RFC PATCH manager 1/2] api: network: add VPP (fd.io) dataplane bridge support Ryosuke Nakayama
2026-03-16 22:28 ` [RFC PATCH widget-toolkit 2/2] ui: network: add VPP (fd.io) bridge type support Ryosuke Nakayama
2026-03-17 6:39 ` [RFC PATCH 0/2] network: add VPP (fd.io) as alternative dataplane Stefan Hanreich
2026-03-17 10:18 ` DERUMIER, Alexandre
2026-03-17 11:14 ` Ryosuke Nakayama
2026-03-17 11:14 ` Ryosuke Nakayama [this message]
2026-03-17 11:14 ` [RFC PATCH qemu-server 2/2] qemu: VPP: clean up vhost-user interfaces on stop, fix tx_queue_size Ryosuke Nakayama
2026-03-17 11:26 ` [RFC PATCH qemu-server 1/2] qemu: add VPP vhost-user dataplane support Ryosuke Nakayama
2026-03-17 11:21 ` [RFC PATCH 0/2] network: add VPP (fd.io) as alternative dataplane Ryosuke Nakayama
2026-03-17 11:21 ` [RFC PATCH pve-common] network: add VPP bridge helpers for vhost-user dataplane Ryosuke Nakayama
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260317111404.37254-2-ryosuke.nakayama@ryskn.com \
--to=ryosuke.nakayama@ryskn.com \
--cc=pve-devel@lists.proxmox.com \
--cc=ryosuke_666@icloud.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.