From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH v3 qemu-server 08/11] blockdev: convert drive_mirror to blockdev_mirror
Date: Wed, 8 Jan 2025 16:19:40 +0100 (CET) [thread overview]
Message-ID: <1003019493.55.1736349580140@webmail.proxmox.com> (raw)
In-Reply-To: <mailman.222.1734340400.332.pve-devel@lists.proxmox.com>
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 16.12.2024 10:12 CET geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> ---
> PVE/QemuMigrate.pm | 2 +-
> PVE/QemuServer.pm | 106 +++++++++++++++++++++++++++++++++++----------
> 2 files changed, 83 insertions(+), 25 deletions(-)
>
> diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
> index ed5ede30..88627ce4 100644
> --- a/PVE/QemuMigrate.pm
> +++ b/PVE/QemuMigrate.pm
> @@ -1134,7 +1134,7 @@ sub phase2 {
> my $bitmap = $target->{bitmap};
>
> $self->log('info', "$drive: start migration to $nbd_uri");
> - PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit, $bitmap);
> + PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $source_drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit, $bitmap);
> }
>
> if (PVE::QemuServer::QMPHelpers::runs_at_least_qemu_version($vmid, 8, 2)) {
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 6bebb906..3d7c41ee 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -8184,59 +8184,85 @@ sub qemu_img_convert {
> }
>
> sub qemu_drive_mirror {
> - my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;
> + my ($vmid, $driveid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;
The $driveid is contained in $drive (in the form of index and interface). This would still be a breaking change, though, since $drive previously was the $driveid and now it's the parsed drive ;)
>
> $jobs = {} if !$jobs;
> + my $deviceid = "drive-$driveid";
> + my $dst_format;
> + my $dst_path = $dst_volid;
> + my $jobid = "mirror-$deviceid";
> + $jobs->{$jobid} = {};
>
> - my $qemu_target;
> - my $format;
> - $jobs->{"drive-$drive"} = {};
> + my $storecfg = PVE::Storage::config();
>
> if ($dst_volid =~ /^nbd:/) {
> - $qemu_target = $dst_volid;
> - $format = "nbd";
> + $dst_format = "nbd";
> } else {
> - my $storecfg = PVE::Storage::config();
> -
> - $format = checked_volume_format($storecfg, $dst_volid);
> -
> - my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
> -
> - $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
> + $dst_format = checked_volume_format($storecfg, $dst_volid);
> + $dst_path = PVE::Storage::path($storecfg, $dst_volid);
> + }
> +
> + # copy original drive config (aio,cache,discard,...)
> + my $dst_drive = dclone($drive);
> + $dst_drive->{format} = $dst_format;
> + $dst_drive->{file} = $dst_path;
> + $dst_drive->{zeroinit} = 1 if $is_zero_initialized;
> + #improve: if target storage don't support aio uring,change it to default native
> + #and remove clone_disk_check_io_uring()
> +
> + #add new block device
> + my $nodes = get_blockdev_nodes($vmid);
> +
> + my $target_fmt_nodename = get_blockdev_nextid("fmt-$deviceid", $nodes);
> + my $target_file_nodename = get_blockdev_nextid("file-$deviceid", $nodes);
> + my $target_file_blockdev = generate_file_blockdev($storecfg, $dst_drive, $target_file_nodename);
> + my $target_nodename = undef;
> +
> + if ($dst_format eq 'nbd') {
> + #nbd file don't have fmt
> + $target_nodename = $target_file_nodename;
> + PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', %$target_file_blockdev);
> + } else {
> + $target_nodename = $target_fmt_nodename;
> + my $target_fmt_blockdev = generate_format_blockdev($storecfg, $dst_drive, $target_fmt_nodename, $target_file_blockdev);
> + PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
> }
>
> + #we replace the original src_fmt node in the blockdev graph
> + my $src_fmt_nodename = find_fmt_nodename_drive($storecfg, $vmid, $drive, $nodes);
> my $opts = {
> + 'job-id' => $jobid,
> timeout => 10,
> - device => "drive-$drive",
> - mode => "existing",
> + device => $deviceid,
> + replaces => $src_fmt_nodename,
> sync => "full",
> - target => $qemu_target,
> + target => $target_nodename,
> 'auto-dismiss' => JSON::false,
> };
> - $opts->{format} = $format if $format;
>
> if (defined($src_bitmap)) {
> $opts->{sync} = 'incremental';
> - $opts->{bitmap} = $src_bitmap;
> + $opts->{bitmap} = $src_bitmap; ##FIXME: how to handle bitmap ? special proxmox patch ?
> print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
> }
>
> if (defined($bwlimit)) {
> $opts->{speed} = $bwlimit * 1024;
> - print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
> + print "drive mirror is starting for $deviceid with bandwidth limit: ${bwlimit} KB/s\n";
> } else {
> - print "drive mirror is starting for drive-$drive\n";
> + print "drive mirror is starting for $deviceid\n";
> }
>
> # if a job already runs for this device we get an error, catch it for cleanup
> - eval { mon_cmd($vmid, "drive-mirror", %$opts); };
> + eval { mon_cmd($vmid, "blockdev-mirror", %$opts); };
> +
> if (my $err = $@) {
> eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> + #FIXME: delete blockdev after job cancel
Wouldn't we also need to keep track of the device IDs and pass those to the monitor invocation below? If the block job fails or gets canceled, we also need cleanup there.
> warn "$@\n" if $@;
> die "mirroring error: $err\n";
> }
> -
> - qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
> + qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga, 'mirror');
> }
>
> # $completion can be either
> @@ -8595,7 +8621,7 @@ sub clone_disk {
>
> my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
> if ($use_drive_mirror) {
> - qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
> + qemu_drive_mirror($vmid, $src_drivename, $drive, $newvolid, $newvmid, $sparseinit, $jobs,
> $completion, $qga, $bwlimit);
> } else {
> if ($dst_drivename eq 'efidisk0') {
> @@ -9130,6 +9156,38 @@ sub delete_ifaces_ipams_ips {
> }
> }
>
> +sub find_fmt_nodename_drive {
> + my ($storecfg, $vmid, $drive, $nodes) = @_;
> +
> + my $volid = $drive->{file};
> + my $format = checked_volume_format($storecfg, $volid);
$format is assigned here but never used?
> + my $path = PVE::Storage::path($storecfg, $volid);
Is this path guaranteed to be stable? Also across versions? And including external storage plugins?
> +
> + my $node = find_blockdev_node($nodes, $path, 'fmt');
That one is only added in a later patch. In any case, I don't think lookups by path are a good idea — we should probably have a deterministic node-naming concept instead, e.g., one that encodes the drive + snapshot name?
> + return $node->{'node-name'};
> +}
> +
> +sub get_blockdev_nextid {
> + my ($nodename, $nodes) = @_;
> + my $version = 0;
> + for my $nodeid (keys %$nodes) {
> + if ($nodeid =~ m/^$nodename-(\d+)$/) {
> + my $current_version = $1;
> + $version = $current_version if $current_version >= $version;
> + }
> + }
> + $version++;
> + return "$nodename-$version";
Since we shouldn't ever have more than one job running for a drive (right?), couldn't we just use a deterministic name for this? That would also simplify cleanup, including cleanup of a failed cleanup ;)
> +}
> +
> +sub get_blockdev_nodes {
> + my ($vmid) = @_;
> +
> + my $nodes = PVE::QemuServer::Monitor::mon_cmd($vmid, "query-named-block-nodes");
> + $nodes = { map { $_->{'node-name'} => $_ } $nodes->@* };
> + return $nodes;
> +}
> +
> sub encode_json_ordered {
> return JSON->new->canonical->allow_nonref->encode( $_[0] );
> }
> --
> 2.39.5
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-01-08 15:20 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com>
2024-12-16 9:12 ` [pve-devel] [PATCH v1 pve-qemu 1/1] add block-commit-replaces option patch Alexandre Derumier via pve-devel
2025-01-08 13:27 ` Fabian Grünbichler
2025-01-10 7:55 ` DERUMIER, Alexandre via pve-devel
[not found] ` <34a164520eba035d1db5f70761b0f4aa59fecfa5.camel@groupe-cyllene.com>
2025-01-10 9:15 ` Fiona Ebner
2025-01-10 9:32 ` DERUMIER, Alexandre via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax Alexandre Derumier via pve-devel
2025-01-08 14:17 ` Fabian Grünbichler
2025-01-10 13:50 ` DERUMIER, Alexandre via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 1/3] qcow2: add external snapshot support Alexandre Derumier via pve-devel
2025-01-09 12:36 ` Fabian Grünbichler
2025-01-10 9:10 ` DERUMIER, Alexandre via pve-devel
[not found] ` <f25028d41a9588e82889b3ef869a14f33cbd216e.camel@groupe-cyllene.com>
2025-01-10 11:02 ` Fabian Grünbichler
2025-01-10 11:51 ` DERUMIER, Alexandre via pve-devel
[not found] ` <1caecaa23e5d57030a9e31f2f0e67648f1930d6a.camel@groupe-cyllene.com>
2025-01-10 12:20 ` Fabian Grünbichler
2025-01-10 13:14 ` DERUMIER, Alexandre via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 02/11] blockdev: fix cfg2cmd tests Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 2/3] lvmplugin: add qcow2 snapshot Alexandre Derumier via pve-devel
2025-01-09 13:55 ` Fabian Grünbichler
2025-01-10 10:16 ` DERUMIER, Alexandre via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 03/11] blockdev : convert qemu_driveadd && qemu_drivedel Alexandre Derumier via pve-devel
2025-01-08 14:26 ` Fabian Grünbichler
2025-01-10 14:08 ` DERUMIER, Alexandre via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 3/3] storage: vdisk_free: remove external snapshots Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 04/11] blockdev: vm_devices_list : fix block-query Alexandre Derumier via pve-devel
2025-01-08 14:31 ` Fabian Grünbichler
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 05/11] blockdev: convert cdrom media eject/insert Alexandre Derumier via pve-devel
2025-01-08 14:34 ` Fabian Grünbichler
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 06/11] blockdev: block_resize: convert to blockdev Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 07/11] blockdev: nbd_export: block-export-add : use drive-$id for nodename Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 08/11] blockdev: convert drive_mirror to blockdev_mirror Alexandre Derumier via pve-devel
2025-01-08 15:19 ` Fabian Grünbichler [this message]
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 09/11] blockdev: mirror: change aio on target if io_uring is not default Alexandre Derumier via pve-devel
2025-01-09 9:51 ` Fabian Grünbichler
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 10/11] blockdev: add backing_chain support Alexandre Derumier via pve-devel
2025-01-09 11:57 ` Fabian Grünbichler
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 11/11] qcow2: add external snapshot support Alexandre Derumier via pve-devel
2025-01-09 11:57 ` Fabian Grünbichler
2025-01-09 13:19 ` Fabio Fantoni via pve-devel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1003019493.55.1736349580140@webmail.proxmox.com \
--to=f.gruenbichler@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox