From mboxrd@z Thu Jan  1 00:00:00 1970
From: Fabian Ebner
To: pve-devel@lists.proxmox.com
Date: Thu, 27 Jan 2022 15:01:53 +0100
Message-Id: <20220127140155.66141-3-f.ebner@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20220127140155.66141-1-f.ebner@proxmox.com>
References: <20220127140155.66141-1-f.ebner@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [pve-devel] [PATCH qemu-server 2/4] api: clone: fork before locking
List-Id: Proxmox VE development discussion

using the familiar early+repeated checks pattern from other API calls.
The only intended functional changes are with regard to locking/forking.

For a full clone of a running VM without a guest agent, this also fixes
the vm_{resume,suspend} calls issued for drive mirror completion.
Previously, those calls just timed out, because they could not get the
lock:

> create full clone of drive scsi0 (rbdkvm:vm-104-disk-0)
> Formatting '/var/lib/vz/images/105/vm-105-disk-0.raw', fmt=raw
> size=4294967296 preallocation=off
> drive mirror is starting for drive-scsi0
> drive-scsi0: transferred 2.0 MiB of 4.0 GiB (0.05%) in 0s
> drive-scsi0: transferred 635.0 MiB of 4.0 GiB (15.50%) in 1s
> drive-scsi0: transferred 1.6 GiB of 4.0 GiB (40.50%) in 2s
> drive-scsi0: transferred 3.6 GiB of 4.0 GiB (90.23%) in 3s
> drive-scsi0: transferred 4.0 GiB of 4.0 GiB (100.00%) in 4s, ready
> all 'mirror' jobs are ready
> suspend vm
> trying to acquire lock...
> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
> drive-scsi0: Cancelling block job
> drive-scsi0: Done.
> resume vm
> trying to acquire lock...
> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout

Signed-off-by: Fabian Ebner
---

Best viewed with -w.
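For reviewers less familiar with the early+repeated checks pattern, a minimal
sketch of the intended flow follows. It is illustrative only and uses a
hypothetical $do_clone stand-in; the real handler uses $load_and_check,
$lock_source_vm/$lock_target_vm and $clonefn as in the diff below.

    # Illustrative sketch only - not the actual API handler code.
    my $load_and_check = sub {
        # load the config, check the lock flag, permissions and parameters;
        # die early on any problem
    };

    my $worker = sub {
        # 2) inside the forked worker: take the locks, then repeat the checks,
        #    because the configs may have changed since the early check
        my $lock_target = sub {
            PVE::QemuConfig->lock_config_full($newid, 1, sub {
                $load_and_check->();  # repeated checks, now under the lock
                $do_clone->();        # hypothetical stand-in for the clone steps
            });
        };
        # a shared lock on the source is enough when the VM is not running
        return $running
            ? PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target)
            : PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target);
    };

    $load_and_check->(); # 1) early checks: fail fast before forking the worker

    return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $worker);

The early call surfaces most errors synchronously, before a worker task is
created; the repeated checks under the locks guard against concurrent config
changes between the early check and the actual clone.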
 PVE/API2/Qemu.pm | 220 ++++++++++++++++++++++++-----------------------
 1 file changed, 112 insertions(+), 108 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 6992f6f..38e08c8 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -3079,9 +3079,7 @@ __PACKAGE__->register_method({

         my $running = PVE::QemuServer::check_running($vmid) || 0;

-        my $clonefn = sub {
-            # do all tests after lock but before forking worker - if possible
-
+        my $load_and_check = sub {
             my $conf = PVE::QemuConfig->load_config($vmid);
             PVE::QemuConfig->check_lock($conf);

@@ -3091,7 +3089,7 @@ __PACKAGE__->register_method({
             die "snapshot '$snapname' does not exist\n"
                 if $snapname && !defined( $conf->{snapshots}->{$snapname});

-            my $full = extract_param($param, 'full') // !PVE::QemuConfig->is_template($conf);
+            my $full = $param->{full} // !PVE::QemuConfig->is_template($conf);

             die "parameter 'storage' not allowed for linked clones\n"
                 if defined($storage) && !$full;
@@ -3156,7 +3154,13 @@ __PACKAGE__->register_method({
                 }
             }

-            # auto generate a new uuid
+            return ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone);
+        };
+
+        my $clonefn = sub {
+            my ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone) = $load_and_check->();
+
+            # auto generate a new uuid
             my $smbios1 = PVE::QemuServer::parse_smbios1($newconf->{smbios1} || '');
             $smbios1->{uuid} = PVE::QemuServer::generate_uuid();
             $newconf->{smbios1} = PVE::QemuServer::print_smbios1($smbios1);
@@ -3181,105 +3185,99 @@ __PACKAGE__->register_method({
             # FIXME use PVE::QemuConfig->create_and_lock_config and adapt code
             PVE::Tools::file_set_contents($conffile, "# qmclone temporary file\nlock: clone\n");

-            my $realcmd = sub {
-                my $upid = shift;
-
-                my $newvollist = [];
-                my $jobs = {};
-
-                eval {
-                    local $SIG{INT} =
-                        local $SIG{TERM} =
-                        local $SIG{QUIT} =
-                        local $SIG{HUP} = sub { die "interrupted by signal\n"; };
-
-                    PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
-
-                    my $bwlimit = extract_param($param, 'bwlimit');
-
-                    my $total_jobs = scalar(keys %{$drives});
-                    my $i = 1;
-
-                    foreach my $opt (sort keys %$drives) {
-                        my $drive = $drives->{$opt};
-                        my $skipcomplete = ($total_jobs != $i); # finish after last drive
-                        my $completion = $skipcomplete ? 'skip' : 'complete';
-
-                        my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
-                        my $storage_list = [ $src_sid ];
-                        push @$storage_list, $storage if defined($storage);
-                        my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
-
-                        my $newdrive = PVE::QemuServer::clone_disk(
-                            $storecfg,
-                            $vmid,
-                            $running,
-                            $opt,
-                            $drive,
-                            $snapname,
-                            $newid,
-                            $storage,
-                            $format,
-                            $fullclone->{$opt},
-                            $newvollist,
-                            $jobs,
-                            $completion,
-                            $oldconf->{agent},
-                            $clonelimit,
-                            $oldconf
-                        );
-
-                        $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
-
-                        PVE::QemuConfig->write_config($newid, $newconf);
-                        $i++;
-                    }
-
-                    delete $newconf->{lock};
-
-                    # do not write pending changes
-                    if (my @changes = keys %{$newconf->{pending}}) {
-                        my $pending = join(',', @changes);
-                        warn "found pending changes for '$pending', discarding for clone\n";
-                        delete $newconf->{pending};
-                    }
-
-                    PVE::QemuConfig->write_config($newid, $newconf);
-
-                    if ($target) {
-                        # always deactivate volumes - avoid lvm LVs to be active on several nodes
-                        PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
-                        PVE::Storage::deactivate_volumes($storecfg, $newvollist);
-
-                        my $newconffile = PVE::QemuConfig->config_file($newid, $target);
-                        die "Failed to move config to node '$target' - rename failed: $!\n"
-                            if !rename($conffile, $newconffile);
-                    }
-
-                    PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
-                };
-                if (my $err = $@) {
-                    eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
-                    sleep 1; # some storage like rbd need to wait before release volume - really?
-
-                    foreach my $volid (@$newvollist) {
-                        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
-                        warn $@ if $@;
-                    }
-
-                    PVE::Firewall::remove_vmfw_conf($newid);
-
-                    unlink $conffile; # avoid races -> last thing before die
-
-                    die "clone failed: $err";
-                }
-
-                return;
-            };
-
             PVE::Firewall::clone_vmfw_conf($vmid, $newid);

-            return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
+            my $newvollist = [];
+            my $jobs = {};
+
+            eval {
+                local $SIG{INT} =
+                    local $SIG{TERM} =
+                    local $SIG{QUIT} =
+                    local $SIG{HUP} = sub { die "interrupted by signal\n"; };
+
+                PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
+
+                my $bwlimit = extract_param($param, 'bwlimit');
+
+                my $total_jobs = scalar(keys %{$drives});
+                my $i = 1;
+
+                foreach my $opt (sort keys %$drives) {
+                    my $drive = $drives->{$opt};
+                    my $skipcomplete = ($total_jobs != $i); # finish after last drive
+                    my $completion = $skipcomplete ? 'skip' : 'complete';
+
+                    my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
+                    my $storage_list = [ $src_sid ];
+                    push @$storage_list, $storage if defined($storage);
+                    my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
+
+                    my $newdrive = PVE::QemuServer::clone_disk(
+                        $storecfg,
+                        $vmid,
+                        $running,
+                        $opt,
+                        $drive,
+                        $snapname,
+                        $newid,
+                        $storage,
+                        $format,
+                        $fullclone->{$opt},
+                        $newvollist,
+                        $jobs,
+                        $completion,
+                        $oldconf->{agent},
+                        $clonelimit,
+                        $oldconf
+                    );
+
+                    $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
+
+                    PVE::QemuConfig->write_config($newid, $newconf);
+                    $i++;
+                }
+
+                delete $newconf->{lock};
+
+                # do not write pending changes
+                if (my @changes = keys %{$newconf->{pending}}) {
+                    my $pending = join(',', @changes);
+                    warn "found pending changes for '$pending', discarding for clone\n";
+                    delete $newconf->{pending};
+                }
+
+                PVE::QemuConfig->write_config($newid, $newconf);
+
+                if ($target) {
+                    # always deactivate volumes - avoid lvm LVs to be active on several nodes
+                    PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
+                    PVE::Storage::deactivate_volumes($storecfg, $newvollist);
+
+                    my $newconffile = PVE::QemuConfig->config_file($newid, $target);
+                    die "Failed to move config to node '$target' - rename failed: $!\n"
+                        if !rename($conffile, $newconffile);
+                }
+
+                PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
+            };
+            if (my $err = $@) {
+                eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
+                sleep 1; # some storage like rbd need to wait before release volume - really?
+
+                foreach my $volid (@$newvollist) {
+                    eval { PVE::Storage::vdisk_free($storecfg, $volid); };
+                    warn $@ if $@;
+                }
+
+                PVE::Firewall::remove_vmfw_conf($newid);
+
+                unlink $conffile; # avoid races -> last thing before die
+
+                die "clone failed: $err";
+            }
+
+            return;
         };

         # Aquire exclusive lock lock for $newid
@@ -3287,12 +3285,18 @@ __PACKAGE__->register_method({
             return PVE::QemuConfig->lock_config_full($newid, 1, $clonefn);
         };

-        # exclusive lock if VM is running - else shared lock is enough;
-        if ($running) {
-            return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
-        } else {
-            return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
-        }
+        my $lock_source_vm = sub {
+            # exclusive lock if VM is running - else shared lock is enough;
+            if ($running) {
+                return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
+            } else {
+                return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
+            }
+        };
+
+        $load_and_check->(); # early checks before forking/locking
+
+        return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $lock_source_vm);
     }});

 __PACKAGE__->register_method({
-- 
2.30.2