public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [pve-devel] [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling
@ 2022-11-21 12:16 Fabian Grünbichler
  2022-11-21 12:16 ` [pve-devel] [PATCH qemu-server 2/2] explain 'nocheck' in more places Fabian Grünbichler
  2022-11-21 13:08 ` [pve-devel] applied: [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Thomas Lamprecht
  0 siblings, 2 replies; 3+ messages in thread
From: Fabian Grünbichler @ 2022-11-21 12:16 UTC (permalink / raw)
  To: pve-devel

it's not deterministic whether the rename/move of the VM config
triggered on the source side of a migration is already visible on the
target side when vm_resume is executed. check the vmlist for the node
where the config is currently located if $nocheck is set - it is now
needed to add the forwarding DB entries to the bridge.

this fixes an issue on busier or slower clusters, where pmxcfs hasn't
yet processed the rename, and resuming would fail with an error about
the config not existing.

Reported-by: Dominik Csapak <d.csapak@proxmox.com>

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 PVE/QemuServer.pm | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 721633d8..29110c0f 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -6366,7 +6366,17 @@ sub vm_resume {
 	my $res = mon_cmd($vmid, 'query-status');
 	my $resume_cmd = 'cont';
 	my $reset = 0;
-	my $conf = PVE::QemuConfig->load_config($vmid);
+	my $conf;
+	if ($nocheck) {
+ 	    my $vmlist = PVE::Cluster::get_vmlist();
+	    my $node;
+	    if (exists($vmlist->{ids}->{$vmid})) {
+		$node = $vmlist->{ids}->{$vmid}->{node};
+	    }
+	    $conf = PVE::QemuConfig->load_config($vmid, $node);
+	} else {
+	    $conf = PVE::QemuConfig->load_config($vmid);
+    	}
 
 	if ($res->{status}) {
 	    return if $res->{status} eq 'running'; # job done, go home
@@ -6375,7 +6385,6 @@ sub vm_resume {
 	}
 
 	if (!$nocheck) {
-
 	    PVE::QemuConfig->check_lock($conf)
 		if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup'));
 	}
-- 
2.30.2





^ permalink raw reply	[flat|nested] 3+ messages in thread

* [pve-devel] [PATCH qemu-server 2/2] explain 'nocheck' in more places
  2022-11-21 12:16 [pve-devel] [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Fabian Grünbichler
@ 2022-11-21 12:16 ` Fabian Grünbichler
  2022-11-21 13:08 ` [pve-devel] applied: [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Thomas Lamprecht
  1 sibling, 0 replies; 3+ messages in thread
From: Fabian Grünbichler @ 2022-11-21 12:16 UTC (permalink / raw)
  To: pve-devel

was only explained in git history and vm_stop, add comments in other
relevant places to avoid future breakage.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 PVE/API2/Qemu.pm   | 2 ++
 PVE/CLI/qm.pm      | 2 ++
 PVE/QemuMigrate.pm | 3 ++-
 PVE/QemuServer.pm  | 9 +++++++++
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 6bdcce21..badfc37b 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -3200,6 +3200,8 @@ __PACKAGE__->register_method({
 	raise_param_exc({ skiplock => "Only root may use this option." })
 	    if $skiplock && $authuser ne 'root@pam';
 
+	# nocheck is used as part of migration when config file might be still
+	# be on source node
 	my $nocheck = extract_param($param, 'nocheck');
 	raise_param_exc({ nocheck => "Only root may use this option." })
 	    if $nocheck && $authuser ne 'root@pam';
diff --git a/PVE/CLI/qm.pm b/PVE/CLI/qm.pm
index 66feecce..3e0f1289 100755
--- a/PVE/CLI/qm.pm
+++ b/PVE/CLI/qm.pm
@@ -426,6 +426,8 @@ __PACKAGE__->register_method ({
 		last;
 	    } elsif ($line =~ /^resume (\d+)$/) {
 		my $vmid = $1;
+		# check_running and vm_resume with nocheck, since local node
+		# might not have processed config move/rename yet
 		if (PVE::QemuServer::check_running($vmid, 1)) {
 		    eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
 		    if ($@) {
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 5941cce6..5e466d95 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -1500,6 +1500,7 @@ sub phase3_cleanup {
 		    $self->{errors} = 1;
 		}
 	    } else {
+		# nocheck in case target node hasn't processed the config move/rename yet
 		my $cmd = [@{$self->{rem_ssh}}, 'qm', 'resume', $vmid, '--skiplock', '--nocheck'];
 		my $logf = sub {
 		    my $line = shift;
@@ -1561,7 +1562,7 @@ sub phase3_cleanup {
 	}
     };
 
-    # always stop local VM
+    # always stop local VM with nocheck, since config is moved already
     eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
     if (my $err = $@) {
 	$self->log('err', "stopping vm failed - $err");
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 29110c0f..e1ece548 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -2788,6 +2788,12 @@ sub check_local_storage_availability {
 sub check_running {
     my ($vmid, $nocheck, $node) = @_;
 
+    # $nocheck is set when called during a migration, in which case the config
+    # file might still or already reside on the *other* node
+    # - because rename has already happened, and current node is source
+    # - because rename hasn't happened yet, and current node is target
+    # - because rename has happened, current node is target, but hasn't yet
+    # processed it yet
     PVE::QemuConfig::assert_config_exists_on_node($vmid, $node) if !$nocheck;
     return PVE::QemuServer::Helpers::vm_running_locally($vmid);
 }
@@ -6359,6 +6365,9 @@ sub vm_suspend {
     }
 }
 
+# $nocheck is set when called as part of a migration - in this context the
+# location of the config file (source or target node) is not deterministic,
+# since migration cannot wait for pmxcfs to process the rename
 sub vm_resume {
     my ($vmid, $skiplock, $nocheck) = @_;
 
-- 
2.30.2





^ permalink raw reply	[flat|nested] 3+ messages in thread

* [pve-devel] applied: [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling
  2022-11-21 12:16 [pve-devel] [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Fabian Grünbichler
  2022-11-21 12:16 ` [pve-devel] [PATCH qemu-server 2/2] explain 'nocheck' in more places Fabian Grünbichler
@ 2022-11-21 13:08 ` Thomas Lamprecht
  1 sibling, 0 replies; 3+ messages in thread
From: Thomas Lamprecht @ 2022-11-21 13:08 UTC (permalink / raw)
  To: Proxmox VE development discussion, Fabian Grünbichler

Am 21/11/2022 um 13:16 schrieb Fabian Grünbichler:
> it's not deterministic whether the rename/move of the VM config
> triggered on the source side of a migration is already visible on the
> target side when vm_resume is executed. check the vmlist for the node
> where the config is currently located if $nocheck is set - it is now
> needed to add the forwarding DB entries to the bridge.
> 
> this fixes an issue on busier or slower clusters, where pmxcfs hasn't
> yet processed the rename, and resuming would fail with an error about
> the config not existing.
> 
> Reported-by: Dominik Csapak <d.csapak@proxmox.com>
> 
> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> ---
>  PVE/QemuServer.pm | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
>

applied both patches and fixed up the indentation errors in this one, thanks!




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-11-21 13:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-21 12:16 [pve-devel] [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Fabian Grünbichler
2022-11-21 12:16 ` [pve-devel] [PATCH qemu-server 2/2] explain 'nocheck' in more places Fabian Grünbichler
2022-11-21 13:08 ` [pve-devel] applied: [PATCH qemu-server 1/2] vm_resume: fix nocheck/migrate handling Thomas Lamprecht

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal