From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 35D747EBC1 for ; Thu, 11 Nov 2021 13:33:51 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 25B78B728 for ; Thu, 11 Nov 2021 13:33:51 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id F140FB71B for ; Thu, 11 Nov 2021 13:33:49 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id AC4F6421AD for ; Thu, 11 Nov 2021 13:33:49 +0100 (CET) Date: Thu, 11 Nov 2021 13:33:42 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Fabian Ebner , pve-devel@lists.proxmox.com References: <20211105130359.40803-1-f.gruenbichler@proxmox.com> <20211105130359.40803-23-f.gruenbichler@proxmox.com> In-Reply-To: MIME-Version: 1.0 User-Agent: astroid/0.15.0 (https://github.com/astroidmail/astroid) Message-Id: <1636633657.57xkp8eh1r.astroid@nora.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.279 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH qemu-server 10/10] api: add remote migrate endpoint X-BeenThere: 
pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 11 Nov 2021 12:33:51 -0000 On November 10, 2021 1:29 pm, Fabian Ebner wrote: > Am 05.11.21 um 14:03 schrieb Fabian Gr=C3=BCnbichler: >> Signed-off-by: Fabian Gr=C3=BCnbichler >> --- >>=20 >> Notes: >> the checks currently done before the actual migration worker is >> forked could be either moved to the client calling this (that then >> makes the required API calls) or extracted into a precond API call >> like for regular migration. >> =20 >> for testing it helps catch trivial mistakes early on, and the calls= shouldn't >> be too expensive, so I left them in for now.. >> =20 >> requires >> - pve-common with bridge-pair format >> - pve-guest-common with AbstractMigrate handling remote migration >>=20 >> PVE/API2/Qemu.pm | 205 ++++++++++++++++++++++++++++++++++++++++++++++- >> debian/control | 2 + >> 2 files changed, 205 insertions(+), 2 deletions(-) >>=20 >> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm >> index 24f5b98..b931f04 100644 >> --- a/PVE/API2/Qemu.pm >> +++ b/PVE/API2/Qemu.pm >> @@ -14,6 +14,7 @@ use URI::Escape; >> use Crypt::OpenSSL::Random; >> use Socket qw(SOCK_STREAM); >> =20 >> +use PVE::APIClient::LWP; >> use PVE::Cluster qw (cfs_read_file cfs_write_file);; >> use PVE::RRD; >> use PVE::SafeSyslog; >> @@ -51,8 +52,6 @@ BEGIN { >> } >> } >> =20 >> -use Data::Dumper; # fixme: remove >> - >> use base qw(PVE::RESTHandler); >> =20 >> my $opt_force_description =3D "Force physical removal. Without this, w= e simple remove the disk from the config file and create an additional conf= iguration entry called 'unused[n]', which contains the volume ID. 
Unlink of= unused[n] always cause physical removal."; >> @@ -3778,6 +3777,208 @@ __PACKAGE__->register_method({ >> =20 >> }}); >> =20 >> +__PACKAGE__->register_method({ >> + name =3D> 'remote_migrate_vm', >> + path =3D> '{vmid}/remote_migrate', >> + method =3D> 'POST', >> + protected =3D> 1, >> + proxyto =3D> 'node', >> + description =3D> "Migrate virtual machine to a remote cluster. Crea= tes a new migration task.", >> + permissions =3D> { >> + check =3D> ['perm', '/vms/{vmid}', [ 'VM.Migrate' ]], >> + }, >> + parameters =3D> { >> + additionalProperties =3D> 0, >> + properties =3D> { >> + node =3D> get_standard_option('pve-node'), >> + vmid =3D> get_standard_option('pve-vmid', { completion =3D> \&PVE:= :QemuServer::complete_vmid }), >> + 'target-vmid' =3D> get_standard_option('pve-vmid', { optional =3D>= 1 }), >> + 'target-node' =3D> get_standard_option('pve-node', { >> + description =3D> "Target node on remote cluster.", >> + }), >> + 'target-endpoint' =3D> get_standard_option('proxmox-remote', { >> + description =3D> "Remote target endpoint", >> + }), >> + online =3D> { >> + type =3D> 'boolean', >> + description =3D> "Use online/live migration if VM is running. Ignored= if VM is stopped.", >> + optional =3D> 1, >> + }, >> + 'migration-network' =3D> { >> + type =3D> 'string', format =3D> 'CIDR', >> + description =3D> "CIDR of the (sub) network that is used for migratio= n.", >> + optional =3D> 1, >> + }, >> + 'with-local-disks' =3D> { >> + type =3D> 'boolean', >> + description =3D> "Enable live storage migration for local disk", >> + optional =3D> 1, >> + }, >> + delete =3D> { >> + type =3D> 'boolean', >> + description =3D> "Delete the original VM and related data after succe= ssful migration. 
By default the original VM is kept on the source cluster i= n a stopped state.", >> + optional =3D> 1, >> + default =3D> 0, >> + }, >> + 'target-storage' =3D> get_standard_option('pve-targetstorag= e', { >> + completion =3D> \&PVE::QemuServer::complete_migration_storage, >> + optional =3D> 0, >> + }), >> + 'target-bridge' =3D> { >> + type =3D> 'string', >> + description =3D> "Mapping from source to target bridges. Providing on= ly a single bridge ID maps all source bridges to that bridge. Providing the= special value '1' will map each source bridge to itself.", >> + format =3D> 'bridge-pair-list', >> + }, >> + bwlimit =3D> { >> + description =3D> "Override I/O bandwidth limit (in KiB/s).", >> + optional =3D> 1, >> + type =3D> 'integer', >> + minimum =3D> '0', >> + default =3D> 'migrate limit from datacenter or storage config', >> + }, >> + }, >> + }, >> + returns =3D> { >> + type =3D> 'string', >> + description =3D> "the task ID.", >> + }, >> + code =3D> sub { >> + my ($param) =3D @_; >> + >> + my $rpcenv =3D PVE::RPCEnvironment::get(); >> + my $authuser =3D $rpcenv->get_user(); >> + >> + my $source_vmid =3D extract_param($param, 'vmid'); >> + my $target_endpoint =3D extract_param($param, 'target-endpoint'); >> + my $target_node =3D extract_param($param, 'target-node'); >> + my $target_vmid =3D extract_param($param, 'target-vmid') // $source_vm= id; >> + >> + my $localnode =3D PVE::INotify::nodename(); >=20 > Nit: not used (and could've been $param->{node}). >=20 >> + my $network =3D extract_param($param, 'migration-network'); >> + my $delete =3D extract_param($param, 'delete') // 0; >> + >> + PVE::Cluster::check_cfs_quorum(); >> + >> + raise_param_exc({ 'migration-network' =3D> "Only root may use this opt= ion." }) >> + if $network && $authuser ne 'root@pam'; >=20 > I might be missing something obvious, but where is the migration network=20 > actually used down the line for the remote migration? >=20 ha - no. 
this is leftover from the previous version, where we had a=20 remote config file specifying endpoints, and those might be reachable=20 over another network that could be specified here. since we now specify=20 the API endpoint info directly, that network selection can simply happen=20 with that (a client can connect however, query for fingerprint and=20 address, then call this endpoint with the already correct info). we might need to re-introduce it if we ever want to support 'insecure'=20 migration over websocket tunnels, since the insecure migration might=20 then be over another network than the API traffic/control tunnel. but=20 for now, I'll drop it in v2! >> + >> + # test if VM exists >> + my $conf =3D PVE::QemuConfig->load_config($source_vmid); >> + >> + PVE::QemuConfig->check_lock($conf); >> + >> + raise_param_exc({ vmid =3D> "cannot migrate HA-manage VM to remote clu= ster" }) >=20 > s/manage/managed/ >=20 >> + if PVE::HA::Config::vm_is_ha_managed($source_vmid); >> + >> + my $remote =3D PVE::JSONSchema::parse_property_string('proxmox-remote'= , $target_endpoint); >> + >> + # TODO: move this as helper somewhere appropriate? 
>> + my $conn_args =3D { >> + protocol =3D> 'https', >> + host =3D> $remote->{host}, >> + port =3D> $remote->{port} // 8006, >> + apitoken =3D> $remote->{apitoken}, >> + }; >> + >> + my $fp; >> + if ($fp =3D $remote->{fingerprint}) { >> + $conn_args->{cached_fingerprints} =3D { uc($fp) =3D> 1 }; >> + } >> + >> + print "Establishing API connection with remote at '$remote->{host}'\n"= ; >> + >> + my $api_client =3D PVE::APIClient::LWP->new(%$conn_args); >> + my $version =3D $api_client->get("/version"); >> + print "remote: version '$version->{version}\n"; >> + >> + if (!defined($fp)) { >> + my $cert_info =3D $api_client->get("/nodes/$target_node/certificat= es/info"); >> + foreach my $cert (@$cert_info) { >> + $fp =3D $cert->{fingerprint} if $cert->{filename} ne 'pve-root-ca.pem= '; >> + last if $cert->{filename} eq 'pveproxy-ssl.pem'; >=20 > Not future-proof if the API call is ever extended to return an=20 > additional certificate which is not a valid fall-back here. switched it to only look at pveproxy-ssl.pem and pve-ssl.pem >=20 >> + } >> + $conn_args->{cached_fingerprints} =3D { uc($fp) =3D> 1 } >> + if defined($fp); >> + } >> + >> + if (PVE::QemuServer::check_running($source_vmid)) { >> + die "can't migrate running VM without --online\n" if !$param->{onl= ine}; >> + >> + my $repl_conf =3D PVE::ReplicationConfig->new(); >> + my $is_replicated =3D $repl_conf->check_for_existing_jobs($source_= vmid, 1); >> + die "cannot remote-migrate replicated VM\n" if $is_replicated; >> + } else { >> + warn "VM isn't running. Doing offline migration instead.\n" if $pa= ram->{online}; >> + $param->{online} =3D 0; >> + } >> + >> + # FIXME: fork worker hear to avoid timeout? or poll these periodically >> + # in pvestatd and access cached info here? all of the below is actuall= y >> + # checked at the remote end anyway once we call the mtunnel endpoint, >> + # we could also punt it to the client and not do it here at all.. 
>> + my $resources =3D $api_client->get("/cluster/resources"); >> + if (grep { defined($_->{vmid}) && $_->{vmid} eq $target_vmid } @$resou= rces) { >> + raise_param_exc({ target_vmid =3D> "Guest with ID '$target_vmid' a= lready exists on remote cluster" }); >> + } >> + >> + my $storages =3D [ grep { $_->{type} eq 'storage' && $_->{node} eq $ta= rget_node } @$resources ]; >> + my $storecfg =3D PVE::Storage::config(); >> + my $target_storage =3D extract_param($param, 'target-storage'); >> + my $storagemap =3D eval { PVE::JSONSchema::parse_idmap($target_storage= , 'pve-storage-id') }; >> + raise_param_exc({ 'target-storage' =3D> "failed to parse storage map: = $@" }) >> + if $@; >> + >> + my $target_bridge =3D extract_param($param, 'target-bridge'); >> + my $bridgemap =3D eval { PVE::JSONSchema::parse_idmap($target_bridge, = 'pve-bridge-id') }; >> + raise_param_exc({ 'target-bridge' =3D> "failed to parse bridge map: $@= " }) >> + if $@; >> + >> + my $check_remote_storage =3D sub { >> + my ($storage) =3D @_; >> + my $found =3D [ grep { $_->{storage} eq $storage } @$storages ]; >> + die "remote: storage '$storage' does not exist!\n" >> + if !@$found; >> + >> + $found =3D @$found[0]; >> + >> + my $content_types =3D [ PVE::Tools::split_list($found->{content}) = ]; >> + die "remote: storage '$storage' cannot store images\n" >> + if !grep { $_ eq 'images' } @$content_types; >> + }; >> + >> + foreach my $target_sid (values %{$storagemap->{entries}}) { >> + $check_remote_storage->($target_sid); >> + } >> + >> + $check_remote_storage->($storagemap->{default}) >> + if $storagemap->{default}; >> + >> + # TODO: or check all referenced storages? 
>> + die "remote migration requires explicit storage mapping!\n" >> + if $storagemap->{identity}; >> + >> + $param->{storagemap} =3D $storagemap; >> + $param->{bridgemap} =3D $bridgemap; >> + $param->{remote} =3D { >> + conn =3D> $conn_args, # re-use fingerprint for tunnel >> + client =3D> $api_client, >> + vmid =3D> $target_vmid, >> + }; >> + $param->{migration_type} =3D 'websocket'; >> + $param->{migration_network} =3D $network if $network; >> + $param->{delete} =3D $delete if $delete; >> + >> + my $realcmd =3D sub { >> + PVE::QemuMigrate->migrate($target_node, $remote->{host}, $source_v= mid, $param); >> + }; >> + >> + my $worker =3D sub { >> + return PVE::GuestHelpers::guest_migration_lock($source_vmid, 10, $= realcmd); >> + }; >> + >> + return $rpcenv->fork_worker('qmigrate', $source_vmid, $authuser, $work= er); >> + }}); >> + >> __PACKAGE__->register_method({ >> name =3D> 'monitor', >> path =3D> '{vmid}/monitor', >> diff --git a/debian/control b/debian/control >> index 8032ae5..33e3916 100644 >> --- a/debian/control >> +++ b/debian/control >> @@ -6,6 +6,7 @@ Build-Depends: debhelper (>=3D 12~), >> libglib2.0-dev, >> libio-multiplex-perl, >> libjson-c-dev, >> + libpve-apiclient-perl, >> libpve-cluster-perl, >> libpve-common-perl (>=3D 6.3-3), >> libpve-guest-common-perl (>=3D 3.1-3), >> @@ -34,6 +35,7 @@ Depends: dbus, >> libjson-xs-perl, >> libnet-ssleay-perl, >> libpve-access-control (>=3D 5.0-7), >> + libpve-apiclient-perl, >> libpve-cluster-perl, >> libpve-common-perl (>=3D 7.0-3), >> libpve-guest-common-perl (>=3D 3.1-3), >>=20 >=20