From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pve-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id BF6931FF15F
	for <inbox@lore.proxmox.com>; Mon, 26 Aug 2024 13:01:19 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id D9BA611406;
	Mon, 26 Aug 2024 13:01:12 +0200 (CEST)
To: pve-devel@lists.proxmox.com
Date: Mon, 26 Aug 2024 13:00:21 +0200
In-Reply-To: <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
References: <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
MIME-Version: 1.0
Message-ID: <mailman.409.1724670071.302.pve-devel@lists.proxmox.com>
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Post: <mailto:pve-devel@lists.proxmox.com>
From: Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com>
Precedence: list
Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
X-Mailman-Version: 2.1.29
X-BeenThere: pve-devel@lists.proxmox.com
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
Reply-To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
Subject: [pve-devel] [PATCH pve-manager 1/1] pvestatd: lvmqcow2 : extend
 disk on io-error
Content-Type: multipart/mixed; boundary="===============2722703455525637922=="
Errors-To: pve-devel-bounces@lists.proxmox.com
Sender: "pve-devel" <pve-devel-bounces@lists.proxmox.com>

--===============2722703455525637922==
Content-Type: message/rfc822
Content-Disposition: inline

Return-Path: <root@formationkvm1.odiso.net>
X-Original-To: pve-devel@lists.proxmox.com
Delivered-To: pve-devel@lists.proxmox.com
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature RSA-PSS (2048 bits))
	(No client certificate requested)
	by lists.proxmox.com (Postfix) with ESMTPS id 89F66C694F
	for <pve-devel@lists.proxmox.com>; Mon, 26 Aug 2024 13:01:11 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 2C66310C67
	for <pve-devel@lists.proxmox.com>; Mon, 26 Aug 2024 13:00:41 +0200 (CEST)
Received: from bastiontest.odiso.net (unknown [IPv6:2a0a:1580:2000:6700::14])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
	(No client certificate requested)
	by firstgate.proxmox.com (Proxmox) with ESMTPS
	for <pve-devel@lists.proxmox.com>; Mon, 26 Aug 2024 13:00:39 +0200 (CEST)
Received: from formationkvm1.odiso.net (unknown [10.11.201.57])
	by bastiontest.odiso.net (Postfix) with ESMTP id 371428267D3;
	Mon, 26 Aug 2024 13:00:33 +0200 (CEST)
Received: by formationkvm1.odiso.net (Postfix, from userid 0)
	id 013C6102037A; Mon, 26 Aug 2024 13:00:31 +0200 (CEST)
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH pve-manager 1/1] pvestatd: lvmqcow2 : extend disk on io-error
Date: Mon, 26 Aug 2024 13:00:21 +0200
Message-Id: <20240826110030.1744732-4-alexandre.derumier@groupe-cyllene.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
References: <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
	AWL                    -0.007 Adjusted score from AWL reputation of From: address
	BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
	DMARC_NONE                0.1 DMARC none policy
	HEADER_FROM_DIFFERENT_DOMAINS  0.249 From and EnvelopeFrom 2nd level mail domains are different
	KAM_DMARC_NONE           0.25 DKIM has Failed or SPF has failed on the message and the domain has no DMARC policy
	KAM_DMARC_STATUS         0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
	KAM_LAZY_DOMAIN_SECURITY      1 Sending domain does not have any anti-forgery methods
	RDNS_NONE               0.793 Delivered to internal network by a host with no rDNS
	SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
	SPF_NONE                0.001 SPF: sender does not publish an SPF Record
	T_SCC_BODY_TEXT_LINE    -0.01 -

If writes are really fast and the auto-extend triggered by the QMP
event is too slow, the VM can try to write to a qcow2 offset
higher than the size of the LVM underlay.

In this case, the vm will be paused in "io-error" mode.

To fix it, try to extend the drive and resume the VM.

Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
 PVE/Service/pvestatd.pm | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/PVE/Service/pvestatd.pm b/PVE/Service/pvestatd.pm
index 8661f774..72244451 100755
--- a/PVE/Service/pvestatd.pm
+++ b/PVE/Service/pvestatd.pm
@@ -230,12 +230,74 @@ sub auto_balloning {
     }
 }
 
+# Extend the lvmqcow2 underlay of VMs paused in 'io-error' state, then resume them.
+# $vmstatus: hashref vmid => status hash; only 'qmpstatus' and 'status' are read.
+# Failures are caught and logged per VM so one VM cannot block the remaining ones.
+sub auto_extend_vm_disk_on_error {
+    my ($vmstatus) = @_;
+
+    my $storecfg = PVE::Storage::config();
+
+    foreach my $vmid (keys %$vmstatus) {
+	my $d = $vmstatus->{$vmid};
+	my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
+	next if $status ne 'io-error';
+	my $resume = undef;
+	eval {
+	    my $blockstats = PVE::QemuServer::mon_cmd($vmid, "query-blockstats");
+	    $blockstats = { map { $_->{device} => $_ } $blockstats->@* };
+	    my $conf = PVE::QemuConfig->load_config($vmid);
+
+	    PVE::QemuConfig->foreach_volume($conf, sub {
+		my ($ds, $drive) = @_;
+		my $volid = $drive->{file};
+		return if !$volid;
+		my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+		return if !$sid;
+		my $scfg = PVE::Storage::storage_config($storecfg, $sid);
+		return if $scfg->{type} ne 'lvmqcow2';
+
+		my $blockinfo = PVE::QemuServer::get_block_info($vmid, $ds, $blockstats);
+		# a missing offset counts as 0: nothing to extend, only resume
+		my $wr_highest_offset = $blockinfo->{wr_highest_offset} // 0;
+		my $size = PVE::Storage::volume_size_info($storecfg, $volid, 5);
+		die "unable to get size of volume '$volid'\n" if !defined($size);
+
+		# offset at or beyond the size: grow to highest offset + chunksize
+		if ($wr_highest_offset >= $size) {
+		    my $chunksize = $scfg->{chunksize} // 1024 * 1024 * 1024;
+		    my $newsize = $wr_highest_offset + $chunksize;
+		    syslog('info', "auto extend disk underlay storage of $blockinfo->{deviceid} to $newsize");
+		    PVE::Storage::volume_resize($storecfg, $volid, $newsize, 1, 1);
+		    # NOTE(review): unqualified helpers - confirm they are in scope here
+		    my $threshold = compute_write_threshold($newsize);
+		    qemu_block_set_write_threshold($vmid, $blockinfo->{blocknodeid}, $threshold);
+		}
+		# a lower offset means the size was already increased: just resume
+		$resume = 1;
+	    });
+	};
+	if (my $err = $@) {
+	    # do not resume: the guest would most likely hit the same io-error again
+	    syslog('err', "auto extend disk of VM $vmid failed: $err");
+	    next;
+	}
+	next if !$resume;
+
+	syslog('info', "resume $vmid");
+	eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+	warn $@ if $@;
+    }
+
 sub update_qemu_status {
     my ($status_cfg) = @_;
 
     my $ctime = time();
     my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);
 
+    eval { auto_extend_vm_disk_on_error($vmstatus); };
+    syslog('err', "auto extend disk error: $@") if $@;
+
     eval { auto_balloning($vmstatus); };
     syslog('err', "auto ballooning error: $@") if $@;
 
-- 
2.39.2



--===============2722703455525637922==
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

--===============2722703455525637922==--