From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <c.heiss@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 25C9C910FB
 for <pve-devel@lists.proxmox.com>; Tue, 13 Feb 2024 16:14:16 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 9D16F38340
 for <pve-devel@lists.proxmox.com>; Tue, 13 Feb 2024 16:14:15 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Tue, 13 Feb 2024 16:14:14 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 3EADB47D0F
 for <pve-devel@lists.proxmox.com>; Tue, 13 Feb 2024 16:14:14 +0100 (CET)
From: Christoph Heiss <c.heiss@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Tue, 13 Feb 2024 16:13:59 +0100
Message-ID: <20240213151405.1282639-3-c.heiss@proxmox.com>
X-Mailer: git-send-email 2.43.0
In-Reply-To: <20240213151405.1282639-1-c.heiss@proxmox.com>
References: <20240213151405.1282639-1-c.heiss@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.003 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pve-devel] [PATCH installer v2 2/6] sys: command: factor out
 kill() + waitpid() from run_command()
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Tue, 13 Feb 2024 15:14:16 -0000

This moves the kill() + waitpid() combo into a separate subroutine,
avoiding open-coding that sequence. wait_for_process() also properly
handles unkillable processes (e.g. in D-state) and avoids completely
locking up the installer in such cases. See [0].

For the latter case, a timeout exists (with a default of 5 seconds) in
which to wait for the process to exit after sending an optional
TERM/KILL signal.

Also while at it, add a few basic tests for run_command().

[0] https://lists.proxmox.com/pipermail/pve-devel/2024-February/061697.html

Signed-off-by: Christoph Heiss <c.heiss@proxmox.com>
---
Changes since v1:
  * new patch

 Proxmox/Sys/Command.pm | 60 +++++++++++++++++++++++++++++++++++++-----
 test/Makefile          |  5 +++-
 test/run-command.pl    | 35 ++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 8 deletions(-)
 create mode 100755 test/run-command.pl

diff --git a/Proxmox/Sys/Command.pm b/Proxmox/Sys/Command.pm
index c3e24b3..e64e0ee 100644
--- a/Proxmox/Sys/Command.pm
+++ b/Proxmox/Sys/Command.pm
@@ -33,12 +33,55 @@ my sub cmd2string {
     return join (' ', $quoted_args->@*);
 }

+# Safely waits for the (sub-)process specified by $pid to exit, using a timeout.
+#
+# When kill => 1 is set, a TERM signal is first sent to the process before
+# checking if it exited.
+# If it has not exited yet, KILL is sent to the process and then up to
+# timeout => $timeout seconds (default: 5) are waited for the process to exit.
+#
+# On success, the wait status ($?) of the process is returned, otherwise
+# `undef` (i.e. the process was unkillable).
+my sub wait_for_process {
+    my ($pid, %params) = @_;
+
+    kill('TERM', $pid) if $params{kill};
+
+    my $terminated = waitpid($pid, WNOHANG);
+    return $? if $terminated > 0;
+
+    kill('KILL', $pid) if $params{kill};
+
+    my $timeout = $params{timeout} // 5;
+    for (1 .. $timeout) {
+	$terminated = waitpid($pid, WNOHANG);
+	return $? if $terminated > 0;
+	sleep(1);
+    }
+
+    log_warn("failed to kill child pid $pid, probably stuck in D-state?\n");
+    # We tried our best; better let the child hang in the background than block
+    # the installer. Explicit return, else log_warn()'s value leaks as a status.
+    return;
+}
+
 sub syscmd {
     my ($cmd) = @_;

     return run_command($cmd, undef, undef, 1);
 }

+# Runs a command in a subprocess, properly handling IO via piping, cleaning up and passing back the
+# exit code.
+#
+# If $cmd contains a pipe |, the command will be executed inside a bash shell.
+# If $cmd contains 'chpasswd', the input will be specially quoted for that purpose.
+#
+# Arguments:
+# * $cmd - The command to run, either a single string or array with individual arguments
+# * $func - Logging subroutine to call, receives both stdout and stderr
+# * $input - Stdin contents for the spawned subprocess
+# * $noout - If set, return the exit status (like system()) instead of the process output
 sub run_command {
     my ($cmd, $func, $input, $noout) = @_;

@@ -104,8 +147,7 @@ sub run_command {
 	    my $count = sysread ($h, $buf, 4096);
 	    if (!defined ($count)) {
 		my $err = $!;
-		kill (9, $pid);
-		waitpid ($pid, 0);
+		wait_for_process($pid, kill => 1);
 		die "command '$cmd' failed: $err";
 	    }
 	    $select->remove($h) if !$count;
@@ -128,15 +170,19 @@ sub run_command {

     &$func($logout) if $func;

-    my $rv = waitpid ($pid, 0);
+    my $ec = wait_for_process($pid);

-    return $? if $noout; # behave like standard system();
+    # behave like standard system(); returns -1 in case of errors too
+    return ($ec // -1) if $noout;

-    if ($? == -1) {
+    if (!defined($ec)) {
+	# Don't fail completely here to let the install continue
+	warn "command '$cmdstr' failed to exit properly\n";
+    } elsif ($ec == -1) {
 	croak "command '$cmdstr' failed to execute\n";
-    } elsif (my $sig = ($? & 127)) {
+    } elsif (my $sig = ($ec & 127)) {
 	croak "command '$cmdstr' failed - got signal $sig\n";
-    } elsif (my $exitcode = ($? >> 8)) {
+    } elsif (my $exitcode = ($ec >> 8)) {
 	croak "command '$cmdstr' failed with exit code $exitcode";
     }

diff --git a/test/Makefile b/test/Makefile
index fb80fc4..ae80a94 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -3,8 +3,11 @@ all:
 export PERLLIB=..

 .PHONY: check
-check: test-zfs-arc-max
+check: test-zfs-arc-max test-run-command

 .PHONY: test-zfs-arc-max
 test-zfs-arc-max:
 	./zfs-arc-max.pl
+
+test-run-command:
+	./run-command.pl
diff --git a/test/run-command.pl b/test/run-command.pl
new file mode 100755
index 0000000..7d5805e
--- /dev/null
+++ b/test/run-command.pl
@@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use File::Temp;
+use Test::More;
+
+use Proxmox::Log;
+use Proxmox::Sys::Command qw(run_command CMD_FINISHED);
+use Proxmox::Sys::File qw(file_read_all);
+use Proxmox::UI;
+
+# Log to a temporary file so that it can be checked for warnings/errors below
+my $log_file = File::Temp->new();
+Proxmox::Log::init($log_file->filename);
+Proxmox::UI::init_stdio();
+
+is(run_command('echo test'), "test\n", 'basic usage');
+
+is(run_command('echo test', undef, undef, 1), 0, 'system()-mode');
+
+my $ret = run_command('bash -c "echo test; sleep 1000; echo test"', sub {
+    my $line = shift;
+    is($line, 'test', 'using CMD_FINISHED - produced correct log line');
+
+    return CMD_FINISHED;
+});
+is($ret, '', 'using CMD_FINISHED');
+
+# Check the log for errors/warnings; unlike() diag()s the log itself on failure
+my $log = file_read_all($log_file->filename);
+unlike($log, qr/(?:WARN|ERROR): /, 'no warnings or errors logged');
+
+done_testing();
--
2.43.0