From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id B7B6B67029 for ; Wed, 29 Jul 2020 13:50:28 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id B4599E561 for ; Wed, 29 Jul 2020 13:50:28 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 1B443E559 for ; Wed, 29 Jul 2020 13:50:28 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id D5441433CC for ; Wed, 29 Jul 2020 13:50:27 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Wed, 29 Jul 2020 13:50:27 +0200 Message-Id: <20200729115027.23553-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.047 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods NO_DNS_FOR_FROM 0.379 Envelope sender has no MX or A DNS records RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [node.rs] Subject: [pbs-devel] [PATCH proxmox-backup v2] api2/node/termproxy: fix zombies on worker abort X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 29 Jul 2020 11:50:28 -0000 tokios kill_on_drop sometimes leaves zombies around, especially when there is not another tokio::process::Command spawned after so instead of relying on the 'kill_on_drop' feature, we explicitly kill the child on a worker abort. to be able to do this we have to use 'tokio::select' instead of 'futures::select' since the latter requires the future to be fused, which consumes the child handle, leaving us no possibility to kill it after fusing. (tokio::select does not need the futures to be fused, so we can reuse the child future after the select again) Signed-off-by: Dominik Csapak --- changes from v1: * if the worker future is ok, bubble errors up from child kill/await if the worker future is err, log errors from kill/await and return the error from the abort src/api2/node.rs | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/api2/node.rs b/src/api2/node.rs index 9be5fe6d..5a3ea0ff 100644 --- a/src/api2/node.rs +++ b/src/api2/node.rs @@ -2,10 +2,7 @@ use std::net::TcpListener; use std::os::unix::io::AsRawFd; use anyhow::{bail, format_err, Error}; -use futures::{ - future::{FutureExt, TryFutureExt}, - select, -}; +use futures::future::{FutureExt, TryFutureExt}; use hyper::body::Body; use hyper::http::request::Parts; use hyper::upgrade::Upgraded; @@ -172,7 +169,6 @@ async fn termproxy( let mut cmd = tokio::process::Command::new("/usr/bin/termproxy"); cmd.args(&arguments) - .kill_on_drop(true) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()); @@ -199,8 +195,9 @@ async fn termproxy( Ok::<(), Error>(()) }; - select!{ - res = child.fuse() => { + let mut needs_kill = false; + let res = tokio::select!{ + res = &mut child => { let exit_code = res?; if !exit_code.success() { match exit_code.code() { @@ -210,10 +207,29 @@ async fn termproxy( } Ok(()) }, - res = stdout_fut.fuse() => res, - res = stderr_fut.fuse() => res, - res = worker.abort_future().fuse() => res.map_err(Error::from), + res = stdout_fut => res, + res = stderr_fut => res, + res = worker.abort_future() => { + needs_kill = true; + res.map_err(Error::from) + } + }; + + if needs_kill { + if res.is_ok() { + child.kill()?; + child.await?; + return Ok(()); + } + + if let Err(err) = child.kill() { + worker.warn(format!("error killing termproxy: {}", err)); + } else if let Err(err) = child.await { + worker.warn(format!("error awaiting termproxy: {}", err)); + } } + + res }, )?; -- 2.20.1