From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 0F527617A9
 for <pbs-devel@lists.proxmox.com>; Thu, 17 Dec 2020 15:50:20 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 073D2284AA
 for <pbs-devel@lists.proxmox.com>; Thu, 17 Dec 2020 15:50:20 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [212.186.127.180])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id 6CD70284A2
 for <pbs-devel@lists.proxmox.com>; Thu, 17 Dec 2020 15:50:19 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 31BAC45250
 for <pbs-devel@lists.proxmox.com>; Thu, 17 Dec 2020 15:50:19 +0100 (CET)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 17 Dec 2020 15:50:18 +0100
Message-Id: <20201217145018.2902-1-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.20.1
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.281 Adjusted score from AWL reputation of From: address
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 RCVD_IN_DNSWL_MED        -2.3 Sender listed at https://www.dnswl.org/,
 medium trust
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [daemon.rs]
Subject: [pbs-devel] [PATCH proxmox-backup v2] tools/daemon: improve reload
 behaviour
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Thu, 17 Dec 2020 14:50:20 -0000

It seems that sometimes the child process's signal gets handled
before the parent process's signal. Systemd then ignores the
child's signal (finished reloading) and only afterwards goes into the
reloading state because of the parent. That reload will never finish.

Instead, wait for the state to change to 'reloading' after sending
that signal in the parent, and only fork afterwards. This way
we ensure that systemd knows about the reloading before actually trying
to do it.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* introduce wait_service_is_(not_)state
    it is a bit more generic
    has a better name
* factor the common code out into get_service_state

 src/tools/daemon.rs | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/tools/daemon.rs b/src/tools/daemon.rs
index 6bb4a41b..0e3a174a 100644
--- a/src/tools/daemon.rs
+++ b/src/tools/daemon.rs
@@ -291,6 +291,7 @@ where
         if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
             log::error!("failed to notify systemd about the state change: {}", e);
         }
+        wait_service_is_state(service_name, "reloading").await?;
         if let Err(e) = reloader.take().unwrap().fork_restart() {
             log::error!("error during reload: {}", e);
             let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
@@ -305,7 +306,7 @@ where
 
     // FIXME: this is a hack, replace with sd_notify_barrier when available
     if server::is_reload_request() {
-        wait_service_is_active(service_name).await?;
+        wait_service_is_not_state(service_name, "reloading").await?;
     }
 
     log::info!("daemon shut down...");
@@ -313,26 +314,36 @@ where
 }
 
 // hack, do not use if unsure!
-async fn wait_service_is_active(service: &str) -> Result<(), Error> {
+async fn get_service_state(service: &str) -> Result<String, Error> {
+    let text = match tokio::process::Command::new("systemctl")
+        .args(&["is-active", service])
+        .output()
+        .await
+    {
+        Ok(output) => match String::from_utf8(output.stdout) {
+            Ok(text) => text,
+            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
+        },
+        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
+    };
+
+    Ok(text.trim().trim_start().to_string())
+}
+
+async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
     tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
-    loop {
-        let text = match tokio::process::Command::new("systemctl")
-            .args(&["is-active", service])
-            .output()
-            .await
-        {
-            Ok(output) => match String::from_utf8(output.stdout) {
-                Ok(text) => text,
-                Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
-            },
-            Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
-        };
+    while get_service_state(service).await? != state {
+        tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
+    }
+    Ok(())
+}
 
-        if text.trim().trim_start() != "reloading" {
-            return Ok(());
-        }
+async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
+    tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
+    while get_service_state(service).await? == state {
         tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
     }
+    Ok(())
 }
 
 #[link(name = "systemd")]
-- 
2.20.1