all lists on lists.proxmox.com
 help / color / mirror / Atom feed
* [pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading
@ 2021-09-30  7:18 Dominik Csapak
  2021-09-30  8:19 ` Thomas Lamprecht
  2021-10-02  9:52 ` [pbs-devel] applied: " Thomas Lamprecht
  0 siblings, 2 replies; 3+ messages in thread
From: Dominik Csapak @ 2021-09-30  7:18 UTC (permalink / raw)
  To: pbs-devel

until now, we manually polled the systemd service state during a reload
so that the sd_notify messages get processed in the correct order
(RELOAD(old) -> MAINPID(old) -> READY(new))

with systemd >= 246 there is now 'sd_notify_barrier' which
blocks until systemd processed all prior messages

with that change, the daemon does not need to know the service name anymore

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 debian/control                                |  2 +-
 .../examples/minimal-rest-server.rs           |  1 -
 proxmox-rest-server/src/daemon.rs             | 61 +++++++------------
 src/bin/proxmox-backup-api.rs                 |  1 -
 src/bin/proxmox-backup-proxy.rs               |  1 -
 5 files changed, 22 insertions(+), 44 deletions(-)

diff --git a/debian/control b/debian/control
index 02b094bd..8aec88cf 100644
--- a/debian/control
+++ b/debian/control
@@ -89,7 +89,7 @@ Build-Depends: debhelper (>= 12),
  librust-zstd-0.6+default-dev,
  libacl1-dev,
  libfuse3-dev,
- libsystemd-dev,
+ libsystemd-dev (>= 246-~~),
  uuid-dev,
  libsgutils2-dev,
  bash-completion,
diff --git a/proxmox-rest-server/examples/minimal-rest-server.rs b/proxmox-rest-server/examples/minimal-rest-server.rs
index 22477039..2b6a40a1 100644
--- a/proxmox-rest-server/examples/minimal-rest-server.rs
+++ b/proxmox-rest-server/examples/minimal-rest-server.rs
@@ -207,7 +207,6 @@ async fn run() -> Result<(), Error> {
                 Ok(())
             })
         },
-        "example_server",
     ).await?;
 
     Ok(())
diff --git a/proxmox-rest-server/src/daemon.rs b/proxmox-rest-server/src/daemon.rs
index 9d48ecd2..5d59fce2 100644
--- a/proxmox-rest-server/src/daemon.rs
+++ b/proxmox-rest-server/src/daemon.rs
@@ -186,6 +186,9 @@ impl Reloader {
                 if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) {
                     log::error!("failed to notify systemd about the new main pid: {}", e);
                 }
+                if let Err(e) = systemd_notify_barrier() {
+                    log::error!("failed to wait on systemd-processing: {}", e);
+                }
 
                 // notify child that it is now the new main process:
                 if let Err(e) = pold.write_all(&[1u8]) {
@@ -248,7 +251,6 @@ impl Reloadable for tokio::net::TcpListener {
 pub async fn create_daemon<F, S>(
     address: std::net::SocketAddr,
     create_service: F,
-    service_name: &str,
 ) -> Result<(), Error>
 where
     F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>,
@@ -289,7 +291,10 @@ where
         if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
             log::error!("failed to notify systemd about the state change: {}", e);
         }
-        wait_service_is_state(service_name, "reloading").await?;
+        if let Err(e) = systemd_notify_barrier() {
+            log::error!("failed to wait on systemd-processing: {}", e);
+        }
+
         if let Err(e) = reloader.take().unwrap().fork_restart() {
             log::error!("error during reload: {}", e);
             let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
@@ -302,51 +307,14 @@ where
         future.await;
     }
 
-    // FIXME: this is a hack, replace with sd_notify_barrier when available
-    if crate::is_reload_request() {
-        wait_service_is_not_state(service_name, "reloading").await?;
-    }
-
     log::info!("daemon shut down.");
     Ok(())
 }
 
-// hack, do not use if unsure!
-async fn get_service_state(service: &str) -> Result<String, Error> {
-    let text = match tokio::process::Command::new("systemctl")
-        .args(&["is-active", service])
-        .output()
-        .await
-    {
-        Ok(output) => match String::from_utf8(output.stdout) {
-            Ok(text) => text,
-            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
-        },
-        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
-    };
-
-    Ok(text.trim().trim_start().to_string())
-}
-
-async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
-    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
-    while get_service_state(service).await? != state {
-        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
-    }
-    Ok(())
-}
-
-async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
-    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
-    while get_service_state(service).await? == state {
-        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
-    }
-    Ok(())
-}
-
 #[link(name = "systemd")]
 extern "C" {
     fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
+    fn sd_notify_barrier(unset_environment: c_int, timeout: u64) -> c_int;
 }
 
 /// Systemd sercice startup states (see: ``man sd_notify``)
@@ -358,6 +326,19 @@ pub enum SystemdNotify {
     MainPid(nix::unistd::Pid),
 }
 
+/// Waits until all previously sent messages with sd_notify are processed
+pub fn systemd_notify_barrier() -> Result<(), Error> {
+    let rc = unsafe { sd_notify_barrier(0, u64::MAX) }; // infinite timeout
+    if rc < 0 {
+        bail!(
+            "systemd_notify_barrier failed: {}",
+            std::io::Error::from_raw_os_error(-rc),
+        );
+    }
+
+    Ok(())
+}
+
 /// Tells systemd the startup state of the service (see: ``man sd_notify``)
 pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> {
 
diff --git a/src/bin/proxmox-backup-api.rs b/src/bin/proxmox-backup-api.rs
index 35cfc5f0..97b7a5e8 100644
--- a/src/bin/proxmox-backup-api.rs
+++ b/src/bin/proxmox-backup-api.rs
@@ -119,7 +119,6 @@ async fn run() -> Result<(), Error> {
                     .await
             })
         },
-        "proxmox-backup.service",
     );
 
     proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
index a98d4c1f..a548b535 100644
--- a/src/bin/proxmox-backup-proxy.rs
+++ b/src/bin/proxmox-backup-proxy.rs
@@ -262,7 +262,6 @@ async fn run() -> Result<(), Error> {
                     .await
             })
         },
-        "proxmox-backup-proxy.service",
     );
 
     proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
-- 
2.30.2





^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading
  2021-09-30  7:18 [pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading Dominik Csapak
@ 2021-09-30  8:19 ` Thomas Lamprecht
  2021-10-02  9:52 ` [pbs-devel] applied: " Thomas Lamprecht
  1 sibling, 0 replies; 3+ messages in thread
From: Thomas Lamprecht @ 2021-09-30  8:19 UTC (permalink / raw)
  To: Proxmox Backup Server development discussion, Dominik Csapak

On 30.09.21 09:18, Dominik Csapak wrote:
> until now, we manually polled the systemd service state during a reload
> so that the sd_notify messages get processed in the correct order
> (RELOAD(old) -> MAINPID(old) -> READY(new))
> 
> with systemd >= 246 there is now 'sd_notify_barrier' which
> blocks until systemd processed all prior messages
> 
> with that change, the daemon does not need to know the service name anymore
> 

looks OK, much nicer, two nits inline (that can be followed up too)

> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
>  debian/control                                |  2 +-
>  .../examples/minimal-rest-server.rs           |  1 -
>  proxmox-rest-server/src/daemon.rs             | 61 +++++++------------
>  src/bin/proxmox-backup-api.rs                 |  1 -
>  src/bin/proxmox-backup-proxy.rs               |  1 -
>  5 files changed, 22 insertions(+), 44 deletions(-)
> 
> diff --git a/debian/control b/debian/control
> index 02b094bd..8aec88cf 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -89,7 +89,7 @@ Build-Depends: debhelper (>= 12),
>   librust-zstd-0.6+default-dev,
>   libacl1-dev,
>   libfuse3-dev,
> - libsystemd-dev,
> + libsystemd-dev (>= 246-~~),
>   uuid-dev,
>   libsgutils2-dev,
>   bash-completion,
> diff --git a/proxmox-rest-server/examples/minimal-rest-server.rs b/proxmox-rest-server/examples/minimal-rest-server.rs
> index 22477039..2b6a40a1 100644
> --- a/proxmox-rest-server/examples/minimal-rest-server.rs
> +++ b/proxmox-rest-server/examples/minimal-rest-server.rs
> @@ -207,7 +207,6 @@ async fn run() -> Result<(), Error> {
>                  Ok(())
>              })
>          },
> -        "example_server",
>      ).await?;
>  
>      Ok(())
> diff --git a/proxmox-rest-server/src/daemon.rs b/proxmox-rest-server/src/daemon.rs
> index 9d48ecd2..5d59fce2 100644
> --- a/proxmox-rest-server/src/daemon.rs
> +++ b/proxmox-rest-server/src/daemon.rs
> @@ -186,6 +186,9 @@ impl Reloader {
>                  if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) {
>                      log::error!("failed to notify systemd about the new main pid: {}", e);
>                  }
> +                if let Err(e) = systemd_notify_barrier() {

maybe add a comment regarding ordering here, e.g., something like
// ensure systemd got the message about the new main PID before continuing, else it gets confused

> +                    log::error!("failed to wait on systemd-processing: {}", e);
> +                }
>  
>                  // notify child that it is now the new main process:
>                  if let Err(e) = pold.write_all(&[1u8]) {
> @@ -248,7 +251,6 @@ impl Reloadable for tokio::net::TcpListener {
>  pub async fn create_daemon<F, S>(
>      address: std::net::SocketAddr,
>      create_service: F,
> -    service_name: &str,
>  ) -> Result<(), Error>
>  where
>      F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>,
> @@ -289,7 +291,10 @@ where
>          if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
>              log::error!("failed to notify systemd about the state change: {}", e);
>          }
> -        wait_service_is_state(service_name, "reloading").await?;
> +        if let Err(e) = systemd_notify_barrier() {
> +            log::error!("failed to wait on systemd-processing: {}", e);
> +        }
> +
>          if let Err(e) = reloader.take().unwrap().fork_restart() {
>              log::error!("error during reload: {}", e);
>              let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
> @@ -302,51 +307,14 @@ where
>          future.await;
>      }
>  
> -    // FIXME: this is a hack, replace with sd_notify_barrier when available
> -    if crate::is_reload_request() {
> -        wait_service_is_not_state(service_name, "reloading").await?;
> -    }
> -
>      log::info!("daemon shut down.");
>      Ok(())
>  }
>  
> -// hack, do not use if unsure!
> -async fn get_service_state(service: &str) -> Result<String, Error> {
> -    let text = match tokio::process::Command::new("systemctl")
> -        .args(&["is-active", service])
> -        .output()
> -        .await
> -    {
> -        Ok(output) => match String::from_utf8(output.stdout) {
> -            Ok(text) => text,
> -            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
> -        },
> -        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
> -    };
> -
> -    Ok(text.trim().trim_start().to_string())
> -}
> -
> -async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
> -    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
> -    while get_service_state(service).await? != state {
> -        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
> -    }
> -    Ok(())
> -}
> -
> -async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
> -    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
> -    while get_service_state(service).await? == state {
> -        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
> -    }
> -    Ok(())
> -}
> -
>  #[link(name = "systemd")]
>  extern "C" {
>      fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
> +    fn sd_notify_barrier(unset_environment: c_int, timeout: u64) -> c_int;
>  }
>  
>  /// Systemd sercice startup states (see: ``man sd_notify``)
> @@ -358,6 +326,19 @@ pub enum SystemdNotify {
>      MainPid(nix::unistd::Pid),
>  }
>  
> +/// Waits until all previously sent messages with sd_notify are processed
> +pub fn systemd_notify_barrier() -> Result<(), Error> {
> +    let rc = unsafe { sd_notify_barrier(0, u64::MAX) }; // infinite timeout
> +    if rc < 0 {
> +        bail!(
> +            "systemd_notify_barrier failed: {}",
> +            std::io::Error::from_raw_os_error(-rc),
> +        );

a single line for the above would work out nicely for our <= 100 cc in rust here :)

> +    }
> +
> +    Ok(())
> +}
> +
>  /// Tells systemd the startup state of the service (see: ``man sd_notify``)
>  pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> {
>  
> diff --git a/src/bin/proxmox-backup-api.rs b/src/bin/proxmox-backup-api.rs
> index 35cfc5f0..97b7a5e8 100644
> --- a/src/bin/proxmox-backup-api.rs
> +++ b/src/bin/proxmox-backup-api.rs
> @@ -119,7 +119,6 @@ async fn run() -> Result<(), Error> {
>                      .await
>              })
>          },
> -        "proxmox-backup.service",
>      );
>  
>      proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
> diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
> index a98d4c1f..a548b535 100644
> --- a/src/bin/proxmox-backup-proxy.rs
> +++ b/src/bin/proxmox-backup-proxy.rs
> @@ -262,7 +262,6 @@ async fn run() -> Result<(), Error> {
>                      .await
>              })
>          },
> -        "proxmox-backup-proxy.service",
>      );
>  
>      proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
> 





^ permalink raw reply	[flat|nested] 3+ messages in thread

* [pbs-devel] applied: [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading
  2021-09-30  7:18 [pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading Dominik Csapak
  2021-09-30  8:19 ` Thomas Lamprecht
@ 2021-10-02  9:52 ` Thomas Lamprecht
  1 sibling, 0 replies; 3+ messages in thread
From: Thomas Lamprecht @ 2021-10-02  9:52 UTC (permalink / raw)
  To: Proxmox Backup Server development discussion, Dominik Csapak

On 30.09.21 09:18, Dominik Csapak wrote:
> until now, we manually polled the systemd service state during a reload
> so that the sd_notify messages get processed in the correct order
> (RELOAD(old) -> MAINPID(old) -> READY(new))
> 
> with systemd >= 246 there is now 'sd_notify_barrier' which
> blocks until systemd processed all prior messages
> 
> with that change, the daemon does not need to know the service name anymore
> 
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
>  debian/control                                |  2 +-
>  .../examples/minimal-rest-server.rs           |  1 -
>  proxmox-rest-server/src/daemon.rs             | 61 +++++++------------
>  src/bin/proxmox-backup-api.rs                 |  1 -
>  src/bin/proxmox-backup-proxy.rs               |  1 -
>  5 files changed, 22 insertions(+), 44 deletions(-)
> 

applied, thanks! made a few followups, see inline.

> @@ -248,7 +251,6 @@ impl Reloadable for tokio::net::TcpListener {
>  pub async fn create_daemon<F, S>(
>      address: std::net::SocketAddr,
>      create_service: F,
> -    service_name: &str,
>  ) -> Result<(), Error>
>  where
>      F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>,
> @@ -289,7 +291,10 @@ where
>          if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
>              log::error!("failed to notify systemd about the state change: {}", e);
>          }
> -        wait_service_is_state(service_name, "reloading").await?;

I added a comment here to actually explain the reason why.

> +        if let Err(e) = systemd_notify_barrier() {
> +            log::error!("failed to wait on systemd-processing: {}", e);
> +        }
> +
>          if let Err(e) = reloader.take().unwrap().fork_restart() {
>              log::error!("error during reload: {}", e);
>              let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));


>  #[link(name = "systemd")]
>  extern "C" {
>      fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
> +    fn sd_notify_barrier(unset_environment: c_int, timeout: u64) -> c_int;
>  }
>  
>  /// Systemd sercice startup states (see: ``man sd_notify``)
> @@ -358,6 +326,19 @@ pub enum SystemdNotify {
>      MainPid(nix::unistd::Pid),
>  }
>  
> +/// Waits until all previously sent messages with sd_notify are processed
> +pub fn systemd_notify_barrier() -> Result<(), Error> {
> +    let rc = unsafe { sd_notify_barrier(0, u64::MAX) }; // infinite timeout

I exposed the timeout to the caller, making it act more like the thin FFI wrapper
it is.

> +    if rc < 0 {
> +        bail!(
> +            "systemd_notify_barrier failed: {}",
> +            std::io::Error::from_raw_os_error(-rc),
> +        );
> +    }
> +
> +    Ok(())
> +}
> +

this was barging in between the SystemdNotify enum, which is explicitly for, well,
systemd_notify below, while not using that enum itself -> so I moved it out of the way.

>  /// Tells systemd the startup state of the service (see: ``man sd_notify``)
>  pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> {
>  





^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-10-02  9:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-30  7:18 [pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading Dominik Csapak
2021-09-30  8:19 ` Thomas Lamprecht
2021-10-02  9:52 ` [pbs-devel] applied: " Thomas Lamprecht

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal