public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [pbs-devel] [PATCH proxmox v2] daemon: clean up middle process of double fork
@ 2024-12-03 11:23 Dominik Csapak
  2025-02-07 14:35 ` Wolfgang Bumiller
  0 siblings, 1 reply; 2+ messages in thread
From: Dominik Csapak @ 2024-12-03 11:23 UTC (permalink / raw)
  To: pbs-devel

so we don't leave around a zombie process when the old daemon still
needs to run, because of e.g. a running task.

Since this is mostly a cosmetic issue though, only try a clean up with a
10 second timeout, so we don't block forever. (It could happen that it
didn't exit at that point, but it's very unlikely.)

In case we do run into the timeout here, the process will not be
collected until the parent process exits and the middle process is
collected by pid 1.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* use a timeout
* log the error

 proxmox-daemon/src/server.rs | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/proxmox-daemon/src/server.rs b/proxmox-daemon/src/server.rs
index efea9078..27153fc2 100644
--- a/proxmox-daemon/src/server.rs
+++ b/proxmox-daemon/src/server.rs
@@ -8,6 +8,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
 use std::panic::UnwindSafe;
 use std::path::PathBuf;
 use std::pin::{pin, Pin};
+use std::time::Duration;
 
 use anyhow::{bail, format_err, Error};
 use futures::future::{self, Either};
@@ -15,6 +16,7 @@ use nix::unistd::{fork, ForkResult};
 
 use proxmox_sys::fd::fd_change_cloexec;
 use proxmox_sys::fs::CreateOptions;
+use proxmox_sys::linux::timer;
 
 type BoxedStoreFunc = Box<dyn FnOnce() -> Result<String, Error> + UnwindSafe + Send>;
 
@@ -165,10 +167,12 @@ impl Reloader {
                 // No matter how we managed to get here, this is the time where we bail out quickly:
                 unsafe { libc::_exit(-1) }
             }
-            Ok(ForkResult::Parent { child }) => {
+            Ok(ForkResult::Parent {
+                child: middle_child,
+            }) => {
                 log::debug!(
                     "forked off a new server (first pid: {}), waiting for 2nd pid",
-                    child
+                    middle_child
                 );
                 std::mem::drop(pnew);
                 let mut pold = std::fs::File::from(pold);
@@ -211,6 +215,10 @@ impl Reloader {
                     log::error!("child vanished during reload: {}", e);
                 }
 
+                if let Err(e) = waitpid_with_timeout(middle_child, Duration::from_secs(10)) {
+                    log::error!("waitpid for middle process failed: {e}");
+                }
+
                 Ok(())
             }
             Err(e) => {
@@ -230,6 +238,27 @@ impl Reloader {
     }
 }
 
+fn waitpid_with_timeout(pid: nix::unistd::Pid, timeout: std::time::Duration) -> Result<(), Error> {
+    // unblock the timeout signal temporarily
+    let _sigblock_guard = timer::unblock_timeout_signal();
+
+    // setup a timeout timer
+    let mut timer = timer::Timer::create(
+        timer::Clock::Realtime,
+        timer::TimerEvent::ThisThreadSignal(timer::SIGTIMEOUT),
+    )?;
+
+    timer.arm(
+        timer::TimerSpec::new()
+            .value(Some(timeout))
+            .interval(Some(Duration::from_millis(10))),
+    )?;
+
+    nix::sys::wait::waitpid(pid, None)?;
+
+    Ok(())
+}
+
 fn fd_store_func(fd: RawFd) -> Result<BoxedStoreFunc, Error> {
     let fd = unsafe {
         OwnedFd::from_raw_fd(nix::fcntl::fcntl(
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [pbs-devel] [PATCH proxmox v2] daemon: clean up middle process of double fork
  2024-12-03 11:23 [pbs-devel] [PATCH proxmox v2] daemon: clean up middle process of double fork Dominik Csapak
@ 2025-02-07 14:35 ` Wolfgang Bumiller
  0 siblings, 0 replies; 2+ messages in thread
From: Wolfgang Bumiller @ 2025-02-07 14:35 UTC (permalink / raw)
  To: Dominik Csapak; +Cc: pbs-devel

On Tue, Dec 03, 2024 at 12:23:49PM +0100, Dominik Csapak wrote:
> so we don't leave around a zombie process when the old daemon still
> needs to run, because of e.g. a running task.
> 
> Since this is mostly a cosmetic issue though, only try a clean up with a
> 10 second timeout, so we don't block forever. (It could happen that it
> didn't exit at that point, but it's very unlikely.)
> 
> In case we do run into the timeout here, the process will not be
> collected until the parent process exits and the middle process is
> collected by pid 1.
> 
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
> changes from v1:
> * use a timeout
> * log the error
> 
>  proxmox-daemon/src/server.rs | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/proxmox-daemon/src/server.rs b/proxmox-daemon/src/server.rs
> index efea9078..27153fc2 100644
> --- a/proxmox-daemon/src/server.rs
> +++ b/proxmox-daemon/src/server.rs
> @@ -8,6 +8,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
>  use std::panic::UnwindSafe;
>  use std::path::PathBuf;
>  use std::pin::{pin, Pin};
> +use std::time::Duration;
>  
>  use anyhow::{bail, format_err, Error};
>  use futures::future::{self, Either};
> @@ -15,6 +16,7 @@ use nix::unistd::{fork, ForkResult};
>  
>  use proxmox_sys::fd::fd_change_cloexec;
>  use proxmox_sys::fs::CreateOptions;
> +use proxmox_sys::linux::timer;
>  
>  type BoxedStoreFunc = Box<dyn FnOnce() -> Result<String, Error> + UnwindSafe + Send>;
>  
> @@ -165,10 +167,12 @@ impl Reloader {

The error case above here does a `log::error!()` - that is literally the
only thing which could possibly hang.
We could have a 2nd socketpair for stringified errors (or turn the
existing one into a datagram based  socket with a type prefix for
messages) and just use a blocking wait().

But if we really want to use a timeout, I'd rather add a pidfd wrapper,
use `pidfd_open()` (it's our fork, so since we don't `wait()` for it, we
should get the correct process, and we could - in the future - replace
`fork()` with an equivalent `clone()` returning a pidfd).
The pidfd can be used with poll/epoll (so it would even work for async
code via `AsyncFd`).

This is also a case where we could consider switching to a vfork() type
of fork. That might even play nice with whatever logger we might be
using, making it even less likely for anything to block...

>                  // No matter how we managed to get here, this is the time where we bail out quickly:
>                  unsafe { libc::_exit(-1) }
>              }
> -            Ok(ForkResult::Parent { child }) => {
> +            Ok(ForkResult::Parent {
> +                child: middle_child,
> +            }) => {
>                  log::debug!(
>                      "forked off a new server (first pid: {}), waiting for 2nd pid",
> -                    child
> +                    middle_child
>                  );
>                  std::mem::drop(pnew);
>                  let mut pold = std::fs::File::from(pold);
> @@ -211,6 +215,10 @@ impl Reloader {
>                      log::error!("child vanished during reload: {}", e);
>                  }
>  
> +                if let Err(e) = waitpid_with_timeout(middle_child, Duration::from_secs(10)) {
> +                    log::error!("waitpid for middle process failed: {e}");
> +                }
> +
>                  Ok(())
>              }
>              Err(e) => {
> @@ -230,6 +238,27 @@ impl Reloader {
>      }
>  }
>  
> +fn waitpid_with_timeout(pid: nix::unistd::Pid, timeout: std::time::Duration) -> Result<(), Error> {
> +    // unblock the timeout signal temporarily
> +    let _sigblock_guard = timer::unblock_timeout_signal();
> +
> +    // setup a timeout timer
> +    let mut timer = timer::Timer::create(
> +        timer::Clock::Realtime,
> +        timer::TimerEvent::ThisThreadSignal(timer::SIGTIMEOUT),
> +    )?;
> +
> +    timer.arm(
> +        timer::TimerSpec::new()
> +            .value(Some(timeout))
> +            .interval(Some(Duration::from_millis(10))),
> +    )?;
> +
> +    nix::sys::wait::waitpid(pid, None)?;
> +
> +    Ok(())
> +}
> +
>  fn fd_store_func(fd: RawFd) -> Result<BoxedStoreFunc, Error> {
>      let fd = unsafe {
>          OwnedFd::from_raw_fd(nix::fcntl::fcntl(
> -- 
> 2.39.5


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-02-07 14:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-03 11:23 [pbs-devel] [PATCH proxmox v2] daemon: clean up middle process of double fork Dominik Csapak
2025-02-07 14:35 ` Wolfgang Bumiller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal