From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 1E21460484 for ; Wed, 7 Oct 2020 13:53:49 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 602662E306 for ; Wed, 7 Oct 2020 13:53:18 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 10EB12E280 for ; Wed, 7 Oct 2020 13:53:15 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id CF8F545C18 for ; Wed, 7 Oct 2020 13:53:14 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Wed, 7 Oct 2020 13:53:08 +0200 Message-Id: <20201007115308.6275-8-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201007115308.6275-1-s.reiter@proxmox.com> References: <20201007115308.6275-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.041 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup 7/7] fuse_loop: handle unmap on crashed instance X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup 
Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 07 Oct 2020 11:53:49 -0000 If a fuse_loop instance dies suddenly (e.g. SIGKILL), the FUSE mount and loop device assignment are left behind. We can determine this scenario on specific unmap, when the PID file is either missing or contains a PID of a non-running process, but the backing file and potentially the loop device are still there. If that's the case, do an "emergency cleanup", by unassigning the loopdev, calling 'fusermount -u' and then cleaning any leftover files manually. With this in place, pretty much any situation is now recoverable via only the 'proxmox-backup-client' binary, by calling 'unmap' either with or without parameters. Signed-off-by: Stefan Reiter --- src/tools/fuse_loop.rs | 50 +++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/src/tools/fuse_loop.rs b/src/tools/fuse_loop.rs index 05d92525..ab733f27 100644 --- a/src/tools/fuse_loop.rs +++ b/src/tools/fuse_loop.rs @@ -236,7 +236,7 @@ pub fn cleanup_unused_run_files(filter_name: Option) { // clean leftover FUSE instances (e.g. user called 'losetup -d' or similar) // does nothing if files are already stagnant (e.g. instance crashed etc...) 
- if let Ok(_) = unmap_from_backing(&path) { + if let Ok(_) = unmap_from_backing(&path, None) { // we have reaped some leftover instance, tell the user eprintln!( "Cleaned up dangling mapping '{}': no loop device assigned", @@ -280,19 +280,53 @@ fn get_backing_file(loopdev: &str) -> Result { Ok(backing_file.to_owned()) } -fn unmap_from_backing(backing_file: &Path) -> Result<(), Error> { +// call in broken state: we found the mapping, but the client is already dead, +// only thing to do is clean up what we can +fn emerg_cleanup (loopdev: Option<&str>, mut backing_file: PathBuf) { + eprintln!( + "warning: found mapping with dead process ({:?}), attempting cleanup", + &backing_file + ); + + if let Some(loopdev) = loopdev { + let _ = loopdev::unassign(loopdev); + } + + // killing the backing process does not cancel the FUSE mount automatically + let mut command = std::process::Command::new("fusermount"); + command.arg("-u"); + command.arg(&backing_file); + let _ = crate::tools::run_command(command, None); + + let _ = remove_file(&backing_file); + backing_file.set_extension("pid"); + let _ = remove_file(&backing_file); +} + +fn unmap_from_backing(backing_file: &Path, loopdev: Option<&str>) -> Result<(), Error> { let mut pid_path = PathBuf::from(backing_file); pid_path.set_extension("pid"); - let pid_str = read_to_string(&pid_path).map_err(|err| - format_err!("error reading pidfile {:?}: {}", &pid_path, err))?; + let pid_str = read_to_string(&pid_path).map_err(|err| { + if err.kind() == std::io::ErrorKind::NotFound { + emerg_cleanup(loopdev, backing_file.to_owned()); + } + format_err!("error reading pidfile {:?}: {}", &pid_path, err) + })?; let pid = pid_str.parse::().map_err(|err| format_err!("malformed PID ({}) in pidfile - {}", pid_str, err))?; let pid = Pid::from_raw(pid); // send SIGINT to trigger cleanup and exit in target process - signal::kill(pid, Signal::SIGINT)?; + match signal::kill(pid, Signal::SIGINT) { + Ok(()) => {}, + 
Err(nix::Error::Sys(nix::errno::Errno::ESRCH)) => { + emerg_cleanup(loopdev, backing_file.to_owned()); + return Ok(()); + }, + Err(e) => return Err(e.into()), + } // block until unmap is complete or timeout let start = time::epoch_i64(); @@ -364,16 +398,16 @@ pub fn unmap_loopdev>(loopdev: S) -> Result<(), Error> { } let backing_file = get_backing_file(loopdev)?; - unmap_from_backing(Path::new(&backing_file)) + unmap_from_backing(Path::new(&backing_file), Some(loopdev)) } /// Try and unmap a running proxmox-backup-client instance from the given name pub fn unmap_name>(name: S) -> Result<(), Error> { - for (mapping, _) in find_all_mappings()? { + for (mapping, loopdev) in find_all_mappings()? { if mapping.ends_with(name.as_ref()) { let mut path = PathBuf::from(RUN_DIR); path.push(&mapping); - return unmap_from_backing(&path); + return unmap_from_backing(&path, loopdev.as_deref()); } } Err(format_err!("no mapping for name '{}' found", name.as_ref())) -- 2.20.1