From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id C18D0B795 for ; Thu, 7 Apr 2022 10:45:53 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id AD6632B33 for ; Thu, 7 Apr 2022 10:45:23 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id CCF242B2A for ; Thu, 7 Apr 2022 10:45:22 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 9E83340FD1 for ; Thu, 7 Apr 2022 10:45:22 +0200 (CEST) Date: Thu, 7 Apr 2022 10:45:21 +0200 From: Wolfgang Bumiller To: Stefan Sterz Cc: pbs-devel@lists.proxmox.com Message-ID: <20220407084521.xbk36joix4sg6nos@olga.proxmox.com> References: <20220406093955.3180508-1-s.sterz@proxmox.com> <20220406093955.3180508-2-s.sterz@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20220406093955.3180508-2-s.sterz@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.343 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: Re: [pbs-devel] [PATCH proxmox-backup v1 1/3] fix #3935: datastore/api2/backup: move datastore locking to '/run' X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Apr 2022 08:45:53 -0000 On Wed, Apr 06, 2022 at 11:39:53AM +0200, Stefan Sterz wrote: (...) > @@ -308,27 +308,28 @@ impl DataStore { > > if removed_all { > // no snapshots left, we can now safely remove the empty folder > - std::fs::remove_dir_all(&full_path) > - .map_err(|err| { > - format_err!( > - "removing backup group directory {:?} failed - {}", > - full_path, > - err, > - ) > - })?; > + std::fs::remove_dir_all(&full_path).map_err(|err| { > + format_err!( > + "removing backup group directory {:?} failed - {}", > + full_path, > + err, > + ) > + })?; The above looks like a pure indentation change, unless I'm missing something. If I'm right, please try to avoid this in an otherwise functionality-changing patch. > + > + if let Ok(path) = self.group_lock_path(backup_group) { The only possible failure here is if *creating* the intermediate paths fails. I'm not a friend of this, see my comment at the `_path()` functions below... > + let _ = std::fs::remove_file(path); > + } > } > > Ok(removed_all) > } > > /// Remove a backup directory including all content > - pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> { > - > - let full_path = self.snapshot_path(backup_dir); > - > + pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> { > let (_guard, _manifest_guard); > + > if !force { > - _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; > + _guard = self.lock_snapshot(backup_dir)?; > _manifest_guard = self.lock_manifest(backup_dir)?; > } > > @@ -336,6 +337,8 @@ impl DataStore { > bail!("cannot remove protected snapshot"); > } > > + let full_path = self.snapshot_path(backup_dir); > + > log::info!("removing backup snapshot {:?}", full_path); > std::fs::remove_dir_all(&full_path) > .map_err(|err| { > @@ -352,6 +355,10 @@ impl DataStore { > let _ = std::fs::remove_file(path); > } > > + if let Ok(path) = self.snapshot_lock_path(backup_dir) { (same) > + let _ = std::fs::remove_file(path); > + } > + > Ok(()) > } > (...) > @@ -952,4 +957,72 @@ impl DataStore { > > Ok(chunk_list) > } > -} > + > + fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> Result { > + let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name()); > + std::fs::create_dir_all(&path)?; Given the name, this function is to query a path. Apart from the helpers which also create the lock files, the only other current user doesn't actually *need* the path created, since it only wants to remove it. I think we should leave the mkdir up to the `fn lock_helper` and drop the `Result` here. > + > + // hash relative path to get a fixed length unique file name > + let rpath_hash = > + openssl::sha::sha256(backup_dir.relative_path().to_str().unwrap().as_bytes()); Please use `.as_os_str().as_bytes()` via the `OsStrExt` trait. Never use `.to_str().unwrap()` on paths. > + Ok(path.join(hex::encode(rpath_hash))) > + } > + > + fn group_lock_path(&self, group: &BackupGroup) -> Result { Same for this function as for `snapshot_lock_path()` above. > + let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name()); > + std::fs::create_dir_all(&path)?; > + > + // hash relative path to get a fixed length unique file name > + let rpath_hash = openssl::sha::sha256(group.group_path().to_str().unwrap().as_bytes()); > + Ok(path.join(hex::encode(rpath_hash))) > + } > + > + // this function helps implement the double stat'ing procedure. it > + // avoids certain race conditions upon lock deletion. > + fn lock_helper(&self, path: &std::path::Path, lock_fn: F) -> Result > + where > + F: Fn(&std::path::Path) -> Result, > + { This is where I'd put the `create_dir_all` part instead. Also, instead of a path based stat here: > + let stat = match nix::sys::stat::stat(path) { > + Ok(result) => result.st_ino, > + // lock hasn't been created yet, ignore check > + Err(e) if e.not_found() => return Ok(lock_fn(path)?), > + Err(e) => bail!("could not stat lock file before locking! - {}", e), > + }; > + > + let lock = lock_fn(path)?; I'd prefer an `fstat` on the file handle here so the path access happens only twice instead of 3 times. For that, you can `impl AsRawFd for BackupLockGuard` (which can just return -1 for when its inner option is `None`. (Which it shouldn't even have actually and I wonder if the `cfg(test)` in the tape inventory code (which is the reason for the Option apparently) could be changed further so that it doesn't actually use this particular type at all in tests, but that's a different issue.) > + > + if stat != nix::sys::stat::stat(path)?.st_ino { Please don't use '?' here because if the file is currently removed we also want the same nice error message. Something like `stat().map(|st| st.st_ino != stat).unwrap_or(true)` might work. > + bail!("could not acquire lock, another thread modified the lock file"); > + } > + > + Ok(lock) > + } > + > + /// Lock a snapshot exclusively > + pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result { > + self.lock_helper(&self.snapshot_lock_path(backup_dir)?, |p| { > + open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| { > + format_err!("unable to acquire snapshot lock {:?} - {}", &p, err) > + }) > + }) > + } > + > + /// Acquire a shared lock on a snapshot > + pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result { > + self.lock_helper(&self.snapshot_lock_path(backup_dir)?, |p| { > + open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| { > + format_err!("unable to acquire shared snapshot lock {:?} - {}", &p, err) > + }) > + }) > + } > + > + /// Lock a group exclusively > + pub fn lock_group(&self, group: &BackupGroup) -> Result { > + self.lock_helper(&self.group_lock_path(group)?, |p| { > + open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| { > + format_err!("unable to acquire backup group lock {:?} - {}", &p, err) > + }) > + }) > + } > +} > \ No newline at end of file