From: Fabian Grünbichler
To: Proxmox Backup Server development discussion
Date: Thu, 06 Mar 2025 12:02:26 +0100
Message-Id: <1741255167.q6i3s0tuox.astroid@yuna.none>
In-Reply-To: <20250305151453.388817-4-c.ebner@proxmox.com>
References: <20250305151453.388817-1-c.ebner@proxmox.com>
 <20250305151453.388817-4-c.ebner@proxmox.com>
Subject: Re: [pbs-devel] [PATCH v4 proxmox-backup 3/8] fix #5982: garbage
 collection: check atime updates are honored

On March 5, 2025 4:14 pm, Christian Ebner wrote:
> Check if the filesystem backing the chunk store actually updates the
> atime, to avoid potential data loss in phase 2 of garbage collection
> in case the atime update is not honored.
>
> Perform the check before phase 1 of garbage collection, as well as on
> datastore creation. The latter to detect early and disallow datastore
> creation on filesystem configurations which otherwise would most
> likely lead to data loss.
>
> Enable the atime update check by default, but allow opting out via a
> datastore tuning parameter flag for backwards compatibility. This is
> honored by both garbage collection and datastore creation.
>
> The check uses a 4 MiB fixed-size, unencrypted and compressed chunk
> as test marker, inserted if not present. This all-zero chunk is very
> likely to be present anyway for unencrypted backup contents with
> large all-zero regions using fixed-size chunking (e.g. VMs).
>
> To avoid cases where the timestamp will not be updated because of the
> Linux kernel's timestamp granularity, sleep for 1 second in between
> the stat and utimensat calls.
>
> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
> Signed-off-by: Christian Ebner
> ---
> changes since version 3:
> - Drop check for relatime-like behaviour, as all tested filesystems
>   (see cover letter) honor the direct atime update.
> - Reduce risk of races by moving the sleep to before the first
>   metadata call.
> - Also log whether the test chunk was newly inserted or pre-existed.
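
for anyone reading along, stripped of the chunk store specifics the
check boils down to something like this (rough standalone sketch that
calls libc's utimensat directly instead of the existing helpers -
function name and error handling are made up):

    use std::io;
    use std::path::Path;
    use std::time::Duration;

    // probe whether an explicit atime update on `path` is honored by
    // the backing filesystem (illustrative helper, not the patch code)
    fn atime_update_honored(path: &Path) -> io::Result<bool> {
        use std::os::unix::ffi::OsStrExt;

        // sleep first so the kernel's coarse timestamp granularity
        // cannot make an honored update look like an unchanged atime
        std::thread::sleep(Duration::from_secs(1));

        let before = std::fs::metadata(path)?;

        // explicitly set atime to "now" (like `touch -a`), leave mtime alone
        let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())?;
        let times = [
            libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_NOW },  // atime
            libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT }, // mtime
        ];
        if unsafe { libc::utimensat(libc::AT_FDCWD, c_path.as_ptr(), times.as_ptr(), 0) } != 0 {
            return Err(io::Error::last_os_error());
        }

        let after = std::fs::metadata(path)?;
        Ok(before.accessed()? < after.accessed()?)
    }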
>
>  pbs-datastore/src/chunk_store.rs | 87 ++++++++++++++++++++++++++++++--
>  pbs-datastore/src/datastore.rs   |  9 ++++
>  src/api2/config/datastore.rs     |  1 +
>  3 files changed, 92 insertions(+), 5 deletions(-)
>
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index 5e02909a1..e482330b3 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -1,6 +1,8 @@
> +use std::os::unix::fs::MetadataExt;
>  use std::os::unix::io::AsRawFd;
>  use std::path::{Path, PathBuf};
>  use std::sync::{Arc, Mutex};
> +use std::time::Duration;
>
>  use anyhow::{bail, format_err, Error};
>  use tracing::info;
> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>  };
>  use proxmox_worker_task::WorkerTaskContext;
>
> +use crate::data_blob::DataChunkBuilder;
>  use crate::file_formats::{
>      COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>  };
> @@ -93,6 +96,7 @@ impl ChunkStore {
>          uid: nix::unistd::Uid,
>          gid: nix::unistd::Gid,
>          sync_level: DatastoreFSyncLevel,
> +        atime_safety_check: bool,
>      ) -> Result<Self, Error>
>      where
>          P: Into<PathBuf>,
> @@ -147,7 +151,14 @@ impl ChunkStore {
>              }
>          }
>
> -        Self::open(name, base, sync_level)
> +        let chunk_store = Self::open(name, base, sync_level)?;
> +        if atime_safety_check {
> +            chunk_store.check_fs_atime_updates(true)?;
> +        } else {
> +            info!("atime safety check skipped.");
> +        }
> +
> +        Ok(chunk_store)
>      }
>
>      fn lockfile_path<P: AsRef<Path>>(base: P) -> PathBuf {
> @@ -442,6 +453,59 @@ impl ChunkStore {
>          Ok(())
>      }
>
> +    /// Check if atime updates are honored by the filesystem backing the chunk store.
> +    ///
> +    /// Checks if the atime is always updated by utimensat, taking into consideration the Linux
> +    /// kernel timestamp granularity.
> +    /// If `retry_on_file_changed` is set to true, the check is performed again on the changed
> +    /// file if a change of the file while testing is detected by differences in birth time or
> +    /// inode number.
> +    /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted
> +    /// in the chunk store if not yet present.
> +    /// Returns with error if the check could not be performed.
> +    pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
> +        let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
> +        let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;

this one logs the path if something goes wrong, which is nice in case
the problem is related to permissions/..

> +        let (path, _digest) = self.chunk_path(&digest);
> +
> +        // Take into account timestamp update granularity in the kernel
> +        std::thread::sleep(Duration::from_secs(1));
> +
> +        let metadata_before = std::fs::metadata(&path).map_err(Error::from)?;

but this one probably wouldn't, and would benefit from it?

> +
> +        // Second atime update if chunk was newly inserted above

nit: comment is inverted - it's the second atime *update* if the chunk
already existed, and the first *update* if we inserted it ;)

> +        self.cond_touch_path(&path, true)?;

this one includes the path in errors :)

> +        let metadata_now = std::fs::metadata(&path).map_err(Error::from)?;

this one might benefit from some context as well?
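
something along these lines would do (untested sketch, the helper name
`stat_with_context` is made up):

    use std::fs::Metadata;
    use std::path::Path;

    use anyhow::{format_err, Error};

    /// stat `path`, naming the offending file in the error message,
    /// like cond_touch_path already does
    fn stat_with_context(path: &Path) -> Result<Metadata, Error> {
        std::fs::metadata(path)
            .map_err(|err| format_err!("failed to stat test chunk {path:?} - {err}"))
    }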
> +
> +        // Check for the unlikely case that the file changed in-between the
> +        // two metadata calls, try to check once again on changed file
> +        if metadata_before.ino() != metadata_now.ino() {
> +            if retry_on_file_changed {
> +                return self.check_fs_atime_updates(false);
> +            }
> +            bail!("chunk file changed twice during atime testing, cannot proceed.");

adding the path or digest here might help as well

> +        }
> +
> +        match (
> +            metadata_before.accessed()? < metadata_now.accessed()?,

error context as well ;)

> +            pre_existing,
> +        ) {
> +            (true, false) => info!("Access time safety check successful (inserted chunk for test)."),
> +            (true, true) => {
> +                info!("Access time safety check successful (used pre-existing chunk for test).")
> +            }
> +            (false, false) => bail!(
> +                "Access time safety check failed (inserted chunk for test), is atime support \
> +                enabled on datastore backing filesystem?"
> +            ),
> +            (false, true) => bail!(
> +                "Access time safety check failed (used pre-existing chunk for test), is atime \
> +                support enabled on datastore backing filesystem?"
> +            ),
> +        }

this is a bit excessive, while at the same time not including the
interesting bits.. how about the following:

    if metadata_before.accessed()? >= metadata_now.accessed()? {
        let chunk_info_str = if pre_existing {
            "pre-existing"
        } else {
            "newly inserted"
        };
        warn!("Chunk metadata was not correctly updated during atime test:");
        info!("Metadata before update: {metadata_before:?}");
        info!("Metadata after update: {metadata_now:?}");
        bail!("atime safety check using {chunk_info_str} chunk failed, aborting GC!");
    }
    info!("atime safety check successful, proceeding with GC.");

would look like this in the task log:

Chunk metadata was not correctly updated during atime test:
Metadata before update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
Metadata after update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
TASK ERROR: atime safety check using pre-existing chunk failed, aborting GC!

alternatively we could of course do our own pretty printing, or add `#`
to the format string to get:

Metadata before update: Metadata {
    file_type: FileType {
        is_file: true,
        is_dir: false,
        is_symlink: false,
        ..
    },
    permissions: Permissions(
        FilePermissions {
            mode: 0o100500 (-r-x------),
        },
    ),
    len: 158,
    modified: SystemTime {
        tv_sec: 1673516596,
        tv_nsec: 65483144,
    },
    accessed: SystemTime {
        tv_sec: 1741256877,
        tv_nsec: 225020600,
    },
    created: SystemTime {
        tv_sec: 1673516596,
        tv_nsec: 65483144,
    },
    ..
}

I think it is important to get the logging right here, because if
somebody runs into a failing check, they should report the relevant
data to us without having to jump through hoops.
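
(that is just Debug's alternate formatting - tiny standalone example
for reference, using an arbitrary path:)

    fn main() -> std::io::Result<()> {
        let metadata = std::fs::metadata("/etc/hostname")?;
        // `{:?}` prints the Debug representation on a single line,
        // `{:#?}` ("alternate" form) pretty-prints it over multiple lines
        println!("{metadata:?}");
        println!("{metadata:#?}");
        Ok(())
    }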
> +
> +        Ok(())
> +    }
> +
>      pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>          // unwrap: only `None` in unit tests
>          assert!(self.locker.is_some());
> @@ -628,8 +692,15 @@ fn test_chunk_store1() {
>      let user = nix::unistd::User::from_uid(nix::unistd::Uid::current())
>          .unwrap()
>          .unwrap();
> -    let chunk_store =
> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None).unwrap();
> +    let chunk_store = ChunkStore::create(
> +        "test",
> +        &path,
> +        user.uid,
> +        user.gid,
> +        DatastoreFSyncLevel::None,
> +        true,
> +    )
> +    .unwrap();
>
>      let (chunk, digest) = crate::data_blob::DataChunkBuilder::new(&[0u8, 1u8])
>          .build()
> @@ -641,8 +712,14 @@ fn test_chunk_store1() {
>      let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
>      assert!(exists);
>
> -    let chunk_store =
> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None);
> +    let chunk_store = ChunkStore::create(
> +        "test",
> +        &path,
> +        user.uid,
> +        user.gid,
> +        DatastoreFSyncLevel::None,
> +        true,
> +    );
>      assert!(chunk_store.is_err());
>
>      if let Err(_e) = std::fs::remove_dir_all(".testdir") { /* ignore */ }
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 75c0c16ab..b62ddc172 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -1170,6 +1170,15 @@ impl DataStore {
>              upid: Some(upid.to_string()),
>              ..Default::default()
>          };
> +        let tuning: DatastoreTuning = serde_json::from_value(
> +            DatastoreTuning::API_SCHEMA
> +                .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
> +        )?;
> +        if tuning.gc_atime_safety_check.unwrap_or(true) {
> +            self.inner.chunk_store.check_fs_atime_updates(true)?;
> +        } else {
> +            info!("Filesystem atime safety check disabled by datastore tuning options.");
> +        }
>
>          info!("Start GC phase1 (mark used chunks)");
>
> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
> index fe3260f6d..35847fc45 100644
> --- a/src/api2/config/datastore.rs
> +++ b/src/api2/config/datastore.rs
> @@ -119,6 +119,7 @@ pub(crate) fn do_create_datastore(
>              backup_user.uid,
>              backup_user.gid,
>              tuning.sync_level.unwrap_or_default(),
> +            tuning.gc_atime_safety_check.unwrap_or(true),
>          )
>          .map(|_| ())
>      } else {
> -- 
> 2.39.5
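
one more note for readers on the opt-out semantics: a missing flag
counts as enabled, only an explicit `false` skips the check. minimal
sketch of that default (the `Tuning` struct is a trimmed-down stand-in
for the real DatastoreTuning, and the property name is assumed, not
taken from the patch):

    use serde::Deserialize;

    // stand-in for the real DatastoreTuning type, reduced to one flag
    #[derive(Deserialize)]
    struct Tuning {
        #[serde(rename = "gc-atime-safety-check")]
        gc_atime_safety_check: Option<bool>,
    }

    fn main() {
        for raw in [r#"{}"#, r#"{"gc-atime-safety-check":false}"#] {
            let tuning: Tuning = serde_json::from_str(raw).unwrap();
            // missing or `true` -> check runs; explicit `false` -> skipped
            println!(
                "{raw} -> check enabled: {}",
                tuning.gc_atime_safety_check.unwrap_or(true)
            );
        }
    }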