From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pbs-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id 3C8DE1FF16B
	for <inbox@lore.proxmox.com>; Thu, 20 Mar 2025 15:59:51 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 8D60E5C9D;
	Thu, 20 Mar 2025 15:59:50 +0100 (CET)
Message-ID: <ab6008e1-cc84-406f-a7ae-d20daad50baf@proxmox.com>
Date: Thu, 20 Mar 2025 15:59:45 +0100
MIME-Version: 1.0
User-Agent: Mozilla Thunderbird
To: pbs-devel@lists.proxmox.com
References: <20250320131711.312894-1-c.ebner@proxmox.com>
 <20250320131711.312894-6-c.ebner@proxmox.com>
Content-Language: en-US, de-DE
From: Christian Ebner <c.ebner@proxmox.com>
In-Reply-To: <20250320131711.312894-6-c.ebner@proxmox.com>
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.029 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [datastore.rs, proxmox.com]
Subject: Re: [pbs-devel] [PATCH v7 proxmox-backup 05/10] fix #5982: garbage
 collection: check atime updates are honored
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
Reply-To: Proxmox Backup Server development discussion
 <pbs-devel@lists.proxmox.com>
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Errors-To: pbs-devel-bounces@lists.proxmox.com
Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com>

On 3/20/25 14:17, Christian Ebner wrote:
> Check if the filesystem backing the chunk store actually updates the
> atime to avoid potential data loss in phase 2 of garbage collection,
> in case the atime update is not honored.
> 
> Perform the check before phase 1 of garbage collection, as well as
> on datastore creation. The latter to early detect and disallow
> datastore creation on filesystem configurations which otherwise most
> likely would lead to data losses. To perform the check also when
> reusing an existing datastore, open the chunks store also on reuse.
> 
> Enable the atime update check by default, but allow to opt-out by
> setting a datastore tuning parameter flag for backwards compatibility.
> This is honored by both garbage collection and datastore creation.
> 
> The check uses a 4 MiB fixed-size, unencrypted and compressed chunk
> as test marker, inserted if not present. This all zero-chunk is very
> likely anyways for unencrypted backup contents with large all-zero
> regions using fixed size chunking (e.g. VMs).
> 
> To avoid cases where the timestamp will not be updated because of the
> Linux kernel's timestamp granularity, sleep in-between chunk insert
> (including an atime update if pre-existing) and the subsequent
> stating + utimensat for 1 second.
> 
> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
> changes since version 6:
> - render option as error instead of unix time epoch if a timestamp
>    cannot be read
> - pass anyhow error context up till caller logging or printing the error
> 
>   pbs-datastore/src/chunk_store.rs | 72 ++++++++++++++++++++++++++++++--
>   pbs-datastore/src/datastore.rs   | 15 ++++++-
>   src/api2/config/datastore.rs     | 37 ++++++++++++----
>   3 files changed, 111 insertions(+), 13 deletions(-)
> 
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index 5e02909a1..e3be7b623 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -1,9 +1,11 @@
> +use std::os::unix::fs::MetadataExt;
>   use std::os::unix::io::AsRawFd;
>   use std::path::{Path, PathBuf};
>   use std::sync::{Arc, Mutex};
> +use std::time::Duration;
>   
> -use anyhow::{bail, format_err, Error};
> -use tracing::info;
> +use anyhow::{bail, format_err, Context, Error};
> +use tracing::{info, warn};
>   
>   use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus};
>   use proxmox_io::ReadExt;
> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>   };
>   use proxmox_worker_task::WorkerTaskContext;
>   
> +use crate::data_blob::DataChunkBuilder;
>   use crate::file_formats::{
>       COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>   };
> @@ -177,7 +180,7 @@ impl ChunkStore {
>       /// Note that this must be used with care, as it's dangerous to create two instances on the
>       /// same base path, as closing the underlying ProcessLocker drops all locks from this process
>       /// on the lockfile (even if separate FDs)
> -    pub(crate) fn open<P: Into<PathBuf>>(
> +    pub fn open<P: Into<PathBuf>>(
>           name: &str,
>           base: P,
>           sync_level: DatastoreFSyncLevel,
> @@ -442,6 +445,69 @@ impl ChunkStore {
>           Ok(())
>       }
>   
> +    /// Check if atime updates are honored by the filesystem backing the chunk store.
> +    ///
> +    /// Checks if the atime is always updated by utimensat taking into consideration the Linux
> +    /// kernel timestamp granularity.
> +    /// If `retry_on_file_changed` is set to true, the check is performed again on the changed file
> +    /// if a file change while testing is detected by differences in birth time or inode number.
> +    /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted in
> +    /// the chunk store if not yet present.
> +    /// Returns with error if the check could not be performed.
> +    pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
> +        let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
> +        let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;
> +        let (path, _digest) = self.chunk_path(&digest);
> +
> +        // Take into account timestamp update granularity in the kernel
> +        // Blocking the thread is fine here since this runs in a worker.
> +        std::thread::sleep(Duration::from_secs(1));
> +
> +        let metadata_before = std::fs::metadata(&path).context(format!(
> +            "failed to get metadata for {path:?} before atime update"
> +        ))?;
> +
> +        // Second atime update if chunk pre-existed, insert_chunk already updates pre-existing ones
> +        self.cond_touch_path(&path, true)?;
> +
> +        let metadata_now = std::fs::metadata(&path).context(format!(
> +            "failed to get metadata for {path:?} after atime update"
> +        ))?;
> +
> +        // Check for the unlikely case that the file changed in-between the
> +        // two metadata calls, try to check once again on changed file
> +        if metadata_before.ino() != metadata_now.ino() {
> +            if retry_on_file_changed {
> +                return self.check_fs_atime_updates(false);
> +            }
> +            bail!("chunk {path:?} changed twice during access time safety check, cannot proceed.");
> +        }
> +
> +        if metadata_before.accessed()? >= metadata_now.accessed()? {
> +            let chunk_info_str = if pre_existing {
> +                "pre-existing"
> +            } else {
> +                "newly inserted"
> +            };
> +            warn!("Chunk metadata was not correctly updated during access time safety check:");
> +            info!(
> +                "Timestamps before update: accessed {:?}, modified {:?}, created {:?}",
> +                metadata_before.accessed().ok(),
> +                metadata_before.modified().ok(),
> +                metadata_before.created().ok(),
> +            );
> +            info!(
> +                "Timestamps after update: accessed {:?}, modified {:?}, created {:?}",
> +                metadata_now.accessed().ok(),
> +                metadata_now.modified().ok(),
> +                metadata_now.created().ok(),
> +            );
> +            bail!("access time safety check using {chunk_info_str} chunk failed, aborting GC!");
> +        }
> +
> +        Ok(())
> +    }
> +
>       pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>           // unwrap: only `None` in unit tests
>           assert!(self.locker.is_some());
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index a6a91ca79..9bdb96b18 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -5,7 +5,7 @@ use std::os::unix::io::AsRawFd;
>   use std::path::{Path, PathBuf};
>   use std::sync::{Arc, LazyLock, Mutex};
>   
> -use anyhow::{bail, format_err, Error};
> +use anyhow::{bail, format_err, Context, Error};
>   use nix::unistd::{unlinkat, UnlinkatFlags};
>   use tracing::{info, warn};
>   
> @@ -1170,6 +1170,19 @@ impl DataStore {
>                   upid: Some(upid.to_string()),
>                   ..Default::default()
>               };
> +            let tuning: DatastoreTuning = serde_json::from_value(
> +                DatastoreTuning::API_SCHEMA
> +                    .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
> +            )?;
> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
> +                self.inner
> +                    .chunk_store
> +                    .check_fs_atime_updates(true)
> +                    .context("atime safety check failed")?;
> +                info!("Access time update check successful, proceeding with GC.");
> +            } else {
> +                info!("Access time update check disabled by datastore tuning options.");
> +            }
>   
>               info!("Start GC phase1 (mark used chunks)");
>   
> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
> index fe3260f6d..b80f14ce9 100644
> --- a/src/api2/config/datastore.rs
> +++ b/src/api2/config/datastore.rs
> @@ -1,10 +1,10 @@
>   use std::path::{Path, PathBuf};
>   
>   use ::serde::{Deserialize, Serialize};
> -use anyhow::{bail, format_err, Error};
> +use anyhow::{bail, format_err, Context, Error};
>   use hex::FromHex;
>   use serde_json::Value;
> -use tracing::warn;
> +use tracing::{info, warn};
>   
>   use proxmox_router::{http_bail, Permission, Router, RpcEnvironment, RpcEnvironmentType};
>   use proxmox_schema::{api, param_bail, ApiType};
> @@ -98,7 +98,15 @@ pub(crate) fn do_create_datastore(
>       )?;
>   

Note: The changes below will get much more concise after a rebase on 
Hannes' unmount guard patch, so that should be applied first IMO.

https://lore.proxmox.com/pbs-devel/20250320121125.114333-1-h.laimer@proxmox.com/T/

>       let res = if reuse_datastore {
> -        ChunkStore::verify_chunkstore(&path)
> +        ChunkStore::verify_chunkstore(&path).and_then(|_| {
> +            // Must be the only instance accessing and locking the chunk store,
> +            // dropping will close all other locks from this process on the lockfile as well.
> +            ChunkStore::open(
> +                &datastore.name,
> +                &path,
> +                tuning.sync_level.unwrap_or_default(),
> +            )
> +        })
>       } else {
>           let mut is_empty = true;
>           if let Ok(dir) = std::fs::read_dir(&path) {
> @@ -120,19 +128,30 @@ pub(crate) fn do_create_datastore(
>                   backup_user.gid,
>                   tuning.sync_level.unwrap_or_default(),
>               )
> -            .map(|_| ())
>           } else {
>               Err(format_err!("datastore path not empty"))
>           }
>       };
>   
> -    if res.is_err() {
> -        if need_unmount {
> -            if let Err(e) = unmount_by_mountpoint(&path) {
> -                warn!("could not unmount device: {e}");
> +    match res {
> +        Ok(chunk_store) => {
> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
> +                chunk_store
> +                    .check_fs_atime_updates(true)
> +                    .context("access time safety check failed")?;
> +                info!("Access time update check successful.");
> +            } else {
> +                info!("Access time update check skipped.");
> +            }
> +        }
> +        Err(err) => {
> +            if need_unmount {
> +                if let Err(e) = unmount_by_mountpoint(&path) {
> +                    warn!("could not unmount device: {e}");
> +                }
>               }
> +            return Err(err);
>           }
> -        return res;
>       }
>   
>       config.set_data(&datastore.name, "datastore", &datastore)?;



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel