From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pbs-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9])
	by lore.proxmox.com (Postfix) with ESMTPS id 8223E1FF18E
	for <inbox@lore.proxmox.com>; Mon, 13 Jan 2025 11:37:16 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id B07F72B6A6;
	Mon, 13 Jan 2025 11:36:58 +0100 (CET)
Date: Mon, 13 Jan 2025 11:36:52 +0100
From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= <f.gruenbichler@proxmox.com>
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
References: <20241212075204.36931-1-c.ebner@proxmox.com>
In-Reply-To: <20241212075204.36931-1-c.ebner@proxmox.com>
MIME-Version: 1.0
User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid)
Message-Id: <1736762644.qd8dn3pt4j.astroid@yuna.none>
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.045 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pbs-devel] applied: [PATCH proxmox-backup] Revert "fix #5710: api:
 backup: stat known chunks on backup finish"
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
Reply-To: Proxmox Backup Server development discussion
 <pbs-devel@lists.proxmox.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: pbs-devel-bounces@lists.proxmox.com
Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com>

We should probably spec out some potential replacement approaches?

On December 12, 2024 8:52 am, Christian Ebner wrote:
> Commit da11d226 ("fix #5710: api: backup: stat known chunks on backup
> finish") introduced a seemingly cheap server-side check to verify the
> existence of known chunks in the chunk store by stat-ing them. This
> check, however, does not scale for large backup snapshots, which might
> contain millions of known chunks, as reported in the community forum [0].
> Revert the changes for now instead of making this opt-in/opt-out; a
> more general approach has to be thought out to mark backup snapshots
> which fail verification.
> 
> Link to the report in the forum:
> [0] https://forum.proxmox.com/threads/158812/
> 
> Fixes: da11d226 ("fix #5710: api: backup: stat known chunks on backup finish")
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
>  src/api2/backup/environment.rs | 54 +++++-----------------------------
>  src/api2/backup/mod.rs         | 22 +-------------
>  2 files changed, 8 insertions(+), 68 deletions(-)
> 
> diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
> index 19624fae3..99d885e2e 100644
> --- a/src/api2/backup/environment.rs
> +++ b/src/api2/backup/environment.rs
> @@ -1,4 +1,4 @@
> -use anyhow::{bail, format_err, Context, Error};
> +use anyhow::{bail, format_err, Error};
>  use nix::dir::Dir;
>  use std::collections::HashMap;
>  use std::sync::{Arc, Mutex};
> @@ -72,14 +72,8 @@ struct FixedWriterState {
>      incremental: bool,
>  }
>  
> -#[derive(Copy, Clone)]
> -struct KnownChunkInfo {
> -    uploaded: bool,
> -    length: u32,
> -}
> -
> -// key=digest, value=KnownChunkInfo
> -type KnownChunksMap = HashMap<[u8; 32], KnownChunkInfo>;
> +// key=digest, value=length
> +type KnownChunksMap = HashMap<[u8; 32], u32>;
>  
>  struct SharedBackupState {
>      finished: bool,
> @@ -165,13 +159,7 @@ impl BackupEnvironment {
>  
>          state.ensure_unfinished()?;
>  
> -        state.known_chunks.insert(
> -            digest,
> -            KnownChunkInfo {
> -                uploaded: false,
> -                length,
> -            },
> -        );
> +        state.known_chunks.insert(digest, length);
>  
>          Ok(())
>      }
> @@ -225,13 +213,7 @@ impl BackupEnvironment {
>          }
>  
>          // register chunk
> -        state.known_chunks.insert(
> -            digest,
> -            KnownChunkInfo {
> -                uploaded: true,
> -                length: size,
> -            },
> -        );
> +        state.known_chunks.insert(digest, size);
>  
>          Ok(())
>      }
> @@ -266,13 +248,7 @@ impl BackupEnvironment {
>          }
>  
>          // register chunk
> -        state.known_chunks.insert(
> -            digest,
> -            KnownChunkInfo {
> -                uploaded: true,
> -                length: size,
> -            },
> -        );
> +        state.known_chunks.insert(digest, size);
>  
>          Ok(())
>      }
> @@ -280,23 +256,7 @@ impl BackupEnvironment {
>      pub fn lookup_chunk(&self, digest: &[u8; 32]) -> Option<u32> {
>          let state = self.state.lock().unwrap();
>  
> -        state
> -            .known_chunks
> -            .get(digest)
> -            .map(|known_chunk_info| known_chunk_info.length)
> -    }
> -
> -    /// stat known chunks from previous backup, so excluding newly uploaded ones
> -    pub fn stat_prev_known_chunks(&self) -> Result<(), Error> {
> -        let state = self.state.lock().unwrap();
> -        for (digest, known_chunk_info) in &state.known_chunks {
> -            if !known_chunk_info.uploaded {
> -                self.datastore
> -                    .stat_chunk(digest)
> -                    .with_context(|| format!("stat failed on {}", hex::encode(digest)))?;
> -            }
> -        }
> -        Ok(())
> +        state.known_chunks.get(digest).copied()
>      }
>  
>      /// Store the writer with an unique ID
> diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
> index 31334b59c..0373d135b 100644
> --- a/src/api2/backup/mod.rs
> +++ b/src/api2/backup/mod.rs
> @@ -1,6 +1,6 @@
>  //! Backup protocol (HTTP2 upgrade)
>  
> -use anyhow::{bail, format_err, Context, Error};
> +use anyhow::{bail, format_err, Error};
>  use futures::*;
>  use hex::FromHex;
>  use hyper::header::{HeaderValue, CONNECTION, UPGRADE};
> @@ -788,26 +788,6 @@ fn finish_backup(
>  ) -> Result<Value, Error> {
>      let env: &BackupEnvironment = rpcenv.as_ref();
>  
> -    if let Err(err) = env.stat_prev_known_chunks() {
> -        env.debug(format!("stat registered chunks failed - {err:?}"));
> -
> -        if let Some(last) = env.last_backup.as_ref() {
> -            // No need to acquire snapshot lock, already locked when starting the backup
> -            let verify_state = SnapshotVerifyState {
> -                state: VerifyState::Failed,
> -                upid: env.worker.upid().clone(), // backup writer UPID
> -            };
> -            let verify_state = serde_json::to_value(verify_state)?;
> -            last.backup_dir
> -                .update_manifest(|manifest| {
> -                    manifest.unprotected["verify_state"] = verify_state;
> -                })
> -                .with_context(|| "manifest update failed")?;
> -        }
> -
> -        bail!("stat known chunks failed - {err:?}");
> -    }
> -
>      env.finish_backup()?;
>      env.log("successfully finished backup");
>  
> -- 
> 2.39.5
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel