From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 125306A03E for ; Tue, 11 Aug 2020 10:51:33 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 082711B212 for ; Tue, 11 Aug 2020 10:51:03 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 24D4C1B1C2 for ; Tue, 11 Aug 2020 10:50:59 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id E143144531 for ; Tue, 11 Aug 2020 10:50:58 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Tue, 11 Aug 2020 10:50:42 +0200 Message-Id: <20200811085042.30686-8-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200811085042.30686-1-s.reiter@proxmox.com> References: <20200811085042.30686-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.066 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [prune.rs, datastore.rs, proxmox-backup-proxy.rs] Subject: [pbs-devel] [PATCH v2 proxmox-backup 7/7] prune: also check backup snapshot locks X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 11 Aug 2020 08:51:33 -0000 ...to avoid pruning running backups or backups used as base for others. Unfinished backups that have no lock are considered broken and will always be pruned. The ignore_locks parameter is used for testing, since flocks are not available in test environment. Signed-off-by: Stefan Reiter --- Once again, feel free to remove my comments, I just can't wrap my head around that logic without them ;) src/api2/admin/datastore.rs | 2 +- src/backup/prune.rs | 50 ++++++++++++++++++++++----------- src/bin/proxmox-backup-proxy.rs | 2 +- tests/prune.rs | 6 ++-- 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index fe72ea32..a987416e 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -636,7 +636,7 @@ fn prune( let list = group.list_backups(&datastore.base_path())?; - let mut prune_info = compute_prune_info(list, &prune_options)?; + let mut prune_info = compute_prune_info(list, &prune_options, &datastore.base_path(), false)?; prune_info.reverse(); // delete older snapshots first diff --git a/src/backup/prune.rs b/src/backup/prune.rs index f7a87c5a..f642e7b3 100644 --- a/src/backup/prune.rs +++ b/src/backup/prune.rs @@ -1,12 +1,13 @@ use anyhow::{Error}; use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; +use std::path::{PathBuf, Path}; use chrono::{DateTime, Timelike, Datelike, Local}; use super::{BackupDir, BackupInfo}; +use crate::tools::fs::lock_dir_noblock; -enum PruneMark { Keep, KeepPartial, Remove } +enum PruneMark { Keep, Ignored, Remove } fn mark_selections, &BackupInfo) -> String> ( mark: &mut HashMap, @@ -45,26 +46,39 @@ fn mark_selections, &BackupInfo) -> String> ( } } -fn remove_incomplete_snapshots( +fn mark_incomplete_and_in_use_snapshots( mark: &mut HashMap, list: &Vec, + base_path: &Path, + ignore_locks: bool, ) { - let mut keep_unfinished = true; for info in list.iter() { - // backup is considered unfinished if there is no manifest - if info.is_finished() { - // There is a new finished backup, so there is no need - // to keep older unfinished backups. - keep_unfinished = false; - } else { - let backup_id = info.backup_dir.relative_path(); - if keep_unfinished { // keep first unfinished - mark.insert(backup_id, PruneMark::KeepPartial); - } else { + let backup_id = info.backup_dir.relative_path(); + let mut full_path = base_path.to_owned(); + full_path.push(backup_id.clone()); + + if ignore_locks || lock_dir_noblock(&full_path, "snapshot", "").is_ok() { + if !info.is_finished() { + // incomplete backup, but we can lock it, so it's not running - + // always remove to clean up broken backups mark.insert(backup_id, PruneMark::Remove); } - keep_unfinished = false; + // if locking succeeds and the backup is complete, let the regular + // marking logic figure it out + // note that we don't need to keep any locks either - any newly + // started backup can only ever use the latest finished backup as + // base, which is kept anyway (since we always keep the latest, and + // this function already filters out any unfinished ones) + } else { + // backups we can't lock we have to ignore - note that this means in + // case a backup is running in a group, the first three snapshots + // will always be kept + // we cannot special case that though, since we don't know if a lock + // is in place for backup or forget, where in the latter case we + // must not mark it as "Keep", as otherwise we might end up with + // no backup for a given selection period + mark.insert(backup_id, PruneMark::Ignored); } } } @@ -173,13 +187,15 @@ impl PruneOptions { pub fn compute_prune_info( mut list: Vec, options: &PruneOptions, + base_path: &Path, + ignore_locks: bool, ) -> Result, Error> { let mut mark = HashMap::new(); BackupInfo::sort_list(&mut list, false); - remove_incomplete_snapshots(&mut mark, &list); + mark_incomplete_and_in_use_snapshots(&mut mark, &list, base_path, ignore_locks); if let Some(keep_last) = options.keep_last { mark_selections(&mut mark, &list, keep_last as usize, |_local_time, info| { @@ -227,7 +243,7 @@ pub fn compute_prune_info( let backup_id = info.backup_dir.relative_path(); let keep = match mark.get(&backup_id) { Some(PruneMark::Keep) => true, - Some(PruneMark::KeepPartial) => true, + Some(PruneMark::Ignored) => true, _ => false, }; (info, keep) diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs index 3f7bf3ec..8cd2e105 100644 --- a/src/bin/proxmox-backup-proxy.rs +++ b/src/bin/proxmox-backup-proxy.rs @@ -442,7 +442,7 @@ async fn schedule_datastore_prune() { let groups = BackupGroup::list_groups(&base_path)?; for group in groups { let list = group.list_backups(&base_path)?; - let mut prune_info = compute_prune_info(list, &prune_options)?; + let mut prune_info = compute_prune_info(list, &prune_options, &base_path, false)?; prune_info.reverse(); // delete older snapshots first worker.log(format!("Starting prune on store \"{}\" group \"{}/{}\"", diff --git a/tests/prune.rs b/tests/prune.rs index d9758ea7..31f48970 100644 --- a/tests/prune.rs +++ b/tests/prune.rs @@ -1,5 +1,5 @@ use anyhow::{Error}; -use std::path::PathBuf; +use std::path::{PathBuf, Path}; use proxmox_backup::backup::*; @@ -9,7 +9,9 @@ fn get_prune_list( options: &PruneOptions, ) -> Vec { - let mut prune_info = compute_prune_info(list, options).unwrap(); + // ignore locks, would always fail in test environment + // base_path is only used for locking so leave empty + let mut prune_info = compute_prune_info(list, options, &Path::new(""), true).unwrap(); prune_info.reverse(); -- 2.20.1