From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 775DA69195 for ; Tue, 4 Aug 2020 12:42:57 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2D45EBDC0 for ; Tue, 4 Aug 2020 12:42:27 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id E2917BD83 for ; Tue, 4 Aug 2020 12:42:23 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id AC1DC43479 for ; Tue, 4 Aug 2020 12:42:23 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Tue, 4 Aug 2020 12:42:02 +0200 Message-Id: <20200804104205.29540-5-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200804104205.29540-1-s.reiter@proxmox.com> References: <20200804104205.29540-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.059 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [datastore.rs, prune.rs, proxmox-backup-proxy.rs] Subject: [pbs-devel] [PATCH proxmox-backup 4/7] prune: also check backup snapshot locks X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 04 Aug 2020 10:42:57 -0000 ...to avoid pruning running backups or backups used as base for others. Unfinished backups that have no lock are considered broken and will always be pruned. The ignore_locks parameter is used for testing, since flocks are not available in test environment. Signed-off-by: Stefan Reiter --- This one I'm a bit scared of: The prune logic is astonishingly confusing IMHO, but I hope I worked out how this makes the most sense. I left my long-form comments in, even though they might seem a bit excessive - they helped me make light of the situation, and might help in the future as well. But if it's too much, feel free to shorten/remove them. src/api2/admin/datastore.rs | 2 +- src/backup/prune.rs | 48 +++++++++++++++++++++++---------- src/bin/proxmox-backup-proxy.rs | 2 +- tests/prune.rs | 6 +++-- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index 97fd9093..6194c9c7 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -621,7 +621,7 @@ fn prune( let list = group.list_backups(&datastore.base_path())?; - let mut prune_info = compute_prune_info(list, &prune_options)?; + let mut prune_info = compute_prune_info(list, &prune_options, false)?; prune_info.reverse(); // delete older snapshots first diff --git a/src/backup/prune.rs b/src/backup/prune.rs index f7a87c5a..1bc5b9e8 100644 --- a/src/backup/prune.rs +++ b/src/backup/prune.rs @@ -45,26 +45,45 @@ fn mark_selections, &BackupInfo) -> String> ( } } -fn remove_incomplete_snapshots( +fn mark_incomplete_and_in_use_snapshots( mark: &mut HashMap, list: &Vec, + ignore_locks: bool, ) { - let mut keep_unfinished = true; + let mut first_finished = true; for info in list.iter() { - // backup is considered unfinished if there is no manifest - if info.is_finished() { - // There is a new finished backup, so there is no need - // to keep older unfinished backups. - keep_unfinished = false; - } else { - let backup_id = info.backup_dir.relative_path(); - if keep_unfinished { // keep first unfinished - mark.insert(backup_id, PruneMark::KeepPartial); - } else { + let backup_id = info.backup_dir.relative_path(); + let finished = info.is_finished(); + if ignore_locks || info.lock().is_ok() { + if !finished { + // incomplete backup, but we can lock it, so it's not running - + // always remove to clean up broken backups mark.insert(backup_id, PruneMark::Remove); } - keep_unfinished = false; + // if locking succeeds and the backup is complete, let the regular + // marking logic figure it out + // note that we don't need to keep any locks either - any newly + // started backup can only ever use the latest finished backup as + // base, which is kept anyway (since we always keep the latest, and + // this function already filters out any unfinished ones) + } else { + if finished { + // if the backup is finished but locking fails, we have to keep + // it, however, if it's the latest finished one, it will be kept + // anyway, so don't mark it at all + if !first_finished { + mark.insert(backup_id, PruneMark::Keep); + } + } else { + // ...if it's incomplete, and locking fails, that probably means + // it's currently running, so keep it, but do not include it in + // mark computation + mark.insert(backup_id, PruneMark::KeepPartial); + }; + } + if first_finished && finished { + first_finished = false; } } } @@ -173,13 +192,14 @@ impl PruneOptions { pub fn compute_prune_info( mut list: Vec, options: &PruneOptions, + ignore_locks: bool, ) -> Result, Error> { let mut mark = HashMap::new(); BackupInfo::sort_list(&mut list, false); - remove_incomplete_snapshots(&mut mark, &list); + mark_incomplete_and_in_use_snapshots(&mut mark, &list, ignore_locks); if let Some(keep_last) = options.keep_last { mark_selections(&mut mark, &list, keep_last as usize, |_local_time, info| { diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs index dc0d16d2..694e4b60 100644 --- a/src/bin/proxmox-backup-proxy.rs +++ b/src/bin/proxmox-backup-proxy.rs @@ -441,7 +441,7 @@ async fn schedule_datastore_prune() { let groups = BackupGroup::list_groups(&base_path)?; for group in groups { let list = group.list_backups(&base_path)?; - let mut prune_info = compute_prune_info(list, &prune_options)?; + let mut prune_info = compute_prune_info(list, &prune_options, false)?; prune_info.reverse(); // delete older snapshots first worker.log(format!("Starting prune on store \"{}\" group \"{}/{}\"", diff --git a/tests/prune.rs b/tests/prune.rs index d9758ea7..923a94fd 100644 --- a/tests/prune.rs +++ b/tests/prune.rs @@ -9,7 +9,8 @@ fn get_prune_list( options: &PruneOptions, ) -> Vec { - let mut prune_info = compute_prune_info(list, options).unwrap(); + // ignore locks, would always fail in test environment + let mut prune_info = compute_prune_info(list, options, true).unwrap(); prune_info.reverse(); @@ -38,7 +39,8 @@ fn create_info( files.push(String::from(MANIFEST_BLOB_NAME)); } - BackupInfo { backup_dir, files } + // note: base_path is only used for locking, which we ignore here anyway + BackupInfo { backup_dir, files, base_path: PathBuf::from("/tmp/pbs-test") } } #[test] -- 2.20.1