From mboxrd@z Thu Jan  1 00:00:00 1970
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 17 Oct 2024 15:26:46 +0200
Message-Id: <20241017132716.385234-2-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.39.5
In-Reply-To: <20241017132716.385234-1-c.ebner@proxmox.com>
References: <20241017132716.385234-1-c.ebner@proxmox.com>
MIME-Version: 1.0
Subject: [pbs-devel] [PATCH v4 proxmox 01/31] client: backup writer: refactor backup and upload stats counters
List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

In preparation for push support in sync jobs.

Extend and move `BackupStats` into the `backup_stats` submodule and add
a method to create it from `UploadStats`.

Further, introduce an `UploadCounters` struct to hold the Arc clones of
the chunk upload statistics counters, simplifying the housekeeping. By
bundling the counters into one struct, they can be passed as a single
function parameter when factoring out the common stream future in the
subsequent implementation of the chunk upload for sync jobs in push
direction.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
changes since version 3:
- not present in previous version
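As a reviewer aid, below is a rough, self-contained sketch of the
accounting pattern this patch introduces. The `Counters` type is
hypothetical and merely stands in for `UploadCounters`: one clone of
the Arc-backed counter bundle moves into the worker that processes the
stream, while the original handle snapshots the final statistics once
the work completes.

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use std::time::{Duration, Instant};

    /// Hypothetical two-counter stand-in for `UploadCounters`.
    #[derive(Clone)]
    struct Counters {
        chunk_count: Arc<AtomicUsize>,
        stream_len: Arc<AtomicUsize>,
    }

    impl Counters {
        fn new() -> Self {
            Self {
                chunk_count: Arc::new(AtomicUsize::new(0)),
                stream_len: Arc::new(AtomicUsize::new(0)),
            }
        }

        // Like the `inc_*` helpers in the patch: `fetch_add` returns the
        // previous value, which callers can reuse as a stream offset.
        fn inc_chunks(&self, count: usize) -> usize {
            self.chunk_count.fetch_add(count, Ordering::SeqCst)
        }

        fn inc_stream_len(&self, size: usize) -> usize {
            self.stream_len.fetch_add(size, Ordering::SeqCst)
        }

        // Like `to_upload_stats`: snapshot all counters once at the end.
        fn snapshot(&self, duration: Duration) -> (usize, usize, Duration) {
            (
                self.chunk_count.load(Ordering::SeqCst),
                self.stream_len.load(Ordering::SeqCst),
                duration,
            )
        }
    }

    fn main() {
        let start_time = Instant::now();
        let counters = Counters::new();
        let counters_readonly = counters.clone();

        // Stand-in for the upload stream future the clone is moved into.
        let worker = std::thread::spawn(move || {
            for chunk_size in [4096usize, 1024, 8192] {
                let offset = counters.inc_stream_len(chunk_size);
                counters.inc_chunks(1);
                println!("chunk at offset {offset}, size {chunk_size}");
            }
        });
        worker.join().unwrap();

        let (chunks, size, duration) = counters_readonly.snapshot(start_time.elapsed());
        println!("uploaded {chunks} chunks, {size} bytes in {duration:?}");
    }

The increment helpers get by with `&self` in this sketch because the
fields are atomics; the `SeqCst` ordering mirrors what the counters in
this patch use.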

 pbs-client/src/backup_stats.rs  | 130 ++++++++++++++++++++++++++++++++
 pbs-client/src/backup_writer.rs | 104 ++++++++-----------------
 pbs-client/src/lib.rs           |   3 +
 3 files changed, 165 insertions(+), 72 deletions(-)
 create mode 100644 pbs-client/src/backup_stats.rs

diff --git a/pbs-client/src/backup_stats.rs b/pbs-client/src/backup_stats.rs
new file mode 100644
index 000000000..7aa618667
--- /dev/null
+++ b/pbs-client/src/backup_stats.rs
@@ -0,0 +1,130 @@
+//! Implements counters to generate statistics for log outputs during uploads with the backup writer
+
+use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::time::Duration;
+
+/// Basic backup run statistics and archive checksum
+pub struct BackupStats {
+    pub size: u64,
+    pub csum: [u8; 32],
+    pub duration: Duration,
+    pub chunk_count: u64,
+}
+
+/// Extended backup run statistics and archive checksum
+pub(crate) struct UploadStats {
+    pub(crate) chunk_count: usize,
+    pub(crate) chunk_reused: usize,
+    pub(crate) chunk_injected: usize,
+    pub(crate) size: usize,
+    pub(crate) size_reused: usize,
+    pub(crate) size_injected: usize,
+    pub(crate) size_compressed: usize,
+    pub(crate) duration: Duration,
+    pub(crate) csum: [u8; 32],
+}
+
+impl UploadStats {
+    /// Convert the upload stats to the more concise [`BackupStats`]
+    #[inline(always)]
+    pub(crate) fn to_backup_stats(&self) -> BackupStats {
+        BackupStats {
+            chunk_count: self.chunk_count as u64,
+            size: self.size as u64,
+            duration: self.duration,
+            csum: self.csum,
+        }
+    }
+}
+
+/// Atomic counters for accounting upload stream progress information
+#[derive(Clone)]
+pub(crate) struct UploadCounters {
+    injected_chunk_count: Arc<AtomicUsize>,
+    known_chunk_count: Arc<AtomicUsize>,
+    total_chunk_count: Arc<AtomicUsize>,
+    compressed_stream_len: Arc<AtomicU64>,
+    injected_stream_len: Arc<AtomicUsize>,
+    reused_stream_len: Arc<AtomicUsize>,
+    total_stream_len: Arc<AtomicUsize>,
+}
+
+impl UploadCounters {
+    /// Create and zero-init new upload counters
+    pub(crate) fn new() -> Self {
+        Self {
+            total_chunk_count: Arc::new(AtomicUsize::new(0)),
+            injected_chunk_count: Arc::new(AtomicUsize::new(0)),
+            known_chunk_count: Arc::new(AtomicUsize::new(0)),
+            compressed_stream_len: Arc::new(AtomicU64::new(0)),
+            injected_stream_len: Arc::new(AtomicUsize::new(0)),
+            reused_stream_len: Arc::new(AtomicUsize::new(0)),
+            total_stream_len: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+
+    /// Increment total chunk counter by `count`, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_total_chunks(&mut self, count: usize) -> usize {
+        self.total_chunk_count.fetch_add(count, Ordering::SeqCst)
+    }
+
+    /// Increment known chunk counter by `count`, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_known_chunks(&mut self, count: usize) -> usize {
+        self.known_chunk_count.fetch_add(count, Ordering::SeqCst)
+    }
+
+    /// Increment injected chunk counter by `count`, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_injected_chunks(&mut self, count: usize) -> usize {
+        self.injected_chunk_count.fetch_add(count, Ordering::SeqCst)
+    }
+
+    /// Increment total stream length counter by given size, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_total_stream_len(&mut self, size: usize) -> usize {
+        self.total_stream_len.fetch_add(size, Ordering::SeqCst)
+    }
+
+    /// Increment reused length counter by given size, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_reused_stream_len(&mut self, size: usize) -> usize {
+        self.reused_stream_len.fetch_add(size, Ordering::SeqCst)
+    }
+
+    /// Increment compressed length counter by given size, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_compressed_stream_len(&mut self, size: u64) -> u64 {
+        self.compressed_stream_len.fetch_add(size, Ordering::SeqCst)
+    }
+
+    /// Increment injected stream length counter by given size, returns previous value
+    #[inline(always)]
+    pub(crate) fn inc_injected_stream_len(&mut self, size: usize) -> usize {
+        self.injected_stream_len.fetch_add(size, Ordering::SeqCst)
+    }
+
+    /// Return an Arc clone of the total stream length counter
+    #[inline(always)]
+    pub(crate) fn total_stream_len_counter(&self) -> Arc<AtomicUsize> {
+        self.total_stream_len.clone()
+    }
+
+    /// Convert the counters to [`UploadStats`], including given archive checksum and runtime.
+    #[inline(always)]
+    pub(crate) fn to_upload_stats(&self, csum: [u8; 32], duration: Duration) -> UploadStats {
+        UploadStats {
+            chunk_count: self.total_chunk_count.load(Ordering::SeqCst),
+            chunk_reused: self.known_chunk_count.load(Ordering::SeqCst),
+            chunk_injected: self.injected_chunk_count.load(Ordering::SeqCst),
+            size: self.total_stream_len.load(Ordering::SeqCst),
+            size_reused: self.reused_stream_len.load(Ordering::SeqCst),
+            size_injected: self.injected_stream_len.load(Ordering::SeqCst),
+            size_compressed: self.compressed_stream_len.load(Ordering::SeqCst) as usize,
+            duration,
+            csum,
+        }
+    }
+}
diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs
index d63c09b5a..5ccfcc9b3 100644
--- a/pbs-client/src/backup_writer.rs
+++ b/pbs-client/src/backup_writer.rs
@@ -1,7 +1,7 @@
 use std::collections::HashSet;
 use std::future::Future;
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
+use std::time::Instant;
 
 use anyhow::{bail, format_err, Error};
 use futures::future::{self, AbortHandle, Either, FutureExt, TryFutureExt};
@@ -22,6 +22,7 @@ use pbs_tools::crypt_config::CryptConfig;
 
 use proxmox_human_byte::HumanByte;
 
+use super::backup_stats::{BackupStats, UploadCounters, UploadStats};
 use super::inject_reused_chunks::{InjectChunks, InjectReusedChunks, InjectedChunksInfo};
 use super::merge_known_chunks::{MergeKnownChunks, MergedChunkInfo};
 
@@ -39,11 +40,6 @@ impl Drop for BackupWriter {
     }
 }
 
-pub struct BackupStats {
-    pub size: u64,
-    pub csum: [u8; 32],
-}
-
 /// Options for uploading blobs/streams to the server
 #[derive(Default, Clone)]
 pub struct UploadOptions {
@@ -53,18 +49,6 @@ pub struct UploadOptions {
     pub fixed_size: Option<u64>,
 }
 
-struct UploadStats {
-    chunk_count: usize,
-    chunk_reused: usize,
-    chunk_injected: usize,
-    size: usize,
-    size_reused: usize,
-    size_injected: usize,
-    size_compressed: usize,
-    duration: std::time::Duration,
-    csum: [u8; 32],
-}
-
 type UploadQueueSender = mpsc::Sender<(MergedChunkInfo, Option<h2::client::ResponseFuture>)>;
 type UploadResultReceiver = oneshot::Receiver<Result<(), Error>>;
 
@@ -188,6 +172,7 @@ impl BackupWriter {
         mut reader: R,
         file_name: &str,
     ) -> Result<BackupStats, Error> {
+        let start_time = Instant::now();
         let mut raw_data = Vec::new();
         // fixme: avoid loading into memory
         reader.read_to_end(&mut raw_data)?;
@@ -205,7 +190,12 @@ impl BackupWriter {
             raw_data,
         )
         .await?;
-        Ok(BackupStats { size, csum })
+        Ok(BackupStats {
+            size,
+            csum,
+            duration: start_time.elapsed(),
+            chunk_count: 0,
+        })
     }
 
     pub async fn upload_blob_from_data(
@@ -212,8 +202,9 @@ impl BackupWriter {
         &self,
         data: Vec<u8>,
         file_name: &str,
         options: UploadOptions,
     ) -> Result<BackupStats, Error> {
+        let start_time = Instant::now();
         let blob = match (options.encrypt, &self.crypt_config) {
             (false, _) => DataBlob::encode(&data, None, options.compress)?,
             (true, None) => bail!("requested encryption without a crypt config"),
@@ -237,7 +228,12 @@ impl BackupWriter {
             raw_data,
         )
         .await?;
-        Ok(BackupStats { size, csum })
+        Ok(BackupStats {
+            size,
+            csum,
+            duration: start_time.elapsed(),
+            chunk_count: 0,
+        })
     }
 
     pub async fn upload_blob_from_file<P: AsRef<std::path::Path>>(
@@ -413,10 +409,7 @@ impl BackupWriter {
             "csum": hex::encode(upload_stats.csum),
         });
         let _value = self.h2.post(&close_path, Some(param)).await?;
-        Ok(BackupStats {
-            size: upload_stats.size as u64,
-            csum: upload_stats.csum,
-        })
+        Ok(upload_stats.to_backup_stats())
     }
 
     fn response_queue() -> (
@@ -637,21 +630,8 @@ impl BackupWriter {
         compress: bool,
         injections: Option<std::sync::mpsc::Receiver<InjectChunks>>,
     ) -> impl Future<Output = Result<UploadStats, Error>> {
-        let total_chunks = Arc::new(AtomicUsize::new(0));
-        let total_chunks2 = total_chunks.clone();
-        let known_chunk_count = Arc::new(AtomicUsize::new(0));
-        let known_chunk_count2 = known_chunk_count.clone();
-        let injected_chunk_count = Arc::new(AtomicUsize::new(0));
-        let injected_chunk_count2 = injected_chunk_count.clone();
-
-        let stream_len = Arc::new(AtomicUsize::new(0));
-        let stream_len2 = stream_len.clone();
-        let compressed_stream_len = Arc::new(AtomicU64::new(0));
-        let compressed_stream_len2 = compressed_stream_len.clone();
-        let reused_len = Arc::new(AtomicUsize::new(0));
-        let reused_len2 = reused_len.clone();
-        let injected_len = Arc::new(AtomicUsize::new(0));
-        let injected_len2 = injected_len.clone();
+        let mut counters = UploadCounters::new();
+        let counters_readonly = counters.clone();
 
         let append_chunk_path = format!("{}_index", prefix);
         let upload_chunk_path = format!("{}_chunk", prefix);
@@ -666,22 +646,21 @@ impl BackupWriter {
         let index_csum_2 = index_csum.clone();
 
         stream
-            .inject_reused_chunks(injections, stream_len.clone())
+            .inject_reused_chunks(injections, counters.total_stream_len_counter())
             .and_then(move |chunk_info| match chunk_info {
                 InjectedChunksInfo::Known(chunks) => {
                     // account for injected chunks
                     let count = chunks.len();
-                    total_chunks.fetch_add(count, Ordering::SeqCst);
-                    injected_chunk_count.fetch_add(count, Ordering::SeqCst);
+                    counters.inc_total_chunks(count);
+                    counters.inc_injected_chunks(count);
 
                     let mut known = Vec::new();
                     let mut guard = index_csum.lock().unwrap();
                     let csum = guard.as_mut().unwrap();
                     for chunk in chunks {
-                        let offset =
-                            stream_len.fetch_add(chunk.size() as usize, Ordering::SeqCst) as u64;
-                        reused_len.fetch_add(chunk.size() as usize, Ordering::SeqCst);
-                        injected_len.fetch_add(chunk.size() as usize, Ordering::SeqCst);
+                        let offset = counters.inc_total_stream_len(chunk.size() as usize) as u64;
+                        counters.inc_reused_stream_len(chunk.size() as usize);
+                        counters.inc_injected_stream_len(chunk.size() as usize);
                         let digest = chunk.digest();
                         known.push((offset, digest));
                         let end_offset = offset + chunk.size();
@@ -694,8 +673,8 @@ impl BackupWriter {
                     // account for not injected chunks (new and known)
                     let chunk_len = data.len();
 
-                    total_chunks.fetch_add(1, Ordering::SeqCst);
-                    let offset = stream_len.fetch_add(chunk_len, Ordering::SeqCst) as u64;
+                    counters.inc_total_chunks(1);
+                    let offset = counters.inc_total_stream_len(chunk_len) as u64;
 
                     let mut chunk_builder = DataChunkBuilder::new(data.as_ref()).compress(compress);
 
@@ -718,14 +697,14 @@ impl BackupWriter {
 
                     let chunk_is_known = known_chunks.contains(digest);
                     if chunk_is_known {
-                        known_chunk_count.fetch_add(1, Ordering::SeqCst);
-                        reused_len.fetch_add(chunk_len, Ordering::SeqCst);
+                        counters.inc_known_chunks(1);
+                        counters.inc_reused_stream_len(chunk_len);
                         future::ok(MergedChunkInfo::Known(vec![(offset, *digest)]))
                     } else {
-                        let compressed_stream_len2 = compressed_stream_len.clone();
+                        let mut counters = counters.clone();
                         known_chunks.insert(*digest);
                         future::ready(chunk_builder.build().map(move |(chunk, digest)| {
-                            compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
+                            counters.inc_compressed_stream_len(chunk.raw_size());
                             MergedChunkInfo::New(ChunkInfo {
                                 chunk,
                                 digest,
@@ -794,29 +773,10 @@ impl BackupWriter {
             })
             .then(move |result| async move { upload_result.await?.and(result) }.boxed())
            .and_then(move |_| {
-                let duration = start_time.elapsed();
-                let chunk_count = total_chunks2.load(Ordering::SeqCst);
-                let chunk_reused = known_chunk_count2.load(Ordering::SeqCst);
-                let chunk_injected = injected_chunk_count2.load(Ordering::SeqCst);
-                let size = stream_len2.load(Ordering::SeqCst);
-                let size_reused = reused_len2.load(Ordering::SeqCst);
-                let size_injected = injected_len2.load(Ordering::SeqCst);
-                let size_compressed = compressed_stream_len2.load(Ordering::SeqCst) as usize;
-
                 let mut guard = index_csum_2.lock().unwrap();
                 let csum = guard.take().unwrap().finish();
-                futures::future::ok(UploadStats {
-                    chunk_count,
-                    chunk_reused,
-                    chunk_injected,
-                    size,
-                    size_reused,
-                    size_injected,
-                    size_compressed,
-                    duration,
-                    csum,
-                })
+                futures::future::ok(counters_readonly.to_upload_stats(csum, start_time.elapsed()))
             })
     }
 
diff --git a/pbs-client/src/lib.rs b/pbs-client/src/lib.rs
index 3d2da27b9..b875347bb 100644
--- a/pbs-client/src/lib.rs
+++ b/pbs-client/src/lib.rs
@@ -41,4 +41,7 @@ pub use backup_specification::*;
 mod chunk_stream;
 pub use chunk_stream::{ChunkStream, FixedChunkStream, InjectionData};
 
+mod backup_stats;
+pub use backup_stats::BackupStats;
+
 pub const PROXMOX_BACKUP_TCP_KEEPALIVE_TIME: u32 = 120;
-- 
2.39.5

_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel