* [pbs-devel] [PATCH proxmox-backup] client/backup_writer: clarify backup and upload size
From: Dominik Csapak @ 2021-03-24 10:59 UTC
To: pbs-devel
The text 'had to upload [KMG]iB' implies that this is the size we
actually had to send to the server, while in reality it is the
raw data size before compression.
Count the size of the compressed chunks and print it separately.
Split the average speed onto its own line so the log lines do not get too long.
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
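For illustration, with this patch applied the per-archive summary is printed on two lines, something like this (all values made up):

    root.pxar.didx: had to backup 1.2 GiB of 4.0 GiB (compressed 890.5 MiB) in 10.50s
    Average backup speed: 117.03 MiB.

where it previously was a single line of the form:

    root.pxar.didx: had to upload 1.2 GiB of 4.0 GiB in 10.50s, average speed 117.03 MiB/s).

Note that the new speed line renders the HumanByte value as-is, without the '/s' suffix the old format string appended.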
src/client/backup_writer.rs | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/src/client/backup_writer.rs b/src/client/backup_writer.rs
index cef7edef..1be66707 100644
--- a/src/client/backup_writer.rs
+++ b/src/client/backup_writer.rs
@@ -1,6 +1,6 @@
use std::collections::HashSet;
use std::os::unix::fs::OpenOptionsExt;
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicUsize, AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use anyhow::{bail, format_err, Error};
@@ -256,7 +256,7 @@ impl BackupWriter {
let wid = self.h2.post(&index_path, Some(param)).await?.as_u64().unwrap();
- let (chunk_count, chunk_reused, size, size_reused, duration, csum) =
+ let (chunk_count, chunk_reused, size, size_reused, uploaded, duration, csum) =
Self::upload_chunk_info_stream(
self.h2.clone(),
wid,
@@ -269,7 +269,7 @@ impl BackupWriter {
)
.await?;
- let uploaded = size - size_reused;
+ let saved = size - size_reused;
let vsize_h: HumanByte = size.into();
let archive = if self.verbose {
archive_name.to_string()
@@ -277,9 +277,11 @@ impl BackupWriter {
crate::tools::format::strip_server_file_extension(archive_name)
};
if archive_name != CATALOG_NAME {
- let speed: HumanByte = ((uploaded * 1_000_000) / (duration.as_micros() as usize)).into();
+ let speed: HumanByte = ((saved * 1_000_000) / (duration.as_micros() as usize)).into();
+ let saved: HumanByte = saved.into();
let uploaded: HumanByte = uploaded.into();
- println!("{}: had to upload {} of {} in {:.2}s, average speed {}/s).", archive, uploaded, vsize_h, duration.as_secs_f64(), speed);
+ println!("{}: had to backup {} of {} (compressed {}) in {:.2}s", archive, saved, vsize_h, uploaded, duration.as_secs_f64());
+ println!("Average backup speed: {}.", speed);
} else {
println!("Uploaded backup catalog ({})", vsize_h);
}
@@ -521,7 +523,7 @@ impl BackupWriter {
crypt_config: Option<Arc<CryptConfig>>,
compress: bool,
verbose: bool,
- ) -> impl Future<Output = Result<(usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
+ ) -> impl Future<Output = Result<(usize, usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
let total_chunks = Arc::new(AtomicUsize::new(0));
let total_chunks2 = total_chunks.clone();
@@ -530,6 +532,8 @@ impl BackupWriter {
let stream_len = Arc::new(AtomicUsize::new(0));
let stream_len2 = stream_len.clone();
+ let compressed_stream_len = Arc::new(AtomicU64::new(0));
+ let compressed_stream_len2 = compressed_stream_len.clone();
let reused_len = Arc::new(AtomicUsize::new(0));
let reused_len2 = reused_len.clone();
@@ -572,6 +576,7 @@ impl BackupWriter {
csum.update(digest);
let chunk_is_known = known_chunks.contains(digest);
+ let compressed_stream_len2 = compressed_stream_len.clone();
if chunk_is_known {
known_chunk_count.fetch_add(1, Ordering::SeqCst);
reused_len.fetch_add(chunk_len, Ordering::SeqCst);
@@ -580,12 +585,15 @@ impl BackupWriter {
known_chunks.insert(*digest);
future::ready(chunk_builder
.build()
- .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
- chunk,
- digest,
- chunk_len: chunk_len as u64,
- offset,
- }))
+ .map(move |(chunk, digest)| {
+ compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
+ MergedChunkInfo::New(ChunkInfo {
+ chunk,
+ digest,
+ chunk_len: chunk_len as u64,
+ offset,
+ })
+ })
)
}
})
@@ -645,12 +653,13 @@ impl BackupWriter {
let total_chunks = total_chunks2.load(Ordering::SeqCst);
let known_chunk_count = known_chunk_count2.load(Ordering::SeqCst);
let stream_len = stream_len2.load(Ordering::SeqCst);
+ let upload_len = compressed_stream_len2.load(Ordering::SeqCst) as usize;
let reused_len = reused_len2.load(Ordering::SeqCst);
let mut guard = index_csum_2.lock().unwrap();
let csum = guard.take().unwrap().finish();
- futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, duration, csum))
+ futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, upload_len, duration, csum))
})
}
--
2.20.1
* Re: [pbs-devel] [PATCH proxmox-backup] client/backup_writer: clarify backup and upload size
From: Thomas Lamprecht @ 2021-03-24 14:39 UTC
To: Proxmox Backup Server development discussion, Dominik Csapak
On 24.03.21 11:59, Dominik Csapak wrote:
> The text 'had to upload [KMG]iB' implies that this is the size we
> actually had to send to the server, while in reality it is the
> raw data size before compression.
>
> Count the size of the compressed chunks and print it separately.
> Split the average speed onto its own line so the log lines do not get too long.
>
Looks OK code-wise, so I can only complain about a few naming nits and a format nit,
and note that we now definitely need to fix that mess of a return-signature tuple
monster, which Fabian also noted off-list.
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
> src/client/backup_writer.rs | 35 ++++++++++++++++++++++-------------
> 1 file changed, 22 insertions(+), 13 deletions(-)
>
> diff --git a/src/client/backup_writer.rs b/src/client/backup_writer.rs
> index cef7edef..1be66707 100644
> --- a/src/client/backup_writer.rs
> +++ b/src/client/backup_writer.rs
> @@ -1,6 +1,6 @@
> use std::collections::HashSet;
> use std::os::unix::fs::OpenOptionsExt;
> -use std::sync::atomic::{AtomicUsize, Ordering};
> +use std::sync::atomic::{AtomicUsize, AtomicU64, Ordering};
> use std::sync::{Arc, Mutex};
>
> use anyhow::{bail, format_err, Error};
> @@ -256,7 +256,7 @@ impl BackupWriter {
>
> let wid = self.h2.post(&index_path, Some(param)).await?.as_u64().unwrap();
>
> - let (chunk_count, chunk_reused, size, size_reused, duration, csum) =
> + let (chunk_count, chunk_reused, size, size_reused, uploaded, duration, csum) =
Why not `compressed_size`? It is not necessarily the total upload amount (protocol
and TLS overhead, ...).
> Self::upload_chunk_info_stream(
> self.h2.clone(),
> wid,
> @@ -269,7 +269,7 @@ impl BackupWriter {
> )
> .await?;
>
> - let uploaded = size - size_reused;
> + let saved = size - size_reused;
Hmm, that name change does not really improve understanding for me; `saved` is rather
general and a bit confusing (as we effectively saved all of `size`, not only this
amount). Maybe `dirty_size`?
> let vsize_h: HumanByte = size.into();
> let archive = if self.verbose {
> archive_name.to_string()
> @@ -277,9 +277,11 @@ impl BackupWriter {
> crate::tools::format::strip_server_file_extension(archive_name)
> };
> if archive_name != CATALOG_NAME {
> - let speed: HumanByte = ((uploaded * 1_000_000) / (duration.as_micros() as usize)).into();
> + let speed: HumanByte = ((saved * 1_000_000) / (duration.as_micros() as usize)).into();
> + let saved: HumanByte = saved.into();
> let uploaded: HumanByte = uploaded.into();
> - println!("{}: had to upload {} of {} in {:.2}s, average speed {}/s).", archive, uploaded, vsize_h, duration.as_secs_f64(), speed);
> + println!("{}: had to backup {} of {} (compressed {}) in {:.2}s", archive, saved, vsize_h, uploaded, duration.as_secs_f64());
> + println!("Average backup speed: {}.", speed);
We normally put the archive name in front for archive-specific messages, and the
surrounding prints have no period at the end of the sentence, so the trailing '.' should be dropped here too.
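Applying both nits, the speed line could end up as something like this (a sketch only, not a wording from this thread; it also restores the '/s' suffix the pre-patch line had):

    println!("{}: average backup speed: {}/s", archive, speed);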
> } else {
> println!("Uploaded backup catalog ({})", vsize_h);
> }
> @@ -521,7 +523,7 @@ impl BackupWriter {
> crypt_config: Option<Arc<CryptConfig>>,
> compress: bool,
> verbose: bool,
> - ) -> impl Future<Output = Result<(usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
> + ) -> impl Future<Output = Result<(usize, usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
A `WriterStats` struct (just the first name popping into my head) would be really
nice here.
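A minimal sketch of what such a struct could look like, with fields mirroring the current tuple's contents (the name is the ad-hoc suggestion above; the field names are placeholders, nothing like this exists in the tree yet):

    /// Hypothetical stats bundle replacing the seven-element return tuple.
    pub struct WriterStats {
        pub chunk_count: usize,     // total chunks seen
        pub chunk_reused: usize,    // known chunks skipped
        pub size: usize,            // raw stream length
        pub size_reused: usize,     // raw length of reused chunks
        pub size_compressed: usize, // compressed length of newly built chunks
        pub duration: std::time::Duration,
        pub csum: [u8; 32],
    }

The caller above would then access `stats.size_compressed` and friends instead of destructuring a positional tuple.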
>
> let total_chunks = Arc::new(AtomicUsize::new(0));
> let total_chunks2 = total_chunks.clone();
> @@ -530,6 +532,8 @@ impl BackupWriter {
>
> let stream_len = Arc::new(AtomicUsize::new(0));
> let stream_len2 = stream_len.clone();
> + let compressed_stream_len = Arc::new(AtomicU64::new(0));
> + let compressed_stream_len2 = compressed_stream_len.clone();
> let reused_len = Arc::new(AtomicUsize::new(0));
> let reused_len2 = reused_len.clone();
>
> @@ -572,6 +576,7 @@ impl BackupWriter {
> csum.update(digest);
>
> let chunk_is_known = known_chunks.contains(digest);
> + let compressed_stream_len2 = compressed_stream_len.clone();
> if chunk_is_known {
> known_chunk_count.fetch_add(1, Ordering::SeqCst);
> reused_len.fetch_add(chunk_len, Ordering::SeqCst);
> @@ -580,12 +585,15 @@ impl BackupWriter {
> known_chunks.insert(*digest);
> future::ready(chunk_builder
> .build()
> - .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
> - chunk,
> - digest,
> - chunk_len: chunk_len as u64,
> - offset,
> - }))
> + .map(move |(chunk, digest)| {
> + compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
> + MergedChunkInfo::New(ChunkInfo {
> + chunk,
> + digest,
> + chunk_len: chunk_len as u64,
> + offset,
> + })
> + })
> )
> }
> })
> @@ -645,12 +653,13 @@ impl BackupWriter {
> let total_chunks = total_chunks2.load(Ordering::SeqCst);
> let known_chunk_count = known_chunk_count2.load(Ordering::SeqCst);
> let stream_len = stream_len2.load(Ordering::SeqCst);
> + let upload_len = compressed_stream_len2.load(Ordering::SeqCst) as usize;
> let reused_len = reused_len2.load(Ordering::SeqCst);
>
> let mut guard = index_csum_2.lock().unwrap();
> let csum = guard.take().unwrap().finish();
>
> - futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, duration, csum))
> + futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, upload_len, duration, csum))
> })
> }
>
>