From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id CBFFE90C25 for ; Fri, 31 Mar 2023 10:43:51 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id B62CA255E5 for ; Fri, 31 Mar 2023 10:43:51 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 31 Mar 2023 10:43:51 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id C8AE941CCC for ; Fri, 31 Mar 2023 10:43:50 +0200 (CEST) From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= To: pbs-devel@lists.proxmox.com Date: Fri, 31 Mar 2023 10:43:45 +0200 Message-Id: <20230331084345.3971666-1-f.gruenbichler@proxmox.com> X-Mailer: git-send-email 2.30.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.072 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup] chunk store: handle insertion edge cases X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 31 Mar 2023 08:43:51 -0000 these were previously called out in a comment, but should now be handled (as much as they can be). the performance impact shouldn't be too bad, since we only look at the magic 8 bytes at the start of the existing chunk (we already did a stat on it, so that might even be prefetched already by storage), and only if there is a size mismatch and encryption is enabled. Signed-off-by: Fabian Grünbichler --- Notes: we could verify the CRC when deciding between existing and incoming encrypted chunk, but that would require reading the full chunk, and that would be quite the load if we ever upgrade ZSTD or change its parameters and the new version is substantially better or worse at compressing.. the CRC is verified on every regular load (such as verify, sync, restore) anyway. we cannot decide which of the two potentially invalid encrypted chunks to keep based on the size and compression status: uncompressed chunks should always be bigger than compressed ones, but both the size and the compression bit is 100% under a potential attacker's control anyhow, so we cannot prevent them from sending us rather small chunks that we still need to discard out of caution, even if they are smaller than the existing one. pbs-datastore/src/chunk_store.rs | 36 ++++++++++++++++++++++++++------ pbs-datastore/src/data_blob.rs | 6 ++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index 1944ae00..ff1229bc 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -5,6 +5,7 @@ use std::sync::{Arc, Mutex}; use anyhow::{bail, format_err, Error}; use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus}; +use proxmox_io::ReadExt; use proxmox_sys::fs::{create_dir, create_path, file_type_from_file_stat, CreateOptions}; use proxmox_sys::process_locker::{ ProcessLockExclusiveGuard, ProcessLockSharedGuard, ProcessLocker, @@ -12,6 +13,9 @@ use proxmox_sys::process_locker::{ use proxmox_sys::task_log; use proxmox_sys::WorkerTaskContext; +use crate::file_formats::{ + COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0, +}; use crate::DataBlob; /// File system based chunk store @@ -460,13 +464,33 @@ impl ChunkStore { return Ok((true, old_size)); } else if old_size == 0 { log::warn!("found empty chunk '{digest_str}' in store {name}, overwriting"); + } else if chunk.is_encrypted() { + // incoming chunk is encrypted, possible attack or hash collision! + let mut existing_file = std::fs::File::open(&chunk_path)?; + let magic = existing_file.read_exact_allocated(8)?; + + // going from unencrypted to encrypted can never be right, since the digest + // includes data derived from the encryption key + if magic == UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == COMPRESSED_BLOB_MAGIC_1_0 { + bail!("Overwriting unencrypted chunk '{digest_str}' on store '{name}' with encrypted chunk with same digest not allowed!"); + } + + // if both chunks are uncompressed and encrypted and have the same digest, but + // their sizes are different, one of them *must* be invalid + if magic == ENCRYPTED_BLOB_MAGIC_1_0 && !chunk.is_compressed() { + bail!("Overwriting existing (encrypted) chunk '{digest_str}' on store '{name}' is not allowed!") + } + + // we can't tell if either chunk is invalid if either or both of them are + // compressed, the size mismatch could be caused by different zstd versions + // so let's keep the one that was uploaded first, bit-rot is hopefully detected by + // verification at some point.. + return Ok((true, old_size)); + } else if old_size < encoded_size { + log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one."); + return Ok((true, old_size)); } else { - // other sizes can happen in legitimate and illegitimate ways: - // - illegitimate: encryped chunks and bad actor client - // - legitimate: same chunk but newer zstd version (or compression level) can - // compress it better (or worse) so the - // Ideally we could take the actual smaller chunk so that improved zstd tech gets - // leveraged, but we could only allow to do that for un-encrypted ones. + log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one."); } } diff --git a/pbs-datastore/src/data_blob.rs b/pbs-datastore/src/data_blob.rs index 9c47bd45..f37c7a34 100644 --- a/pbs-datastore/src/data_blob.rs +++ b/pbs-datastore/src/data_blob.rs @@ -295,6 +295,12 @@ impl DataBlob { magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 } + /// Returns if chunk is compressed + pub fn is_compressed(&self) -> bool { + let magic = self.magic(); + magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 + } + /// Verify digest and data length for unencrypted chunks. /// /// To do that, we need to decompress data first. Please note that -- 2.30.2