From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <s.reiter@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 87C9E653BF
 for <pbs-devel@lists.proxmox.com>; Wed, 22 Jul 2020 16:24:53 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 8579921415
 for <pbs-devel@lists.proxmox.com>; Wed, 22 Jul 2020 16:24:53 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [212.186.127.180])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id CF55121408
 for <pbs-devel@lists.proxmox.com>; Wed, 22 Jul 2020 16:24:52 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 9BCA543131
 for <pbs-devel@lists.proxmox.com>; Wed, 22 Jul 2020 16:24:52 +0200 (CEST)
To: Thomas Lamprecht <t.lamprecht@proxmox.com>,
 Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
References: <20200722135625.23653-1-s.reiter@proxmox.com>
 <20200722135625.23653-2-s.reiter@proxmox.com>
 <05152997-427c-2cb9-014d-068c74935434@proxmox.com>
From: Stefan Reiter <s.reiter@proxmox.com>
Message-ID: <33d25d9d-5f17-3064-7a84-1e908f439a9d@proxmox.com>
Date: Wed, 22 Jul 2020 16:24:51 +0200
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101
 Thunderbird/60.9.0
MIME-Version: 1.0
In-Reply-To: <05152997-427c-2cb9-014d-068c74935434@proxmox.com>
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Language: en-US
Content-Transfer-Encoding: 7bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.015 Adjusted score from AWL reputation of From: address
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 NICE_REPLY_A           -0.001 Looks like a legit reply (A)
 RCVD_IN_DNSWL_MED        -2.3 Sender listed at https://www.dnswl.org/,
 medium trust
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [index.rs]
Subject: Re: [pbs-devel] [PATCH v2 backup 1/5] add and implement
 chunk_from_offset for IndexFile
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 22 Jul 2020 14:24:53 -0000

On 7/22/20 4:16 PM, Thomas Lamprecht wrote:
> On 22.07.20 15:56, Stefan Reiter wrote:
>> Necessary for byte-wise seeking through chunks in an index.
>>
>> Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
>> ---
>>   src/backup/dynamic_index.rs | 18 ++++++++++++++++++
>>   src/backup/fixed_index.rs   | 11 +++++++++++
>>   src/backup/index.rs         |  3 +++
>>   3 files changed, 32 insertions(+)
>>
>> diff --git a/src/backup/dynamic_index.rs b/src/backup/dynamic_index.rs
>> index 4907fe1f..887b7cf3 100644
>> --- a/src/backup/dynamic_index.rs
>> +++ b/src/backup/dynamic_index.rs
>> @@ -216,6 +216,24 @@ impl IndexFile for DynamicIndexReader {
>>               digest: self.index[pos].digest.clone(),
>>           })
>>       }
>> +
>> +    fn chunk_from_offset(&self, offset: u64) -> Option<(usize, u64)> {
>> +        let end_idx = self.index.len() - 1;
>> +        let end = self.chunk_end(end_idx);
>> +        let found_idx = self.binary_search(0, 0, end_idx, end, offset);
>> +        let found_idx = match found_idx {
>> +            Ok(i) => i,
>> +            Err(_) => return None
>> +        };
>> +
>> +        let found_start = if found_idx == 0 {
>> +            0
>> +        } else {
>> +            self.chunk_end(found_idx - 1)
>> +        };
>> +
>> +        Some((found_idx, offset - found_start))
>> +    }
>>   }
>>   
>>   struct CachedChunk {
>> diff --git a/src/backup/fixed_index.rs b/src/backup/fixed_index.rs
>> index 73d0dad0..b7e785d6 100644
>> --- a/src/backup/fixed_index.rs
>> +++ b/src/backup/fixed_index.rs
>> @@ -219,6 +219,17 @@ impl IndexFile for FixedIndexReader {
>>   
>>           (csum, chunk_end)
>>       }
>> +
>> +    fn chunk_from_offset(&self, offset: u64) -> Option<(usize, u64)> {
>> +        if offset >= self.size {
>> +            return None;
>> +        }
>> +
>> +        Some((
>> +            (offset / self.chunk_size as u64) as usize,
>> +            offset % self.chunk_size as u64
> 
> modulo is really slow, but isn't chunk_size always a 2^x and thus we can
> do the same here as we do in other places:
> 
> offset & (self.chunk_size - 1)
> 

I found it more readable this way and I don't think it's hot-path enough 
to make a real difference in performance.

But I don't mind changing it, and could even replace the div with a shift 
as well. An assert!(chunk_size.is_power_of_two()) somewhere would be a good 
safeguard in that case, though.

>> +        ))
>> +    }
>>   }
>>   
>>   pub struct FixedIndexWriter {
>> diff --git a/src/backup/index.rs b/src/backup/index.rs
>> index efdf3b54..2eab8524 100644
>> --- a/src/backup/index.rs
>> +++ b/src/backup/index.rs
>> @@ -22,6 +22,9 @@ pub trait IndexFile {
>>       fn index_bytes(&self) -> u64;
>>       fn chunk_info(&self, pos: usize) -> Option<ChunkReadInfo>;
>>   
>> +    /// Get the chunk index and the relative offset within it for a byte offset
>> +    fn chunk_from_offset(&self, offset: u64) -> Option<(usize, u64)>;
>> +
>>       /// Compute index checksum and size
>>       fn compute_csum(&self) -> ([u8; 32], u64);
>>   
>>
>