From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pdm-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 258251FF15C for <inbox@lore.proxmox.com>; Wed, 5 Mar 2025 16:01:21 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 9BE8B18D9A; Wed, 5 Mar 2025 16:01:14 +0100 (CET) From: Wolfgang Bumiller <w.bumiller@proxmox.com> To: pdm-devel@lists.proxmox.com Date: Wed, 5 Mar 2025 16:01:07 +0100 Message-Id: <20250305150108.245584-7-w.bumiller@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250305150108.245584-1-w.bumiller@proxmox.com> References: <20250305150108.245584-1-w.bumiller@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.083 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pdm-devel] [PATCH v2 datacenter-manager 6/7] server: try previously unreachable clients as last resort X-BeenThere: pdm-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Datacenter Manager development discussion <pdm-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pdm-devel>, <mailto:pdm-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pdm-devel/> List-Post: <mailto:pdm-devel@lists.proxmox.com> List-Help: <mailto:pdm-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pdm-devel>, <mailto:pdm-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox Datacenter Manager development discussion <pdm-devel@lists.proxmox.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pdm-devel-bounces@lists.proxmox.com Sender: "pdm-devel" <pdm-devel-bounces@lists.proxmox.com> and mark them as reachable again if they succeed Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com> Reviewed-by: Lukas Wagner <l.wagner@proxmox.com> --- No changes since v1. server/src/connection.rs | 92 ++++++++++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 12 deletions(-) diff --git a/server/src/connection.rs b/server/src/connection.rs index b9c0e16..3092ab3 100644 --- a/server/src/connection.rs +++ b/server/src/connection.rs @@ -469,13 +469,15 @@ struct MultiClientEntry { /// - For `GET` requests we could also start a 2nd request after a shorter time out (eg. 10s). struct MultiClient { state: StdMutex<MultiClientState>, + remote: String, timeout: Duration, } impl MultiClient { fn new(remote: String, entries: Vec<MultiClientEntry>) -> Self { Self { - state: StdMutex::new(MultiClientState::new(remote, entries)), + state: StdMutex::new(MultiClientState::new(remote.clone(), entries)), + remote, timeout: Duration::from_secs(60), } } @@ -559,11 +561,16 @@ impl MultiClientState { &self.entries[self.index()] } - /// Get the current client and its index which can be passed to `failed()` if the client fails + /// Get the current entry and its index which can be passed to `failed()` if the client fails /// to connect. - fn get(&self) -> (Arc<Client>, usize) { + fn get(&self) -> (&MultiClientEntry, usize) { let index = self.index(); - (Arc::clone(&self.entries[index].client), self.current) + (&self.entries[index], self.current) + } + + /// Get a client at a specific point (which still needs to be converted to an index). + fn get_at(&self, at: usize) -> &MultiClientEntry { + &self.entries[at % self.entries.len()] } /// Check if we already tried all clients since a specific starting index. @@ -588,6 +595,30 @@ impl MultiClientState { } } +struct TryClient { + client: Arc<Client>, + reachable: bool, + hostname: String, +} + +impl TryClient { + fn reachable(entry: &MultiClientEntry) -> Self { + Self { + client: Arc::clone(&entry.client), + hostname: entry.hostname.clone(), + reachable: true, + } + } + + fn unreachable(entry: &MultiClientEntry) -> Self { + Self { + client: Arc::clone(&entry.client), + hostname: entry.hostname.clone(), + reachable: false, + } + } +} + impl MultiClient { /// This is the client usage strategy. /// @@ -598,17 +629,28 @@ impl MultiClient { /// We might be skipping clients if other tasks already tried "more" clients, but that's fine, /// since there's no point in trying the same remote twice simultaneously if it is currently /// offline... - fn try_clients(&self) -> impl Iterator<Item = Arc<Client>> + '_ { + fn try_clients(&self) -> impl Iterator<Item = TryClient> + '_ { let mut start_current = None; let state = &self.state; + + let mut unreachable_clients = Vec::new(); + let mut try_unreachable = None::<std::vec::IntoIter<_>>; + std::iter::from_fn(move || { let mut state = state.lock().unwrap(); + + if let Some(ref mut try_unreachable) = try_unreachable { + return Some(TryClient::unreachable( + state.get_at(try_unreachable.next()?), + )); + } + match start_current { None => { // first attempt, just use the current client and remember the starting index let (client, index) = state.get(); start_current = Some((index, index)); - Some(client) + Some(TryClient::reachable(client)) } Some((start, current)) => { // If our last request failed, the retry-loop asks for another client, mark the @@ -618,13 +660,24 @@ impl MultiClient { if state.tried_all_since(start) { // This iterator (and therefore this retry-loop) has tried all clients. // Give up. - return None; + try_unreachable = + Some(std::mem::take(&mut unreachable_clients).into_iter()); + return Some(TryClient::unreachable( + state.get_at(try_unreachable.as_mut()?.next()?), + )); } // finally just get the new current client and update `current` for the later // call to `failed()` - let (client, current) = state.get(); - start_current = Some((start, current)); - Some(client) + let (client, new_current) = state.get(); + start_current = Some((start, new_current)); + + // remember all the clients we skipped: + let mut at = current + 1; + while at != new_current { + unreachable_clients.push(at); + at = at.wrapping_add(1); + } + Some(TryClient::reachable(client)) } } }) @@ -647,7 +700,12 @@ macro_rules! try_request { let mut timed_out = false; // The iterator in use here will automatically mark a client as faulty if we move on to // the `next()` one. - for client in $self.try_clients() { + for TryClient { + client, + hostname, + reachable, + } in $self.try_clients() + { if let Some(err) = last_err.take() { log::error!("API client error, trying another remote - {err:?}"); } @@ -661,7 +719,17 @@ macro_rules! try_request { Ok(Err(proxmox_client::Error::Client(err))) => { last_err = Some(err); } - Ok(result) => return result, + Ok(result) => { + if !reachable { + log::error!("marking {hostname:?} as reachable again!"); + if let Ok(mut cache) = crate::remote_cache::RemoteMappingCache::write() + { + cache.mark_host_reachable(&$self.remote, &hostname, true); + let _ = cache.save(); + } + } + return result; + } Err(_) => { timed_out = true; } -- 2.39.5 _______________________________________________ pdm-devel mailing list pdm-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pdm-devel