From 5069e2eb8d63fd82ece354102709204e11b2477f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 13 May 2025 11:14:00 -0700 Subject: [PATCH 1/2] [nexus] Create a longer-lived connection from Nexus -> Sled Agent --- nexus/networking/src/sled_client.rs | 38 +++++++++++++++++++++++------ nexus/src/app/sled.rs | 12 ++++++++- nexus/src/app/support_bundles.rs | 16 +++++++++++- 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/nexus/networking/src/sled_client.rs b/nexus/networking/src/sled_client.rs index 8d3b1765b4f..27410356e0b 100644 --- a/nexus/networking/src/sled_client.rs +++ b/nexus/networking/src/sled_client.rs @@ -25,29 +25,53 @@ pub fn sled_lookup<'a>( Ok(sled) } +pub fn default_reqwest_client() -> reqwest::Client { + let dur = std::time::Duration::from_secs(60); + reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .expect("Failed to build reqwest Client") +} + pub async fn sled_client( datastore: &DataStore, lookup_opctx: &OpContext, sled_id: Uuid, log: &Logger, +) -> Result { + let client = default_reqwest_client(); + sled_client_ext(datastore, lookup_opctx, sled_id, log, client).await +} + +pub async fn sled_client_ext( + datastore: &DataStore, + lookup_opctx: &OpContext, + sled_id: Uuid, + log: &Logger, + client: reqwest::Client, ) -> Result { let (.., sled) = sled_lookup(datastore, lookup_opctx, sled_id)?.fetch().await?; - Ok(sled_client_from_address(sled_id, sled.address(), log)) + Ok(sled_client_from_address_ext(sled_id, sled.address(), log, client)) } pub fn sled_client_from_address( sled_id: Uuid, address: SocketAddrV6, log: &Logger, +) -> SledAgentClient { + let client = default_reqwest_client(); + sled_client_from_address_ext(sled_id, address, log, client) +} + +pub fn sled_client_from_address_ext( + sled_id: Uuid, + address: SocketAddrV6, + log: &Logger, + client: reqwest::Client, ) -> SledAgentClient { let log = log.new(o!("SledAgent" => sled_id.to_string())); - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .unwrap(); SledAgentClient::new_with_client(&format!("http://{address}"), client, log) } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 42cf16c762e..db4e4eae40f 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -147,6 +147,15 @@ impl super::Nexus { pub async fn sled_client( &self, id: &SledUuid, + ) -> Result, Error> { + let client = nexus_networking::default_reqwest_client(); + self.sled_client_ext(id, client).await + } + + pub async fn sled_client_ext( + &self, + id: &SledUuid, + client: reqwest::Client, ) -> Result, Error> { // TODO: We should consider injecting connection pooling here, // but for now, connections to sled agents are constructed @@ -155,11 +164,12 @@ impl super::Nexus { // Frankly, returning an "Arc" here without a connection pool is a // little silly; it's not actually used if each client connection exists // as a one-shot. - let client = nexus_networking::sled_client( + let client = nexus_networking::sled_client_ext( &self.db_datastore, &self.opctx_alloc, id.into_untyped_uuid(), &self.log, + client, ) .await?; Ok(Arc::new(client)) diff --git a/nexus/src/app/support_bundles.rs b/nexus/src/app/support_bundles.rs index 7b1e07e8e1d..ed60f9c97c0 100644 --- a/nexus/src/app/support_bundles.rs +++ b/nexus/src/app/support_bundles.rs @@ -90,7 +90,21 @@ impl super::Nexus { .db_datastore .zpool_get_sled_if_in_service(&opctx, bundle.zpool_id.into()) .await?; - let client = self.sled_client(&sled_id).await?; + + let short_timeout = std::time::Duration::from_secs(60); + let long_timeout = std::time::Duration::from_secs(3600); + let client = reqwest::ClientBuilder::new() + // Connecting and continuing to read from the sled agent should + // happen relatively quickly. + .connect_timeout(short_timeout) + .read_timeout(short_timeout) + // However, the bundle itself may be large. As long as we're + // continuing to make progress (see: read_timeout) we should be + // willing to keep transferring the bundle for a while longer. + .timeout(long_timeout) + .build() + .expect("Failed to build reqwest Client"); + let client = self.sled_client_ext(&sled_id, client).await?; // TODO: Use "range"? From aea1ec86d90d1bec2980b1a553f3e58e87a734a4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 13 May 2025 15:16:43 -0700 Subject: [PATCH 2/2] Supply builder, not client --- nexus/networking/src/sled_client.rs | 12 ++++-------- nexus/src/app/sled.rs | 3 ++- nexus/src/app/support_bundles.rs | 7 +++---- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/nexus/networking/src/sled_client.rs b/nexus/networking/src/sled_client.rs index 27410356e0b..362e77a287b 100644 --- a/nexus/networking/src/sled_client.rs +++ b/nexus/networking/src/sled_client.rs @@ -25,13 +25,9 @@ pub fn sled_lookup<'a>( Ok(sled) } -pub fn default_reqwest_client() -> reqwest::Client { +pub fn default_reqwest_client_builder() -> reqwest::ClientBuilder { let dur = std::time::Duration::from_secs(60); - reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .expect("Failed to build reqwest Client") + reqwest::ClientBuilder::new().connect_timeout(dur).timeout(dur) } pub async fn sled_client( @@ -40,7 +36,7 @@ pub async fn sled_client( sled_id: Uuid, log: &Logger, ) -> Result { - let client = default_reqwest_client(); + let client = default_reqwest_client_builder().build().unwrap(); sled_client_ext(datastore, lookup_opctx, sled_id, log, client).await } @@ -62,7 +58,7 @@ pub fn sled_client_from_address( address: SocketAddrV6, log: &Logger, ) -> SledAgentClient { - let client = default_reqwest_client(); + let client = default_reqwest_client_builder().build().unwrap(); sled_client_from_address_ext(sled_id, address, log, client) } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index db4e4eae40f..715a1504081 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -148,7 +148,8 @@ impl super::Nexus { &self, id: &SledUuid, ) -> Result, Error> { - let client = nexus_networking::default_reqwest_client(); + let client = + nexus_networking::default_reqwest_client_builder().build().unwrap(); self.sled_client_ext(id, client).await } diff --git a/nexus/src/app/support_bundles.rs b/nexus/src/app/support_bundles.rs index ed60f9c97c0..5005a536f08 100644 --- a/nexus/src/app/support_bundles.rs +++ b/nexus/src/app/support_bundles.rs @@ -93,10 +93,9 @@ impl super::Nexus { let short_timeout = std::time::Duration::from_secs(60); let long_timeout = std::time::Duration::from_secs(3600); - let client = reqwest::ClientBuilder::new() - // Connecting and continuing to read from the sled agent should - // happen relatively quickly. - .connect_timeout(short_timeout) + let client = nexus_networking::default_reqwest_client_builder() + // Continuing to read from the sled agent should happen relatively + // quickly. .read_timeout(short_timeout) // However, the bundle itself may be large. As long as we're // continuing to make progress (see: read_timeout) we should be