Skip to content

Commit 1669365

Browse files
author
Gilad Chase
committed
chore(apollo_l1_endpoint_monitor): implement the core of l1 endpoint monitor
Constructors and other boilerplate stuff will come later.
1 parent 6cb02b4 commit 1669365

File tree

4 files changed

+89
-1
lines changed

4 files changed

+89
-1
lines changed

Cargo.lock

+11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/apollo_l1_endpoint_monitor/Cargo.toml

+9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ repository.workspace = true
66
license.workspace = true
77

88
[dependencies]
9+
alloy.workspace = true
10+
apollo_config.workspace = true
11+
apollo_infra.workspace = true
12+
async-trait.workspace = true
13+
serde.workspace = true
14+
thiserror.workspace = true
15+
tokio.workspace = true
16+
tracing.workspace = true
17+
url = { workspace = true, features = ["serde"] }
918

1019
[dev-dependencies]
1120

Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1+
pub mod monitor;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
use alloy::providers::{Provider, ProviderBuilder};
2+
use serde::{Deserialize, Serialize};
3+
use thiserror::Error;
4+
use tracing::{error, warn};
5+
use url::Url;
6+
7+
type L1EndpointMonitorResult<T> = Result<T, L1EndpointMonitorError>;
8+
#[derive(Debug, Clone)]
9+
pub struct L1EndpointMonitor {
10+
pub current_l1_endpoint_index: usize,
11+
pub config: L1EndpointMonitorConfig,
12+
}
13+
14+
impl L1EndpointMonitor {
15+
/// Returns a functional L1 endpoint, or fails if all configured endpoints are non-operational.
16+
/// The method cycles through the configured endpoints, starting from the currently selected one
17+
/// and returns the first one that is operational.
18+
pub async fn get_active_l1_endpoint(&mut self) -> L1EndpointMonitorResult<Url> {
19+
let current_l1_endpoint_index = self.current_l1_endpoint_index;
20+
// This check can be done async, instead of blocking the user, but this requires an
21+
// additional "active" component or async task in our infra.
22+
if self.is_operational(current_l1_endpoint_index).await {
23+
return Ok(self.get_node_url(current_l1_endpoint_index).clone());
24+
}
25+
26+
let n_urls = self.config.ordered_l1_endpoint_urls.len();
27+
for offset in 1..n_urls {
28+
let idx = (current_l1_endpoint_index + offset) % n_urls;
29+
if self.is_operational(idx).await {
30+
warn!(
31+
"L1 endpoint {} down; switched to {}",
32+
self.get_node_url(current_l1_endpoint_index),
33+
self.get_node_url(idx)
34+
);
35+
36+
self.current_l1_endpoint_index = idx;
37+
return Ok(self.get_node_url(idx).clone());
38+
}
39+
}
40+
41+
error!("No operational L1 endpoints found in {:?}", self.config.ordered_l1_endpoint_urls);
42+
Err(L1EndpointMonitorError::NoActiveL1Endpoint)
43+
}
44+
45+
fn get_node_url(&self, index: usize) -> &Url {
46+
&self.config.ordered_l1_endpoint_urls[index]
47+
}
48+
49+
async fn is_operational(&self, l1_endpoint_index: usize) -> bool {
50+
let l1_endpoint_url = self.get_node_url(l1_endpoint_index);
51+
let l1_client = ProviderBuilder::new().on_http(l1_endpoint_url.clone());
52+
// Is this fast enough? we can use something to just check connectivity, but a recent infura
53+
// bug failed on this API even though connectivity was fine. Besides, this API is called for
54+
// most of our operations anyway.
55+
l1_client.get_block_number().await.is_ok()
56+
}
57+
}
58+
59+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
60+
pub struct L1EndpointMonitorConfig {
61+
pub ordered_l1_endpoint_urls: Vec<Url>,
62+
}
63+
64+
#[derive(Debug, Clone, Error, Serialize, Deserialize, PartialEq, Eq)]
65+
pub enum L1EndpointMonitorError {
66+
#[error("All L1 endpoints are non-operational")]
67+
NoActiveL1Endpoint,
68+
}

0 commit comments

Comments
 (0)