Skip to content

Commit ea2621e

Browse files
author
Gilad Chase
committed
chore(apollo_l1_endpoint_monitor): implement the core of l1 endpoint monitor
Constructors and other boilerplate stuff will come later.
1 parent 6cb02b4 commit ea2621e

File tree

4 files changed

+82
-1
lines changed

4 files changed

+82
-1
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/apollo_l1_endpoint_monitor/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ repository.workspace = true
66
license.workspace = true
77

88
[dependencies]
9+
alloy.workspace = true
10+
serde.workspace = true
11+
thiserror.workspace = true
12+
tracing.workspace = true
13+
url = { workspace = true, features = ["serde"] }
914

1015
[dev-dependencies]
1116

Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1+
pub mod monitor;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
use alloy::providers::{Provider, ProviderBuilder};
2+
use serde::{Deserialize, Serialize};
3+
use thiserror::Error;
4+
use tracing::{error, warn};
5+
use url::Url;
6+
7+
type L1EndpointMonitorResult<T> = Result<T, L1EndpointMonitorError>;
8+
9+
#[derive(Debug, Clone)]
10+
pub struct L1EndpointMonitor {
11+
pub current_l1_endpoint_index: usize,
12+
pub config: L1EndpointMonitorConfig,
13+
}
14+
15+
impl L1EndpointMonitor {
16+
/// Returns a functional L1 endpoint, or fails if all configured endpoints are non-operational.
17+
/// The method cycles through the configured endpoints, starting from the currently selected one
18+
/// and returns the first one that is operational.
19+
pub async fn get_active_l1_endpoint(&mut self) -> L1EndpointMonitorResult<Url> {
20+
let current_l1_endpoint_index = self.current_l1_endpoint_index;
21+
// This check can be done async, instead of blocking the user, but this requires an
22+
// additional "active" component or async task in our infra.
23+
if self.is_operational(current_l1_endpoint_index).await {
24+
return Ok(self.get_node_url(current_l1_endpoint_index).clone());
25+
}
26+
27+
let n_urls = self.config.ordered_l1_endpoint_urls.len();
28+
for offset in 1..n_urls {
29+
let idx = (current_l1_endpoint_index + offset) % n_urls;
30+
if self.is_operational(idx).await {
31+
warn!(
32+
"L1 endpoint {} down; switched to {}",
33+
self.get_node_url(current_l1_endpoint_index),
34+
self.get_node_url(idx)
35+
);
36+
37+
self.current_l1_endpoint_index = idx;
38+
return Ok(self.get_node_url(idx).clone());
39+
}
40+
}
41+
42+
error!("No operational L1 endpoints found in {:?}", self.config.ordered_l1_endpoint_urls);
43+
Err(L1EndpointMonitorError::NoActiveL1Endpoint)
44+
}
45+
46+
fn get_node_url(&self, index: usize) -> &Url {
47+
&self.config.ordered_l1_endpoint_urls[index]
48+
}
49+
50+
async fn is_operational(&self, l1_endpoint_index: usize) -> bool {
51+
let l1_endpoint_url = self.get_node_url(l1_endpoint_index);
52+
let l1_client = ProviderBuilder::new().on_http(l1_endpoint_url.clone());
53+
// Is this fast enough? we can use something to just check connectivity, but a recent infura
54+
// bug failed on this API even though connectivity was fine. Besides, this API is called for
55+
// most of our operations anyway.
56+
l1_client.get_block_number().await.is_ok()
57+
}
58+
}
59+
60+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
61+
pub struct L1EndpointMonitorConfig {
62+
pub ordered_l1_endpoint_urls: Vec<Url>,
63+
}
64+
65+
#[derive(Debug, Clone, Error, Serialize, Deserialize, PartialEq, Eq)]
66+
pub enum L1EndpointMonitorError {
67+
#[error("All L1 endpoints are non-operational")]
68+
NoActiveL1Endpoint,
69+
}

0 commit comments

Comments
 (0)