|
| 1 | +use alloy::providers::{Provider, ProviderBuilder}; |
| 2 | +use serde::{Deserialize, Serialize}; |
| 3 | +use thiserror::Error; |
| 4 | +use tracing::{error, warn}; |
| 5 | +use url::Url; |
| 6 | + |
| 7 | +type L1EndpointMonitorResult<T> = Result<T, L1EndpointMonitorError>; |
| 8 | + |
| 9 | +#[derive(Debug, Clone)] |
| 10 | +pub struct L1EndpointMonitor { |
| 11 | + pub current_l1_endpoint_index: usize, |
| 12 | + pub config: L1EndpointMonitorConfig, |
| 13 | +} |
| 14 | + |
| 15 | +impl L1EndpointMonitor { |
| 16 | + /// Returns a functional L1 endpoint, or fails if all configured endpoints are non-operational. |
| 17 | + /// The method cycles through the configured endpoints, starting from the currently selected one |
| 18 | + /// and returns the first one that is operational. |
| 19 | + pub async fn get_active_l1_endpoint(&mut self) -> L1EndpointMonitorResult<Url> { |
| 20 | + let current_l1_endpoint_index = self.current_l1_endpoint_index; |
| 21 | + // This check can be done async, instead of blocking the user, but this requires an |
| 22 | + // additional "active" component or async task in our infra. |
| 23 | + if self.is_operational(current_l1_endpoint_index).await { |
| 24 | + return Ok(self.get_node_url(current_l1_endpoint_index).clone()); |
| 25 | + } |
| 26 | + |
| 27 | + let n_urls = self.config.ordered_l1_endpoint_urls.len(); |
| 28 | + for offset in 1..n_urls { |
| 29 | + let idx = (current_l1_endpoint_index + offset) % n_urls; |
| 30 | + if self.is_operational(idx).await { |
| 31 | + warn!( |
| 32 | + "L1 endpoint {} down; switched to {}", |
| 33 | + self.get_node_url(current_l1_endpoint_index), |
| 34 | + self.get_node_url(idx) |
| 35 | + ); |
| 36 | + |
| 37 | + self.current_l1_endpoint_index = idx; |
| 38 | + return Ok(self.get_node_url(idx).clone()); |
| 39 | + } |
| 40 | + } |
| 41 | + |
| 42 | + error!("No operational L1 endpoints found in {:?}", self.config.ordered_l1_endpoint_urls); |
| 43 | + Err(L1EndpointMonitorError::NoActiveL1Endpoint) |
| 44 | + } |
| 45 | + |
| 46 | + fn get_node_url(&self, index: usize) -> &Url { |
| 47 | + &self.config.ordered_l1_endpoint_urls[index] |
| 48 | + } |
| 49 | + |
| 50 | + async fn is_operational(&self, l1_endpoint_index: usize) -> bool { |
| 51 | + let l1_endpoint_url = self.get_node_url(l1_endpoint_index); |
| 52 | + let l1_client = ProviderBuilder::new().on_http(l1_endpoint_url.clone()); |
| 53 | + // Is this fast enough? we can use something to just check connectivity, but a recent infura |
| 54 | + // bug failed on this API even though connectivity was fine. Besides, this API is called for |
| 55 | + // most of our operations anyway. |
| 56 | + l1_client.get_block_number().await.is_ok() |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] |
| 61 | +pub struct L1EndpointMonitorConfig { |
| 62 | + pub ordered_l1_endpoint_urls: Vec<Url>, |
| 63 | +} |
| 64 | + |
| 65 | +#[derive(Debug, Clone, Error, Serialize, Deserialize, PartialEq, Eq)] |
| 66 | +pub enum L1EndpointMonitorError { |
| 67 | + #[error("All L1 endpoints are non-operational")] |
| 68 | + NoActiveL1Endpoint, |
| 69 | +} |
0 commit comments