diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index 77d631a7da..28b8ec29e3 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -309,7 +309,9 @@ jobs:
       run: |
         make
     - name: Install lcli
-      if: env.SELF_HOSTED_RUNNERS == 'false'
+      # TODO: uncomment after the version of lcli in https://github.com/sigp/lighthouse/pull/5137
+      # is installed on the runners
+      # if: env.SELF_HOSTED_RUNNERS == 'false'
       run: make install-lcli
     - name: Run the doppelganger protection failure test script
       run: |
diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs
index 444a277509..de6e08cc37 100644
--- a/beacon_node/client/src/builder.rs
+++ b/beacon_node/client/src/builder.rs
@@ -505,6 +505,7 @@ where
                 network_senders: None,
                 network_globals: None,
                 beacon_processor_send: None,
+                beacon_processor_reprocess_send: None,
                 eth1_service: Some(genesis_service.eth1_service.clone()),
                 log: context.log().clone(),
                 sse_logging_components: runtime_context.sse_logging_components.clone(),
@@ -747,6 +748,9 @@ where
             network_globals: self.network_globals.clone(),
             eth1_service: self.eth1_service.clone(),
             beacon_processor_send: Some(beacon_processor_channels.beacon_processor_tx.clone()),
+            beacon_processor_reprocess_send: Some(
+                beacon_processor_channels.work_reprocessing_tx.clone(),
+            ),
             sse_logging_components: runtime_context.sse_logging_components.clone(),
             log: log.clone(),
         });
diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs
index ec3fd494d4..b39450d735 100644
--- a/beacon_node/http_api/src/lib.rs
+++ b/beacon_node/http_api/src/lib.rs
@@ -16,6 +16,7 @@ mod database;
 mod metrics;
 mod produce_block;
 mod proposer_duties;
+mod publish_attestations;
 mod publish_blocks;
 mod standard_block_rewards;
 mod state_id;
@@ -35,7 +36,7 @@ use beacon_chain::{
     validator_monitor::timestamp_now, AttestationError as AttnError, BeaconChain, BeaconChainError,
     BeaconChainTypes, WhenSlotSkipped,
 };
-use beacon_processor::BeaconProcessorSend;
+use beacon_processor::{work_reprocessing_queue::ReprocessQueueMessage, BeaconProcessorSend};
 pub use block_id::BlockId;
 use builder_states::get_next_withdrawals;
 use bytes::Bytes;
@@ -129,6 +130,7 @@ pub struct Context<T: BeaconChainTypes> {
     pub network_senders: Option<NetworkSenders<T::EthSpec>>,
     pub network_globals: Option<Arc<NetworkGlobals<T::EthSpec>>>,
     pub beacon_processor_send: Option<BeaconProcessorSend<T::EthSpec>>,
+    pub beacon_processor_reprocess_send: Option<Sender<ReprocessQueueMessage>>,
     pub eth1_service: Option<eth1::Service>,
     pub sse_logging_components: Option<SSELoggingComponents>,
     pub log: Logger,
@@ -534,6 +536,11 @@ pub fn serve<T: BeaconChainTypes>(
         .filter(|_| config.enable_beacon_processor);
     let task_spawner_filter =
         warp::any().map(move || TaskSpawner::new(beacon_processor_send.clone()));
+    let beacon_processor_reprocess_send = ctx
+        .beacon_processor_reprocess_send
+        .clone()
+        .filter(|_| config.enable_beacon_processor);
+    let reprocess_send_filter = warp::any().map(move || beacon_processor_reprocess_send.clone());
 
     let duplicate_block_status_code = ctx.config.duplicate_block_status_code;
 
@@ -1756,140 +1763,26 @@ pub fn serve<T: BeaconChainTypes>(
         .and(warp::path::end())
         .and(warp_utils::json::json())
         .and(network_tx_filter.clone())
+        .and(reprocess_send_filter)
         .and(log_filter.clone())
         .then(
             |task_spawner: TaskSpawner<T::EthSpec>,
              chain: Arc<BeaconChain<T>>,
              attestations: Vec<Attestation<T::EthSpec>>,
              network_tx: UnboundedSender<NetworkMessage<T::EthSpec>>,
-             log: Logger| {
-                task_spawner.blocking_json_task(Priority::P0, move || {
-                    let seen_timestamp = timestamp_now();
-                    let mut failures = Vec::new();
-                    let mut num_already_known = 0;
-
-                    for (index, attestation) in attestations.as_slice().iter().enumerate() {
-                        let attestation = match chain
-                            .verify_unaggregated_attestation_for_gossip(attestation, None)
-                        {
-                            Ok(attestation) => attestation,
-                            Err(AttnError::PriorAttestationKnown { .. }) => {
-                                num_already_known += 1;
-
-                                // Skip to the next attestation since an attestation for this
-                                // validator is already known in this epoch.
-                                //
-                                // There's little value for the network in validating a second
-                                // attestation for another validator since it is either:
-                                //
-                                // 1. A duplicate.
-                                // 2. Slashable.
-                                // 3. Invalid.
-                                //
-                                // We are likely to get duplicates in the case where a VC is using
-                                // fallback BNs. If the first BN actually publishes some/all of a
-                                // batch of attestations but fails to respond in a timely fashion,
-                                // the VC is likely to try publishing the attestations on another
-                                // BN. That second BN may have already seen the attestations from
-                                // the first BN and therefore indicate that the attestations are
-                                // "already seen". An attestation that has already been seen has
-                                // been published on the network so there's no actual error from
-                                // the perspective of the user.
-                                //
-                                // It's better to prevent slashable attestations from ever
-                                // appearing on the network than trying to slash validators,
-                                // especially those validators connected to the local API.
-                                //
-                                // There might be *some* value in determining that this attestation
-                                // is invalid, but since a valid attestation already it exists it
-                                // appears that this validator is capable of producing valid
-                                // attestations and there's no immediate cause for concern.
-                                continue;
-                            }
-                            Err(e) => {
-                                error!(log,
-                                    "Failure verifying attestation for gossip";
-                                    "error" => ?e,
-                                    "request_index" => index,
-                                    "committee_index" => attestation.data.index,
-                                    "attestation_slot" => attestation.data.slot,
-                                );
-                                failures.push(api_types::Failure::new(
-                                    index,
-                                    format!("Verification: {:?}", e),
-                                ));
-                                // skip to the next attestation so we do not publish this one to gossip
-                                continue;
-                            }
-                        };
-
-                        // Notify the validator monitor.
-                        chain
-                            .validator_monitor
-                            .read()
-                            .register_api_unaggregated_attestation(
-                                seen_timestamp,
-                                attestation.indexed_attestation(),
-                                &chain.slot_clock,
-                            );
-
-                        publish_pubsub_message(
-                            &network_tx,
-                            PubsubMessage::Attestation(Box::new((
-                                attestation.subnet_id(),
-                                attestation.attestation().clone(),
-                            ))),
-                        )?;
-
-                        let committee_index = attestation.attestation().data.index;
-                        let slot = attestation.attestation().data.slot;
-
-                        if let Err(e) = chain.apply_attestation_to_fork_choice(&attestation) {
-                            error!(log,
-                                "Failure applying verified attestation to fork choice";
-                                "error" => ?e,
-                                "request_index" => index,
-                                "committee_index" => committee_index,
-                                "slot" => slot,
-                            );
-                            failures.push(api_types::Failure::new(
-                                index,
-                                format!("Fork choice: {:?}", e),
-                            ));
-                        };
-
-                        if let Err(e) = chain.add_to_naive_aggregation_pool(&attestation) {
-                            error!(log,
-                                "Failure adding verified attestation to the naive aggregation pool";
-                                "error" => ?e,
-                                "request_index" => index,
-                                "committee_index" => committee_index,
-                                "slot" => slot,
-                            );
-                            failures.push(api_types::Failure::new(
-                                index,
-                                format!("Naive aggregation pool: {:?}", e),
-                            ));
-                        }
-                    }
-
-                    if num_already_known > 0 {
-                        debug!(
-                            log,
-                            "Some unagg attestations already known";
-                            "count" => num_already_known
-                        );
-                    }
-
-                    if failures.is_empty() {
-                        Ok(())
-                    } else {
-                        Err(warp_utils::reject::indexed_bad_request(
-                            "error processing attestations".to_string(),
-                            failures,
-                        ))
-                    }
-                })
+             reprocess_tx: Option<Sender<ReprocessQueueMessage>>,
+             log: Logger| async move {
+                let result = crate::publish_attestations::publish_attestations(
+                    task_spawner,
+                    chain,
+                    attestations,
+                    network_tx,
+                    reprocess_tx,
+                    log,
+                )
+                .await
+                .map(|()| warp::reply::json(&()));
+                task_spawner::convert_rejection(result).await
             },
         );
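The rewritten endpoint above swaps `blocking_json_task` (which runs a closure and converts rejections internally) for an async handler attached with `Filter::then`, which takes an infallible async closure and leaves all error-to-response conversion to the handler itself. A minimal, self-contained sketch of that warp pattern, not Lighthouse's actual code (the route, payload type and `String` error are illustrative; assumes warp 0.3 and tokio):

```rust
use warp::{http::StatusCode, reply::Reply, Filter};

// Convert a `Result` into a reply without warp's rejection machinery,
// mirroring the role of `convert_rejection` in the handler above.
async fn convert_result<T: Reply>(result: Result<T, String>) -> impl Reply {
    match result {
        Ok(reply) => reply.into_response(),
        Err(e) => warp::reply::with_status(e, StatusCode::BAD_REQUEST).into_response(),
    }
}

#[tokio::main]
async fn main() {
    // `then` (unlike `and_then`) never rejects, so the handler owns all errors.
    let route = warp::post()
        .and(warp::path("attestations"))
        .and(warp::body::json())
        .then(|atts: Vec<u64>| async move {
            let result: Result<_, String> = if atts.is_empty() {
                Err("empty batch".into())
            } else {
                Ok(warp::reply::json(&atts.len()))
            };
            convert_result(result).await
        });

    warp::serve(route).run(([127, 0, 0, 1], 3030)).await;
}
```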
diff --git a/beacon_node/http_api/src/publish_attestations.rs b/beacon_node/http_api/src/publish_attestations.rs
new file mode 100644
index 0000000000..ed7f1ed17c
--- /dev/null
+++ b/beacon_node/http_api/src/publish_attestations.rs
@@ -0,0 +1,319 @@
+//! Import attestations and publish them to the network.
+//!
+//! This module gracefully handles attestations to unknown blocks by requeuing them and then
+//! efficiently waiting for them to finish reprocessing (using an async yield).
+//!
+//! The following comments relate to the handling of duplicate attestations (relocated here during
+//! refactoring):
+//!
+//! Skip to the next attestation since an attestation for this
+//! validator is already known in this epoch.
+//!
+//! There's little value for the network in validating a second
+//! attestation for another validator since it is either:
+//!
+//! 1. A duplicate.
+//! 2. Slashable.
+//! 3. Invalid.
+//!
+//! We are likely to get duplicates in the case where a VC is using
+//! fallback BNs. If the first BN actually publishes some/all of a
+//! batch of attestations but fails to respond in a timely fashion,
+//! the VC is likely to try publishing the attestations on another
+//! BN. That second BN may have already seen the attestations from
+//! the first BN and therefore indicate that the attestations are
+//! "already seen". An attestation that has already been seen has
+//! been published on the network so there's no actual error from
+//! the perspective of the user.
+//!
+//! It's better to prevent slashable attestations from ever
+//! appearing on the network than trying to slash validators,
+//! especially those validators connected to the local API.
+//!
+//! There might be *some* value in determining that this attestation
+//! is invalid, but since a valid attestation already exists it
+//! appears that this validator is capable of producing valid
+//! attestations and there's no immediate cause for concern.
+use crate::task_spawner::{Priority, TaskSpawner};
+use beacon_chain::{
+    validator_monitor::timestamp_now, AttestationError, BeaconChain, BeaconChainError,
+    BeaconChainTypes,
+};
+use beacon_processor::work_reprocessing_queue::{QueuedUnaggregate, ReprocessQueueMessage};
+use eth2::types::Failure;
+use lighthouse_network::PubsubMessage;
+use network::NetworkMessage;
+use slog::{debug, error, warn, Logger};
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::sync::{
+    mpsc::{Sender, UnboundedSender},
+    oneshot,
+};
+use types::Attestation;
+
+// Error variants are only used in `Debug` and considered `dead_code` by the compiler.
+#[derive(Debug)]
+enum Error {
+    Validation(AttestationError),
+    Publication,
+    ForkChoice(#[allow(dead_code)] BeaconChainError),
+    AggregationPool(#[allow(dead_code)] AttestationError),
+    ReprocessDisabled,
+    ReprocessFull,
+    ReprocessTimeout,
+}
+
+enum PublishAttestationResult {
+    Success,
+    AlreadyKnown,
+    Reprocessing(oneshot::Receiver<Result<(), Error>>),
+    Failure(Error),
+}
+
+fn verify_and_publish_attestation<T: BeaconChainTypes>(
+    chain: &Arc<BeaconChain<T>>,
+    attestation: &Attestation<T::EthSpec>,
+    seen_timestamp: Duration,
+    network_tx: &UnboundedSender<NetworkMessage<T::EthSpec>>,
+    log: &Logger,
+) -> Result<(), Error> {
+    let attestation = chain
+        .verify_unaggregated_attestation_for_gossip(attestation, None)
+        .map_err(Error::Validation)?;
+
+    // Publish.
+    network_tx
+        .send(NetworkMessage::Publish {
+            messages: vec![PubsubMessage::Attestation(Box::new((
+                attestation.subnet_id(),
+                attestation.attestation().clone(),
+            )))],
+        })
+        .map_err(|_| Error::Publication)?;
+
+    // Notify the validator monitor.
+    chain
+        .validator_monitor
+        .read()
+        .register_api_unaggregated_attestation(
+            seen_timestamp,
+            attestation.indexed_attestation(),
+            &chain.slot_clock,
+        );
+
+    let fc_result = chain.apply_attestation_to_fork_choice(&attestation);
+    let naive_aggregation_result = chain.add_to_naive_aggregation_pool(&attestation);
+
+    if let Err(e) = &fc_result {
+        warn!(
+            log,
+            "Attestation invalid for fork choice";
+            "err" => ?e,
+        );
+    }
+    if let Err(e) = &naive_aggregation_result {
+        warn!(
+            log,
+            "Attestation invalid for aggregation";
+            "err" => ?e
+        );
+    }
+
+    if let Err(e) = fc_result {
+        Err(Error::ForkChoice(e))
+    } else if let Err(e) = naive_aggregation_result {
+        Err(Error::AggregationPool(e))
+    } else {
+        Ok(())
+    }
+}
+
+pub async fn publish_attestations<T: BeaconChainTypes>(
+    task_spawner: TaskSpawner<T::EthSpec>,
+    chain: Arc<BeaconChain<T>>,
+    attestations: Vec<Attestation<T::EthSpec>>,
+    network_tx: UnboundedSender<NetworkMessage<T::EthSpec>>,
+    reprocess_send: Option<Sender<ReprocessQueueMessage>>,
+    log: Logger,
+) -> Result<(), warp::Rejection> {
+    // Collect metadata about attestations which we'll use to report failures. We need to
+    // move the `attestations` vec into the blocking task, so this small overhead is unavoidable.
+    let attestation_metadata = attestations
+        .iter()
+        .map(|att| (att.data.slot, att.data.index))
+        .collect::<Vec<_>>();
+
+    // Gossip validate and publish attestations that can be immediately processed.
+    let seen_timestamp = timestamp_now();
+    let inner_log = log.clone();
+    let mut prelim_results = task_spawner
+        .blocking_task(Priority::P0, move || {
+            Ok(attestations
+                .into_iter()
+                .map(|attestation| {
+                    match verify_and_publish_attestation(
+                        &chain,
+                        &attestation,
+                        seen_timestamp,
+                        &network_tx,
+                        &inner_log,
+                    ) {
+                        Ok(()) => PublishAttestationResult::Success,
+                        Err(Error::Validation(AttestationError::UnknownHeadBlock {
+                            beacon_block_root,
+                        })) => {
+                            let Some(reprocess_tx) = &reprocess_send else {
+                                return PublishAttestationResult::Failure(Error::ReprocessDisabled);
+                            };
+                            // Re-process.
+                            let (tx, rx) = oneshot::channel();
+                            let reprocess_chain = chain.clone();
+                            let reprocess_network_tx = network_tx.clone();
+                            let reprocess_log = inner_log.clone();
+                            let reprocess_fn = move || {
+                                let result = verify_and_publish_attestation(
+                                    &reprocess_chain,
+                                    &attestation,
+                                    seen_timestamp,
+                                    &reprocess_network_tx,
+                                    &reprocess_log,
+                                );
+                                // Ignore failure on the oneshot that reports the result. This
+                                // shouldn't happen unless some catastrophe befalls the waiting
+                                // thread which causes it to drop.
+                                let _ = tx.send(result);
+                            };
+                            let reprocess_msg =
+                                ReprocessQueueMessage::UnknownBlockUnaggregate(QueuedUnaggregate {
+                                    beacon_block_root,
+                                    process_fn: Box::new(reprocess_fn),
+                                });
+                            if reprocess_tx.try_send(reprocess_msg).is_err() {
+                                PublishAttestationResult::Failure(Error::ReprocessFull)
+                            } else {
+                                PublishAttestationResult::Reprocessing(rx)
+                            }
+                        }
+                        Err(Error::Validation(AttestationError::PriorAttestationKnown {
+                            ..
+                        })) => PublishAttestationResult::AlreadyKnown,
+                        Err(e) => PublishAttestationResult::Failure(e),
+                    }
+                })
+                .map(Some)
+                .collect::<Vec<_>>())
+        })
+        .await?;
+
+    // Asynchronously wait for re-processing of attestations to unknown blocks. This avoids
+    // blocking any of the beacon processor workers while we wait for reprocessing.
+    let (reprocess_indices, reprocess_futures): (Vec<_>, Vec<_>) = prelim_results
+        .iter_mut()
+        .enumerate()
+        .filter_map(|(i, opt_result)| {
+            if let Some(PublishAttestationResult::Reprocessing(..)) = &opt_result {
+                let PublishAttestationResult::Reprocessing(rx) = opt_result.take()? else {
+                    // Unreachable.
+                    return None;
+                };
+                Some((i, rx))
+            } else {
+                None
+            }
+        })
+        .unzip();
+    let reprocess_results = futures::future::join_all(reprocess_futures).await;
+
+    // Join everything back together and construct a response.
+    // This part should be quick so we just stay in the Tokio executor's async task.
+    for (i, reprocess_result) in reprocess_indices.into_iter().zip(reprocess_results) {
+        let Some(result_entry) = prelim_results.get_mut(i) else {
+            error!(
+                log,
+                "Unreachable case in attestation publishing";
+                "case" => "prelim out of bounds",
+                "request_index" => i,
+            );
+            continue;
+        };
+        *result_entry = Some(match reprocess_result {
+            Ok(Ok(())) => PublishAttestationResult::Success,
+            // Attestation failed processing on re-process.
+            Ok(Err(Error::Validation(AttestationError::PriorAttestationKnown { .. }))) => {
+                PublishAttestationResult::AlreadyKnown
+            }
+            Ok(Err(e)) => PublishAttestationResult::Failure(e),
+            // Oneshot was dropped, indicating that the attestation either timed out in the
+            // reprocess queue or was dropped due to some error.
+            Err(_) => PublishAttestationResult::Failure(Error::ReprocessTimeout),
+        });
+    }
+
+    // Construct the response.
+    let mut failures = vec![];
+    let mut num_already_known = 0;
+
+    for (index, result) in prelim_results.iter().enumerate() {
+        match result {
+            Some(PublishAttestationResult::Success) => {}
+            Some(PublishAttestationResult::AlreadyKnown) => num_already_known += 1,
+            Some(PublishAttestationResult::Failure(e)) => {
+                if let Some((slot, committee_index)) = attestation_metadata.get(index) {
+                    error!(
+                        log,
+                        "Failure verifying attestation for gossip";
+                        "error" => ?e,
+                        "request_index" => index,
+                        "committee_index" => committee_index,
+                        "attestation_slot" => slot,
+                    );
+                    failures.push(Failure::new(index, format!("{e:?}")));
+                } else {
+                    error!(
+                        log,
+                        "Unreachable case in attestation publishing";
+                        "case" => "out of bounds",
+                        "request_index" => index
+                    );
+                    failures.push(Failure::new(index, "metadata logic error".into()));
+                }
+            }
+            Some(PublishAttestationResult::Reprocessing(_)) => {
+                error!(
+                    log,
+                    "Unreachable case in attestation publishing";
+                    "case" => "reprocessing",
+                    "request_index" => index
+                );
+                failures.push(Failure::new(index, "reprocess logic error".into()));
+            }
+            None => {
+                error!(
+                    log,
+                    "Unreachable case in attestation publishing";
+                    "case" => "result is None",
+                    "request_index" => index
+                );
+                failures.push(Failure::new(index, "result logic error".into()));
+            }
+        }
+    }
+
+    if num_already_known > 0 {
+        debug!(
+            log,
+            "Some unagg attestations already known";
+            "count" => num_already_known
+        );
+    }
+
+    if failures.is_empty() {
+        Ok(())
+    } else {
+        Err(warp_utils::reject::indexed_bad_request(
+            "error processing attestations".to_string(),
+            failures,
+        ))
+    }
+}
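The core trick in `publish_attestations` is handing the failed work to the reprocess queue as a boxed closure together with the sending half of a `oneshot`, then awaiting the receiving half so no worker thread blocks. A self-contained sketch of that pattern under tokio (the queue and event here are illustrative stand-ins, not the real `beacon_processor` types):

```rust
use tokio::sync::{mpsc, oneshot};

// Work queued until some external event (e.g. "block imported") fires.
struct Queued {
    process_fn: Box<dyn FnOnce() + Send>,
}

#[tokio::main]
async fn main() {
    let (queue_tx, mut queue_rx) = mpsc::channel::<Queued>(16);
    let (event_tx, event_rx) = oneshot::channel::<()>();

    // The "reprocess queue": hold queued work until the event arrives,
    // then run every queued closure.
    tokio::spawn(async move {
        let _ = event_rx.await;
        while let Ok(item) = queue_rx.try_recv() {
            (item.process_fn)();
        }
    });

    // The "HTTP handler": queue the work with a oneshot to report the result.
    let (result_tx, result_rx) = oneshot::channel::<Result<(), String>>();
    queue_tx
        .send(Queued {
            process_fn: Box::new(move || {
                // Re-run verification here; ignore a dropped receiver,
                // just like the real code does.
                let _ = result_tx.send(Ok(()));
            }),
        })
        .await
        .unwrap();

    // Later, the block arrives and the event fires.
    event_tx.send(()).unwrap();

    // The handler yields here instead of blocking a worker thread.
    assert_eq!(result_rx.await.unwrap(), Ok(()));
}
```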
diff --git a/beacon_node/http_api/src/task_spawner.rs b/beacon_node/http_api/src/task_spawner.rs
index 8768e057da..cfee5e01ca 100644
--- a/beacon_node/http_api/src/task_spawner.rs
+++ b/beacon_node/http_api/src/task_spawner.rs
@@ -60,11 +60,15 @@ impl<E: EthSpec> TaskSpawner<E> {
         }
     }
 
-    /// Executes a "blocking" (non-async) task which returns a `Response`.
-    pub async fn blocking_response_task<F, T>(self, priority: Priority, func: F) -> Response
+    /// Executes a "blocking" (non-async) task which returns an arbitrary value.
+    pub async fn blocking_task<F, T>(
+        self,
+        priority: Priority,
+        func: F,
+    ) -> Result<T, warp::Rejection>
     where
         F: FnOnce() -> Result<T, warp::Rejection> + Send + Sync + 'static,
-        T: Reply + Send + 'static,
+        T: Send + 'static,
     {
         if let Some(beacon_processor_send) = &self.beacon_processor_send {
             // Create a closure that will execute `func` and send the result to
@@ -79,22 +83,31 @@ impl<E: EthSpec> TaskSpawner<E> {
             };
 
             // Send the function to the beacon processor for execution at some arbitrary time.
-            let result = send_to_beacon_processor(
+            send_to_beacon_processor(
                 beacon_processor_send,
                 priority,
                 BlockingOrAsync::Blocking(Box::new(process_fn)),
                 rx,
             )
             .await
-            .and_then(|x| x);
-            convert_rejection(result).await
+            .and_then(|x| x)
         } else {
             // There is no beacon processor so spawn a task directly on the
             // tokio executor.
-            convert_rejection(warp_utils::task::blocking_response_task(func).await).await
+            warp_utils::task::blocking_task(func).await
         }
     }
 
+    /// Executes a "blocking" (non-async) task which returns a `Response`.
+    pub async fn blocking_response_task<F, T>(self, priority: Priority, func: F) -> Response
+    where
+        F: FnOnce() -> Result<T, warp::Rejection> + Send + Sync + 'static,
+        T: Reply + Send + 'static,
+    {
+        let result = self.blocking_task(priority, func).await;
+        convert_rejection(result).await
+    }
+
     /// Executes a "blocking" (non-async) task which returns a JSON-serializable
     /// object.
     pub async fn blocking_json_task<F, T>(self, priority: Priority, func: F) -> Response
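The refactor above inverts the old layering: the new generic `blocking_task` returns `Result<T, warp::Rejection>`, and `blocking_response_task` becomes a thin wrapper that converts that result into a `Response`. A stripped-down sketch of the same layering over `spawn_blocking` (the names and the `String` error type are illustrative, not Lighthouse's API):

```rust
// Generic layer: run a blocking closure and surface its typed result.
async fn blocking_task<F, T>(func: F) -> Result<T, String>
where
    F: FnOnce() -> Result<T, String> + Send + 'static,
    T: Send + 'static,
{
    // Flatten the JoinError and the closure's own error into one Result.
    tokio::task::spawn_blocking(func)
        .await
        .map_err(|e| format!("task panicked: {e}"))?
}

// Thin wrapper: same task execution, plus conversion to a final "response".
async fn blocking_response_task<F, T>(func: F) -> String
where
    F: FnOnce() -> Result<T, String> + Send + 'static,
    T: ToString + Send + 'static,
{
    match blocking_task(func).await {
        Ok(value) => value.to_string(),
        Err(e) => format!("error: {e}"),
    }
}

#[tokio::main]
async fn main() {
    // Callers needing the raw value (like `publish_attestations`) use the
    // generic layer; HTTP endpoints use the response wrapper.
    let raw: Result<u64, String> = blocking_task(|| Ok(21 * 2)).await;
    assert_eq!(raw, Ok(42));
    assert_eq!(blocking_response_task(|| Ok(42u64)).await, "42");
}
```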
diff --git a/beacon_node/http_api/src/test_utils.rs b/beacon_node/http_api/src/test_utils.rs
index b87fdf6088..c1313168bc 100644
--- a/beacon_node/http_api/src/test_utils.rs
+++ b/beacon_node/http_api/src/test_utils.rs
@@ -35,6 +35,7 @@ pub const EXTERNAL_ADDR: &str = "/ip4/0.0.0.0/tcp/9000";
 
 /// HTTP API tester that allows interaction with the underlying beacon chain harness.
 pub struct InteractiveTester<E: EthSpec> {
+    pub ctx: Arc<Context<EphemeralHarnessType<E>>>,
     pub harness: BeaconChainHarness<EphemeralHarnessType<E>>,
     pub client: BeaconNodeHttpClient,
     pub network_rx: NetworkReceivers<E>,
 }
 
 /// The result of calling `create_api_server`.
 ///
 /// Glue-type between `tests::ApiTester` and `InteractiveTester`.
-pub struct ApiServer<E: EthSpec, SFut: Future<Output = ()>> {
+pub struct ApiServer<T: BeaconChainTypes, SFut: Future<Output = ()>> {
+    pub ctx: Arc<Context<T>>,
     pub server: SFut,
     pub listening_socket: SocketAddr,
-    pub network_rx: NetworkReceivers<E>,
+    pub network_rx: NetworkReceivers<T::EthSpec>,
     pub local_enr: Enr,
     pub external_peer_id: PeerId,
 }
@@ -90,6 +92,7 @@ impl<E: EthSpec> InteractiveTester<E> {
         let harness = harness_builder.build();
 
         let ApiServer {
+            ctx,
             server,
            listening_socket,
             network_rx,
@@ -114,6 +117,7 @@ impl<E: EthSpec> InteractiveTester<E> {
         );
 
         Self {
+            ctx,
             harness,
             client,
             network_rx,
@@ -125,7 +129,7 @@ pub async fn create_api_server<T: BeaconChainTypes>(
     chain: Arc<BeaconChain<T>>,
     test_runtime: &TestRuntime,
     log: Logger,
-) -> ApiServer<T::EthSpec, impl Future<Output = ()>> {
+) -> ApiServer<T, impl Future<Output = ()>> {
     // Use port 0 to allocate a new unused port.
     let port = 0;
 
@@ -187,6 +191,7 @@ pub async fn create_api_server<T: BeaconChainTypes>(
     } = BeaconProcessorChannels::new(&beacon_processor_config);
 
     let beacon_processor_send = beacon_processor_tx;
+    let reprocess_send = work_reprocessing_tx.clone();
     BeaconProcessor {
         network_globals: network_globals.clone(),
         executor: test_runtime.task_executor.clone(),
@@ -216,14 +221,17 @@ pub async fn create_api_server<T: BeaconChainTypes>(
         network_senders: Some(network_senders),
         network_globals: Some(network_globals),
         beacon_processor_send: Some(beacon_processor_send),
+        beacon_processor_reprocess_send: Some(reprocess_send),
         eth1_service: Some(eth1_service),
         sse_logging_components: None,
         log,
     });
 
-    let (listening_socket, server) = crate::serve(ctx, test_runtime.task_executor.exit()).unwrap();
+    let (listening_socket, server) =
+        crate::serve(ctx.clone(), test_runtime.task_executor.exit()).unwrap();
 
     ApiServer {
+        ctx,
         server,
         listening_socket,
         network_rx: network_receivers,
diff --git a/beacon_node/http_api/tests/interactive_tests.rs b/beacon_node/http_api/tests/interactive_tests.rs
index 6fb197b41a..d63d04fcec 100644
--- a/beacon_node/http_api/tests/interactive_tests.rs
+++ b/beacon_node/http_api/tests/interactive_tests.rs
@@ -4,6 +4,7 @@ use beacon_chain::{
     test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
     ChainConfig,
 };
+use beacon_processor::work_reprocessing_queue::ReprocessQueueMessage;
 use eth2::types::ProduceBlockV3Response;
 use eth2::types::{DepositContractData, StateId};
 use execution_layer::{ForkchoiceState, PayloadAttributes};
@@ -840,3 +841,78 @@ pub async fn fork_choice_before_proposal() {
     // D's parent is B.
     assert_eq!(block_d.parent_root(), block_root_b.into());
 }
+
+// Test that attestations to unknown blocks are requeued and processed when their block arrives.
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn queue_attestations_from_http() {
+    let validator_count = 128;
+    let all_validators = (0..validator_count).collect::<Vec<_>>();
+
+    let tester = InteractiveTester::<E>::new(None, validator_count).await;
+    let harness = &tester.harness;
+    let client = tester.client.clone();
+
+    let num_initial = 5;
+
+    // Slot of the block attested to.
+    let attestation_slot = Slot::new(num_initial) + 1;
+
+    // Make some initial blocks.
+    harness.advance_slot();
+    harness
+        .extend_chain(
+            num_initial as usize,
+            BlockStrategy::OnCanonicalHead,
+            AttestationStrategy::AllValidators,
+        )
+        .await;
+
+    harness.advance_slot();
+    assert_eq!(harness.get_current_slot(), attestation_slot);
+
+    // Make the attested-to block without applying it.
+    let pre_state = harness.get_current_state();
+    let (block, post_state) = harness.make_block(pre_state, attestation_slot).await;
+    let block_root = block.0.canonical_root();
+
+    // Make attestations to the block and POST them to the beacon node on a background thread.
+    let attestations = harness
+        .make_unaggregated_attestations(
+            &all_validators,
+            &post_state,
+            block.0.state_root(),
+            block_root.into(),
+            attestation_slot,
+        )
+        .into_iter()
+        .flat_map(|attestations| attestations.into_iter().map(|(att, _subnet)| att))
+        .collect::<Vec<_>>();
+
+    let attestation_future = tokio::spawn(async move {
+        client
+            .post_beacon_pool_attestations(&attestations)
+            .await
+            .expect("attestations should be processed successfully")
+    });
+
+    // In parallel, apply the block. We need to manually notify the reprocess queue, because the
+    // `beacon_chain` does not know about the queue and will not update it for us.
+    let parent_root = block.0.parent_root();
+    harness
+        .process_block(attestation_slot, block_root, block)
+        .await
+        .unwrap();
+    tester
+        .ctx
+        .beacon_processor_reprocess_send
+        .as_ref()
+        .unwrap()
+        .send(ReprocessQueueMessage::BlockImported {
+            block_root,
+            parent_root,
+        })
+        .await
+        .unwrap();
+
+    attestation_future.await.unwrap();
+}
diff --git a/beacon_node/http_api/tests/tests.rs b/beacon_node/http_api/tests/tests.rs
index 2d946f3092..a7ba2c1ab8 100644
--- a/beacon_node/http_api/tests/tests.rs
+++ b/beacon_node/http_api/tests/tests.rs
@@ -248,6 +248,7 @@ impl ApiTester {
         let log = null_logger().unwrap();
 
         let ApiServer {
+            ctx: _,
             server,
             listening_socket,
             network_rx,
@@ -341,6 +342,7 @@ impl ApiTester {
         let log = null_logger().unwrap();
 
         let ApiServer {
+            ctx: _,
             server,
             listening_socket,
             network_rx,
diff --git a/book/src/help_vc.md b/book/src/help_vc.md
index 0281980431..3d2519aac5 100644
--- a/book/src/help_vc.md
+++ b/book/src/help_vc.md
@@ -26,6 +26,9 @@ FLAGS:
             but is only safe if slashing protection is enabled on the remote signer and is implemented correctly. DO NOT
             ENABLE THIS FLAG UNLESS YOU ARE CERTAIN THAT SLASHING PROTECTION IS ENABLED ON THE REMOTE SIGNER. YOU WILL
             GET SLASHED IF YOU USE THIS FLAG WITHOUT ENABLING WEB3SIGNER'S SLASHING PROTECTION.
+        --distributed
+            Enables functionality required for running the validator in a distributed validator cluster.
         --enable-doppelganger-protection
             If this flag is set, Lighthouse will delay startup for three epochs and monitor for messages on the network
             by any of the validators managed by this client. This will result in three (possibly four) epochs worth of
diff --git a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml
index 019eda4324..b29ecfc6d3 100644
--- a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml
+++ b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml
@@ -1,9 +1,15 @@
 # Mainnet config
 
 # Extends the mainnet preset
-CONFIG_NAME: 'mainnet'
 PRESET_BASE: 'mainnet'
 
+# Free-form short name of the network that this configuration applies to - known
+# canonical network names include:
+# * 'mainnet' - there can be only one
+# * 'prater' - testnet
+# Must match the regex: [a-z0-9\-]
+CONFIG_NAME: 'mainnet'
+
 # Transition
 # ---------------------------------------------------------------
 # Estimated on Sept 15, 2022
@@ -12,6 +18,8 @@ TERMINAL_TOTAL_DIFFICULTY: 58750000000000000000000
 TERMINAL_BLOCK_HASH: 0x0000000000000000000000000000000000000000000000000000000000000000
 TERMINAL_BLOCK_HASH_ACTIVATION_EPOCH: 18446744073709551615
 
+
+
 # Genesis
 # ---------------------------------------------------------------
 # `2**14` (= 16,384)
@@ -32,22 +40,16 @@ GENESIS_DELAY: 604800
 
 # Altair
 ALTAIR_FORK_VERSION: 0x01000000
-ALTAIR_FORK_EPOCH: 74240
-# Merge
+ALTAIR_FORK_EPOCH: 74240 # Oct 27, 2021, 10:56:23am UTC
+# Bellatrix
 BELLATRIX_FORK_VERSION: 0x02000000
-BELLATRIX_FORK_EPOCH: 144896  # Sept 6, 2022, 11:34:47am UTC
+BELLATRIX_FORK_EPOCH: 144896 # Sept 6, 2022, 11:34:47am UTC
 # Capella
 CAPELLA_FORK_VERSION: 0x03000000
-CAPELLA_FORK_EPOCH: 194048  # April 12, 2023, 10:27:35pm UTC
+CAPELLA_FORK_EPOCH: 194048 # April 12, 2023, 10:27:35pm UTC
 # Deneb
 DENEB_FORK_VERSION: 0x04000000
-DENEB_FORK_EPOCH: 18446744073709551615
-# Sharding
-SHARDING_FORK_VERSION: 0x03000000
-SHARDING_FORK_EPOCH: 18446744073709551615
-
-# TBD, 2**32 is a placeholder. Merge transition approach is in active R&D.
-TRANSITION_TOTAL_DIFFICULTY: 4294967296
+DENEB_FORK_EPOCH: 269568 # March 13, 2024, 01:55:35pm UTC
 
 
 # Time parameters
@@ -74,16 +76,22 @@ INACTIVITY_SCORE_RECOVERY_RATE: 16
 EJECTION_BALANCE: 16000000000
 # 2**2 (= 4)
 MIN_PER_EPOCH_CHURN_LIMIT: 4
-# 2**3 (= 8)
-MAX_PER_EPOCH_ACTIVATION_CHURN_LIMIT: 8
 # 2**16 (= 65,536)
 CHURN_LIMIT_QUOTIENT: 65536
-
+# [New in Deneb:EIP7514] 2**3 (= 8)
+MAX_PER_EPOCH_ACTIVATION_CHURN_LIMIT: 8
 
 # Fork choice
 # ---------------------------------------------------------------
 # 40%
 PROPOSER_SCORE_BOOST: 40
+# 20%
+REORG_HEAD_WEIGHT_THRESHOLD: 20
+# 160%
+REORG_PARENT_WEIGHT_THRESHOLD: 160
+# `2` epochs
+REORG_MAX_EPOCHS_SINCE_FINALIZATION: 2
+
 
 # Deposit contract
 # ---------------------------------------------------------------
@@ -92,17 +100,43 @@ DEPOSIT_CHAIN_ID: 1
 DEPOSIT_NETWORK_ID: 1
 DEPOSIT_CONTRACT_ADDRESS: 0x00000000219ab540356cBB839Cbe05303d7705Fa
 
-# Network
+
+# Networking
 # ---------------------------------------------------------------
-SUBNETS_PER_NODE: 2
+# `10 * 2**20` (= 10485760, 10 MiB)
 GOSSIP_MAX_SIZE: 10485760
+# `2**10` (= 1024)
+MAX_REQUEST_BLOCKS: 1024
+# `2**8` (= 256)
+EPOCHS_PER_SUBNET_SUBSCRIPTION: 256
+# `MIN_VALIDATOR_WITHDRAWABILITY_DELAY + CHURN_LIMIT_QUOTIENT // 2` (= 33024, ~5 months)
 MIN_EPOCHS_FOR_BLOCK_REQUESTS: 33024
+# `10 * 2**20` (=10485760, 10 MiB)
 MAX_CHUNK_SIZE: 10485760
+# 5s
 TTFB_TIMEOUT: 5
+# 10s
 RESP_TIMEOUT: 10
+ATTESTATION_PROPAGATION_SLOT_RANGE: 32
+# 500ms
+MAXIMUM_GOSSIP_CLOCK_DISPARITY: 500
 MESSAGE_DOMAIN_INVALID_SNAPPY: 0x00000000
 MESSAGE_DOMAIN_VALID_SNAPPY: 0x01000000
+# 2 subnets per node
+SUBNETS_PER_NODE: 2
+# 2**6 (= 64)
 ATTESTATION_SUBNET_COUNT: 64
 ATTESTATION_SUBNET_EXTRA_BITS: 0
+# ceillog2(ATTESTATION_SUBNET_COUNT) + ATTESTATION_SUBNET_EXTRA_BITS
 ATTESTATION_SUBNET_PREFIX_BITS: 6
 ATTESTATION_SUBNET_SHUFFLING_PREFIX_BITS: 3
+
+# Deneb
+# `2**7` (=128)
+MAX_REQUEST_BLOCKS_DENEB: 128
+# MAX_REQUEST_BLOCKS_DENEB * MAX_BLOBS_PER_BLOCK
+MAX_REQUEST_BLOB_SIDECARS: 768
+# `2**12` (= 4096 epochs, ~18 days)
+MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096
+# `6`
+BLOB_SIDECAR_SUBNET_COUNT: 6
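The dates in the fork-epoch comments follow directly from the epoch numbers: an epoch is 32 slots of 12 seconds, counted from the mainnet genesis time. A quick arithmetic check of the Deneb activation time (1606824023 is the well-known mainnet genesis timestamp):

```rust
fn main() {
    const GENESIS_TIME: u64 = 1_606_824_023; // Dec 1, 2020, 12:00:23pm UTC
    const SECONDS_PER_SLOT: u64 = 12;
    const SLOTS_PER_EPOCH: u64 = 32;
    const DENEB_FORK_EPOCH: u64 = 269_568;

    let deneb_timestamp = GENESIS_TIME + DENEB_FORK_EPOCH * SLOTS_PER_EPOCH * SECONDS_PER_SLOT;
    // 1606824023 + 269568 * 384 = 1710338135, i.e. March 13, 2024, 01:55:35pm UTC,
    // matching the comment on DENEB_FORK_EPOCH above.
    assert_eq!(deneb_timestamp, 1_710_338_135);
}
```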
diff --git a/common/eth2_network_config/src/lib.rs b/common/eth2_network_config/src/lib.rs
index 565b8d7890..a76a8320aa 100644
--- a/common/eth2_network_config/src/lib.rs
+++ b/common/eth2_network_config/src/lib.rs
@@ -583,6 +583,8 @@ mod tests {
         } else {
             GenesisStateSource::Unknown
         };
+        // With Deneb enabled by default we must set a trusted setup here.
+        let kzg_trusted_setup = get_trusted_setup_from_config(&config).unwrap();
 
         let testnet = Eth2NetworkConfig {
             deposit_contract_deploy_block,
@@ -593,7 +595,7 @@ mod tests {
                 .map(Encode::as_ssz_bytes)
                 .map(Into::into),
             config,
-            kzg_trusted_setup: None,
+            kzg_trusted_setup: Some(kzg_trusted_setup),
         };
 
         testnet
diff --git a/consensus/types/presets/mainnet/deneb.yaml b/consensus/types/presets/mainnet/deneb.yaml
index 6d2fb4abde..0f56b8bdfa 100644
--- a/consensus/types/presets/mainnet/deneb.yaml
+++ b/consensus/types/presets/mainnet/deneb.yaml
@@ -8,5 +8,5 @@ FIELD_ELEMENTS_PER_BLOB: 4096
 MAX_BLOB_COMMITMENTS_PER_BLOCK: 4096
 # `uint64(6)`
 MAX_BLOBS_PER_BLOCK: 6
-# `floorlog2(BLOB_KZG_COMMITMENTS_GINDEX) + 1 + ceillog2(MAX_BLOB_COMMITMENTS_PER_BLOCK)` = 4 + 1 + 12 = 17
+# `floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) + 1 + ceillog2(MAX_BLOB_COMMITMENTS_PER_BLOCK)` = 4 + 1 + 12 = 17
 KZG_COMMITMENT_INCLUSION_PROOF_DEPTH: 17
diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs
index a182c0f98d..c32c67fa33 100644
--- a/consensus/types/src/chain_spec.rs
+++ b/consensus/types/src/chain_spec.rs
@@ -681,7 +681,7 @@ impl ChainSpec {
              * Deneb hard fork params
              */
             deneb_fork_version: [0x04, 0x00, 0x00, 0x00],
-            deneb_fork_epoch: None,
+            deneb_fork_epoch: Some(Epoch::new(269568)),
 
             /*
              * Network specific
diff --git a/lcli/src/new_testnet.rs b/lcli/src/new_testnet.rs
index 3a0c7a9f60..47db1036d9 100644
--- a/lcli/src/new_testnet.rs
+++ b/lcli/src/new_testnet.rs
@@ -9,7 +9,9 @@ use ethereum_hashing::hash;
 use ssz::Decode;
 use ssz::Encode;
 use state_processing::process_activations;
-use state_processing::upgrade::{upgrade_to_altair, upgrade_to_bellatrix};
+use state_processing::upgrade::{
+    upgrade_to_altair, upgrade_to_bellatrix, upgrade_to_capella, upgrade_to_deneb,
+};
 use std::fs::File;
 use std::io::Read;
 use std::path::PathBuf;
@@ -19,8 +21,8 @@ use types::ExecutionBlockHash;
 use types::{
     test_utils::generate_deterministic_keypairs, Address, BeaconState, ChainSpec, Config, Epoch,
     Eth1Data, EthSpec, ExecutionPayloadHeader, ExecutionPayloadHeaderCapella,
-    ExecutionPayloadHeaderDeneb, ExecutionPayloadHeaderMerge, ExecutionPayloadHeaderRefMut,
-    ForkName, Hash256, Keypair, PublicKey, Validator,
+    ExecutionPayloadHeaderDeneb, ExecutionPayloadHeaderMerge, ForkName, Hash256, Keypair,
+    PublicKey, Validator,
 };
 
 pub fn run<T: EthSpec>(testnet_dir_path: PathBuf, matches: &ArgMatches) -> Result<(), String> {
@@ -302,26 +304,47 @@ fn initialize_state_with_validators<T: EthSpec>(
         state.fork_mut().previous_version = spec.bellatrix_fork_version;
 
         // Override latest execution payload header.
-        // See https://github.com/ethereum/consensus-specs/blob/v1.1.0/specs/merge/beacon-chain.md#testing
-
-        // Currently, we only support starting from a bellatrix state
-        match state
-            .latest_execution_payload_header_mut()
-            .map_err(|e| format!("Failed to get execution payload header: {:?}", e))?
-        {
-            ExecutionPayloadHeaderRefMut::Merge(header_mut) => {
-                if let ExecutionPayloadHeader::Merge(eph) = execution_payload_header {
-                    *header_mut = eph;
-                } else {
-                    return Err("Execution payload header must be a bellatrix header".to_string());
-                }
-            }
-            ExecutionPayloadHeaderRefMut::Capella(_) => {
-                return Err("Cannot start genesis from a capella state".to_string())
-            }
-            ExecutionPayloadHeaderRefMut::Deneb(_) => {
-                return Err("Cannot start genesis from a deneb state".to_string())
-            }
+        // See https://github.com/ethereum/consensus-specs/blob/v1.1.0/specs/bellatrix/beacon-chain.md#testing
+        if let ExecutionPayloadHeader::Merge(ref header) = execution_payload_header {
+            *state
+                .latest_execution_payload_header_merge_mut()
+                .or(Err("mismatched fork".to_string()))? = header.clone();
+        }
+    }
+
+    if spec
+        .capella_fork_epoch
+        .map_or(false, |fork_epoch| fork_epoch == T::genesis_epoch())
+    {
+        upgrade_to_capella(&mut state, spec).unwrap();
+
+        // Remove intermediate fork from `state.fork`.
+        state.fork_mut().previous_version = spec.capella_fork_version;
+
+        // Override latest execution payload header.
+        // See https://github.com/ethereum/consensus-specs/blob/v1.1.0/specs/bellatrix/beacon-chain.md#testing
+        if let ExecutionPayloadHeader::Capella(ref header) = execution_payload_header {
+            *state
+                .latest_execution_payload_header_capella_mut()
+                .or(Err("mismatched fork".to_string()))? = header.clone();
+        }
+    }
+
+    if spec
+        .deneb_fork_epoch
+        .map_or(false, |fork_epoch| fork_epoch == T::genesis_epoch())
+    {
+        upgrade_to_deneb(&mut state, spec).unwrap();
+
+        // Remove intermediate fork from `state.fork`.
+        state.fork_mut().previous_version = spec.deneb_fork_version;
+
+        // Override latest execution payload header.
+        // See https://github.com/ethereum/consensus-specs/blob/v1.1.0/specs/bellatrix/beacon-chain.md#testing
+        if let ExecutionPayloadHeader::Deneb(ref header) = execution_payload_header {
+            *state
+                .latest_execution_payload_header_deneb_mut()
+                .or(Err("mismatched fork".to_string()))? = header.clone();
         }
     }
 
@@ -331,5 +354,10 @@ fn initialize_state_with_validators<T: EthSpec>(
     // Set genesis validators root for domain separation and chain versioning
     *state.genesis_validators_root_mut() = state.update_validators_tree_hash_cache().unwrap();
 
+    // Sanity check for state fork matching config fork.
+    state
+        .fork_name(spec)
+        .map_err(|e| format!("state fork mismatch: {e:?}"))?;
+
     Ok(state)
 }
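The new `initialize_state_with_validators` logic applies each fork upgrade in sequence whenever that fork's activation epoch equals the genesis epoch, so a testnet with all forks at epoch 0 boots directly into the latest fork. A toy model of that gating, using an illustrative spec struct rather than Lighthouse's `ChainSpec`:

```rust
#[derive(Debug, PartialEq)]
enum ForkName { Base, Altair, Bellatrix, Capella, Deneb }

struct Spec {
    altair_fork_epoch: Option<u64>,
    bellatrix_fork_epoch: Option<u64>,
    capella_fork_epoch: Option<u64>,
    deneb_fork_epoch: Option<u64>,
}

/// Walk the fork schedule and return the fork active at genesis (epoch 0),
/// mirroring the chain of `map_or(false, |e| e == genesis_epoch)` checks.
fn genesis_fork(spec: &Spec) -> ForkName {
    let genesis_epoch = 0;
    let at_genesis = |e: Option<u64>| e.map_or(false, |epoch| epoch == genesis_epoch);

    let mut fork = ForkName::Base;
    if at_genesis(spec.altair_fork_epoch) { fork = ForkName::Altair; }
    if at_genesis(spec.bellatrix_fork_epoch) { fork = ForkName::Bellatrix; }
    if at_genesis(spec.capella_fork_epoch) { fork = ForkName::Capella; }
    if at_genesis(spec.deneb_fork_epoch) { fork = ForkName::Deneb; }
    fork
}

fn main() {
    // Matches the updated scripts/tests/vars.env: all forks at epoch 0.
    let spec = Spec {
        altair_fork_epoch: Some(0),
        bellatrix_fork_epoch: Some(0),
        capella_fork_epoch: Some(0),
        deneb_fork_epoch: Some(0),
    };
    assert_eq!(genesis_fork(&spec), ForkName::Deneb);
}
```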
diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs
index f97f17a667..94996eb1a2 100644
--- a/lighthouse/tests/beacon_node.rs
+++ b/lighthouse/tests/beacon_node.rs
@@ -55,6 +55,12 @@ impl CommandLineTest {
     }
 
     fn run_with_zero_port(&mut self) -> CompletedTest<BeaconNodeConfig> {
+        // Required since Deneb was enabled on mainnet.
+        self.cmd.arg("--allow-insecure-genesis-sync");
+        self.run_with_zero_port_and_no_genesis_sync()
+    }
+
+    fn run_with_zero_port_and_no_genesis_sync(&mut self) -> CompletedTest<BeaconNodeConfig> {
         self.cmd.arg("-z");
         self.run()
     }
@@ -93,16 +99,16 @@ fn staking_flag() {
 }
 
 #[test]
-fn allow_insecure_genesis_sync() {
-    CommandLineTest::new()
-        .run_with_zero_port()
-        .with_config(|config| {
-            assert_eq!(config.allow_insecure_genesis_sync, false);
-        });
+#[should_panic]
+fn allow_insecure_genesis_sync_default() {
+    CommandLineTest::new().run_with_zero_port_and_no_genesis_sync();
+}
 
+#[test]
+fn allow_insecure_genesis_sync_enabled() {
     CommandLineTest::new()
         .flag("allow-insecure-genesis-sync", None)
-        .run_with_zero_port()
+        .run_with_zero_port_and_no_genesis_sync()
         .with_config(|config| {
             assert_eq!(config.allow_insecure_genesis_sync, true);
         });
@@ -851,6 +857,7 @@ fn network_port_flag_over_ipv4() {
     let port = 0;
     CommandLineTest::new()
         .flag("port", Some(port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
        .run()
         .with_config(|config| {
             assert_eq!(
@@ -867,6 +874,7 @@ fn network_port_flag_over_ipv4() {
     let port = 9000;
     CommandLineTest::new()
         .flag("port", Some(port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -886,6 +894,7 @@ fn network_port_flag_over_ipv6() {
     CommandLineTest::new()
         .flag("listen-address", Some("::1"))
         .flag("port", Some(port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -903,6 +912,7 @@ fn network_port_flag_over_ipv6() {
     CommandLineTest::new()
         .flag("listen-address", Some("::1"))
         .flag("port", Some(port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -925,6 +935,7 @@ fn network_port_flag_over_ipv4_and_ipv6() {
         .flag("listen-address", Some("::1"))
         .flag("port", Some(port.to_string().as_str()))
         .flag("port6", Some(port6.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -954,6 +965,7 @@ fn network_port_flag_over_ipv4_and_ipv6() {
         .flag("listen-address", Some("::1"))
         .flag("port", Some(port.to_string().as_str()))
         .flag("port6", Some(port6.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -983,6 +995,7 @@ fn network_port_and_discovery_port_flags_over_ipv4() {
     CommandLineTest::new()
         .flag("port", Some(tcp4_port.to_string().as_str()))
         .flag("discovery-port", Some(disc4_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1003,6 +1016,7 @@ fn network_port_and_discovery_port_flags_over_ipv6() {
         .flag("listen-address", Some("::1"))
         .flag("port", Some(tcp6_port.to_string().as_str()))
         .flag("discovery-port", Some(disc6_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1028,6 +1042,7 @@ fn network_port_and_discovery_port_flags_over_ipv4_and_ipv6() {
         .flag("discovery-port", Some(disc4_port.to_string().as_str()))
         .flag("port6", Some(tcp6_port.to_string().as_str()))
         .flag("discovery-port6", Some(disc6_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1067,6 +1082,7 @@ fn network_port_discovery_quic_port_flags_over_ipv4_and_ipv6() {
         .flag("port6", Some(tcp6_port.to_string().as_str()))
         .flag("discovery-port6", Some(disc6_port.to_string().as_str()))
         .flag("quic-port6", Some(quic6_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1293,6 +1309,7 @@ fn enr_match_flag_over_ipv4() {
         .flag("listen-address", Some("127.0.0.2"))
         .flag("discovery-port", Some(udp4_port.to_string().as_str()))
         .flag("port", Some(tcp4_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1324,6 +1341,7 @@ fn enr_match_flag_over_ipv6() {
         .flag("listen-address", Some(ADDR))
         .flag("discovery-port", Some(udp6_port.to_string().as_str()))
         .flag("port", Some(tcp6_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1364,6 +1382,7 @@ fn enr_match_flag_over_ipv4_and_ipv6() {
         .flag("listen-address", Some(IPV6_ADDR))
         .flag("discovery-port6", Some(udp6_port.to_string().as_str()))
         .flag("port6", Some(tcp6_port.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| {
             assert_eq!(
@@ -1490,6 +1509,7 @@ fn http_port_flag() {
         .flag("http", None)
         .flag("http-port", Some(port1.to_string().as_str()))
         .flag("port", Some(port2.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| assert_eq!(config.http_api.listen_port, port1));
 }
@@ -1647,6 +1667,7 @@ fn metrics_port_flag() {
         .flag("metrics", None)
         .flag("metrics-port", Some(port1.to_string().as_str()))
         .flag("port", Some(port2.to_string().as_str()))
+        .flag("allow-insecure-genesis-sync", None)
         .run()
         .with_config(|config| assert_eq!(config.http_metrics.listen_port, port1));
 }
diff --git a/scripts/local_testnet/geth.sh b/scripts/local_testnet/geth.sh
index 5dc4575cf0..ab1a0ec6ee 100755
--- a/scripts/local_testnet/geth.sh
+++ b/scripts/local_testnet/geth.sh
@@ -50,4 +50,5 @@ exec $GETH_BINARY \
     --bootnodes $EL_BOOTNODE_ENODE \
     --port $network_port \
     --http.port $http_port \
-    --authrpc.port $auth_port
+    --authrpc.port $auth_port \
+    2>&1 | tee $data_dir/geth.log
diff --git a/scripts/tests/genesis.json b/scripts/tests/genesis.json
index 83f45f1a01..bfbc08c81e 100644
--- a/scripts/tests/genesis.json
+++ b/scripts/tests/genesis.json
@@ -13,7 +13,7 @@
         "londonBlock": 0,
         "mergeForkBlock": 0,
         "shanghaiTime": 0,
-        "shardingForkTime": 0,
+        "cancunTime": 0,
         "terminalTotalDifficulty": 0,
         "terminalTotalDifficultyPassed": true
     },
diff --git a/scripts/tests/vars.env b/scripts/tests/vars.env
index 98ae08f074..ffe7ac4aec 100644
--- a/scripts/tests/vars.env
+++ b/scripts/tests/vars.env
@@ -16,7 +16,7 @@ DEPOSIT_CONTRACT_ADDRESS=4242424242424242424242424242424242424242
 GENESIS_FORK_VERSION=0x42424242
 
 # Block hash generated from genesis.json in directory
-ETH1_BLOCK_HASH=add7865f8346031c72287e2edc4a4952fd34fc0a8642403e8c1bce67f215c92b
+ETH1_BLOCK_HASH=7a5c656343c3a66dcf75415958b500e8873f9dab0cd588e6cf0785b52a06dd34
 
 VALIDATOR_COUNT=80
 GENESIS_VALIDATOR_COUNT=80
@@ -41,8 +41,8 @@ CHAIN_ID=4242
 # Hard fork configuration
 ALTAIR_FORK_EPOCH=0
 BELLATRIX_FORK_EPOCH=0
-CAPELLA_FORK_EPOCH=1
-DENEB_FORK_EPOCH=18446744073709551615
+CAPELLA_FORK_EPOCH=0
+DENEB_FORK_EPOCH=0
 
 TTD=0
@@ -62,4 +62,5 @@ PROPOSER_SCORE_BOOST=70
 BN_ARGS=""
 
 # Enable doppelganger detection
-VC_ARGS=" --enable-doppelganger-protection "
\ No newline at end of file
+VC_ARGS=" --enable-doppelganger-protection "
+
diff --git a/validator_client/Cargo.toml b/validator_client/Cargo.toml
index 90a82b7e3b..8e587c6155 100644
--- a/validator_client/Cargo.toml
+++ b/validator_client/Cargo.toml
@@ -10,6 +10,7 @@ path = "src/lib.rs"
 
 [dev-dependencies]
 tokio = { workspace = true }
+itertools = { workspace = true }
 
 [dependencies]
 tree_hash = { workspace = true }
@@ -51,7 +52,6 @@ ring = { workspace = true }
 rand = { workspace = true, features = ["small_rng"] }
 lighthouse_metrics = { workspace = true }
 lazy_static = { workspace = true }
-itertools = { workspace = true }
 monitoring_api = { workspace = true }
 sensitive_url = { workspace = true }
 task_executor = { workspace = true }
"src/lib.rs" [dev-dependencies] tokio = { workspace = true } +itertools = { workspace = true } [dependencies] tree_hash = { workspace = true } @@ -51,7 +52,6 @@ ring = { workspace = true } rand = { workspace = true, features = ["small_rng"] } lighthouse_metrics = { workspace = true } lazy_static = { workspace = true } -itertools = { workspace = true } monitoring_api = { workspace = true } sensitive_url = { workspace = true } task_executor = { workspace = true } diff --git a/validator_client/src/cli.rs b/validator_client/src/cli.rs index e4e5a2f130..16a265212e 100644 --- a/validator_client/src/cli.rs +++ b/validator_client/src/cli.rs @@ -145,6 +145,12 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { future.") .takes_value(false) ) + .arg( + Arg::with_name("distributed") + .long("distributed") + .help("Enables functionality required for running the validator in a distributed validator cluster.") + .takes_value(false) + ) /* REST API related arguments */ .arg( Arg::with_name("http") diff --git a/validator_client/src/config.rs b/validator_client/src/config.rs index 7ac9e3e3bc..ae59829a3e 100644 --- a/validator_client/src/config.rs +++ b/validator_client/src/config.rs @@ -84,6 +84,8 @@ pub struct Config { pub builder_boost_factor: Option, /// If true, Lighthouse will prefer builder proposals, if available. pub prefer_builder_proposals: bool, + /// Whether we are running with distributed network support. + pub distributed: bool, pub web3_signer_keep_alive_timeout: Option, pub web3_signer_max_idle_connections: Option, } @@ -130,6 +132,7 @@ impl Default for Config { produce_block_v3: false, builder_boost_factor: None, prefer_builder_proposals: false, + distributed: false, web3_signer_keep_alive_timeout: Some(Duration::from_secs(90)), web3_signer_max_idle_connections: None, } @@ -233,6 +236,10 @@ impl Config { config.beacon_nodes_tls_certs = Some(tls_certs.split(',').map(PathBuf::from).collect()); } + if cli_args.is_present("distributed") { + config.distributed = true; + } + if cli_args.is_present("disable-run-on-all") { warn!( log, diff --git a/validator_client/src/duties_service.rs b/validator_client/src/duties_service.rs index 26747f8111..290803e257 100644 --- a/validator_client/src/duties_service.rs +++ b/validator_client/src/duties_service.rs @@ -6,7 +6,7 @@ //! The `DutiesService` is also responsible for sending events to the `BlockService` which trigger //! block production. -mod sync; +pub mod sync; use crate::beacon_node_fallback::{ApiTopic, BeaconNodeFallback, OfflineOnFailure, RequireSynced}; use crate::http_metrics::metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY}; @@ -42,6 +42,9 @@ const HISTORICAL_DUTIES_EPOCHS: u64 = 2; /// At start-up selection proofs will be computed with less lookahead out of necessity. const SELECTION_PROOF_SLOT_LOOKAHEAD: u64 = 8; +/// The attestation selection proof lookahead for those running with the --distributed flag. +const SELECTION_PROOF_SLOT_LOOKAHEAD_DVT: u64 = 1; + /// Fraction of a slot at which selection proof signing should happen (2 means half way). const SELECTION_PROOF_SCHEDULE_DENOM: u32 = 2; @@ -211,16 +214,21 @@ pub struct DutiesService { /// proposals for any validators which are not registered locally. pub proposers: RwLock, /// Map from validator index to sync committee duties. - pub sync_duties: SyncDutiesMap, + pub sync_duties: SyncDutiesMap, /// Provides the canonical list of locally-managed validators. pub validator_store: Arc>, /// Tracks the current slot. pub slot_clock: T, /// Provides HTTP access to remote beacon nodes. 
pub beacon_nodes: Arc>, - pub enable_high_validator_count_metrics: bool, + /// The runtime for spawning tasks. pub context: RuntimeContext, + /// The current chain spec. pub spec: ChainSpec, + //// Whether we permit large validator counts in the metrics. + pub enable_high_validator_count_metrics: bool, + /// If this validator is running in distributed mode. + pub distributed: bool, } impl DutiesService { @@ -997,7 +1005,13 @@ async fn fill_in_selection_proofs( continue; }; - let lookahead_slot = current_slot + SELECTION_PROOF_SLOT_LOOKAHEAD; + let selection_lookahead = if duties_service.distributed { + SELECTION_PROOF_SLOT_LOOKAHEAD_DVT + } else { + SELECTION_PROOF_SLOT_LOOKAHEAD + }; + + let lookahead_slot = current_slot + selection_lookahead; let mut relevant_duties = duties_by_slot.split_off(&lookahead_slot); std::mem::swap(&mut relevant_duties, &mut duties_by_slot); diff --git a/validator_client/src/duties_service/sync.rs b/validator_client/src/duties_service/sync.rs index de42fa587e..3618b47146 100644 --- a/validator_client/src/duties_service/sync.rs +++ b/validator_client/src/duties_service/sync.rs @@ -7,18 +7,18 @@ use crate::{ }; use futures::future::join_all; -use itertools::Itertools; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; use slog::{crit, debug, info, warn}; use slot_clock::SlotClock; use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; use std::sync::Arc; -use types::{ - ChainSpec, Epoch, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId, -}; +use types::{ChainSpec, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId}; -/// Number of epochs in advance to compute selection proofs. +/// Number of epochs in advance to compute selection proofs when not in `distributed` mode. pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2; +/// Number of slots in advance to compute selection proofs when in `distributed` mode. +pub const AGGREGATION_PRE_COMPUTE_SLOTS_DISTRIBUTED: u64 = 1; /// Top-level data-structure containing sync duty information. /// @@ -32,9 +32,12 @@ pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2; /// 2. One-at-a-time locking. For the innermost locks on the aggregator duties, all of the functions /// in this file take care to only lock one validator at a time. We never hold a lock while /// trying to obtain another one (hence no lock ordering issues). -pub struct SyncDutiesMap { +pub struct SyncDutiesMap { /// Map from sync committee period to duties for members of that sync committee. committees: RwLock>, + /// Whether we are in `distributed` mode and using reduced lookahead for aggregate pre-compute. + distributed: bool, + _phantom: PhantomData, } /// Duties for a single sync committee period. @@ -59,8 +62,8 @@ pub struct ValidatorDuties { /// Aggregator duties for a single validator. pub struct AggregatorDuties { - /// The epoch up to which aggregation proofs have already been computed (inclusive). - pre_compute_epoch: RwLock>, + /// The slot up to which aggregation proofs have already been computed (inclusive). + pre_compute_slot: RwLock>, /// Map from slot & subnet ID to proof that this validator is an aggregator. 
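`fill_in_selection_proofs` peels off just the duties inside the lookahead window using `BTreeMap::split_off` plus a swap: `split_off(&k)` keeps keys below `k` in the original map and returns the rest, so swapping leaves the near-term duties in `relevant_duties`. A small standalone demonstration of that idiom (the slot numbers are illustrative):

```rust
use std::collections::BTreeMap;

fn main() {
    // Duties keyed by slot, as in the duties service.
    let mut duties_by_slot: BTreeMap<u64, &str> =
        [(100, "a"), (101, "b"), (109, "c"), (120, "d")].into_iter().collect();

    let current_slot = 100u64;
    // 1 slot of lookahead in --distributed mode, 8 otherwise.
    let distributed = false;
    let lookahead = if distributed { 1 } else { 8 };
    let lookahead_slot = current_slot + lookahead;

    // `split_off` returns everything >= lookahead_slot; after the swap,
    // `relevant_duties` holds the near-term entries to sign now.
    let mut relevant_duties = duties_by_slot.split_off(&lookahead_slot);
    std::mem::swap(&mut relevant_duties, &mut duties_by_slot);

    assert_eq!(relevant_duties.keys().copied().collect::<Vec<_>>(), vec![100, 101]);
    assert_eq!(duties_by_slot.keys().copied().collect::<Vec<_>>(), vec![109, 120]);
}
```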
diff --git a/validator_client/src/duties_service/sync.rs b/validator_client/src/duties_service/sync.rs
index de42fa587e..3618b47146 100644
--- a/validator_client/src/duties_service/sync.rs
+++ b/validator_client/src/duties_service/sync.rs
@@ -7,18 +7,18 @@ use crate::{
 };
 
 use futures::future::join_all;
-use itertools::Itertools;
 use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
 use slog::{crit, debug, info, warn};
 use slot_clock::SlotClock;
 use std::collections::{HashMap, HashSet};
+use std::marker::PhantomData;
 use std::sync::Arc;
-use types::{
-    ChainSpec, Epoch, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId,
-};
+use types::{ChainSpec, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId};
 
-/// Number of epochs in advance to compute selection proofs.
+/// Number of epochs in advance to compute selection proofs when not in `distributed` mode.
 pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2;
+/// Number of slots in advance to compute selection proofs when in `distributed` mode.
+pub const AGGREGATION_PRE_COMPUTE_SLOTS_DISTRIBUTED: u64 = 1;
 
 /// Top-level data-structure containing sync duty information.
 ///
@@ -32,9 +32,12 @@ pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2;
 /// 2. One-at-a-time locking. For the innermost locks on the aggregator duties, all of the functions
 ///    in this file take care to only lock one validator at a time. We never hold a lock while
 ///    trying to obtain another one (hence no lock ordering issues).
-pub struct SyncDutiesMap {
+pub struct SyncDutiesMap<E: EthSpec> {
     /// Map from sync committee period to duties for members of that sync committee.
     committees: RwLock<HashMap<u64, CommitteeDuties>>,
+    /// Whether we are in `distributed` mode and using reduced lookahead for aggregate pre-compute.
+    distributed: bool,
+    _phantom: PhantomData<E>,
 }
 
 /// Duties for a single sync committee period.
@@ -59,8 +62,8 @@ pub struct ValidatorDuties {
 
 /// Aggregator duties for a single validator.
 pub struct AggregatorDuties {
-    /// The epoch up to which aggregation proofs have already been computed (inclusive).
-    pre_compute_epoch: RwLock<Option<Epoch>>,
+    /// The slot up to which aggregation proofs have already been computed (inclusive).
+    pre_compute_slot: RwLock<Option<Slot>>,
     /// Map from slot & subnet ID to proof that this validator is an aggregator.
     ///
     /// The slot is the slot at which the signed contribution and proof should be broadcast,
@@ -82,15 +85,15 @@ pub struct SlotDuties {
     pub aggregators: HashMap<SyncSubnetId, Vec<(u64, PublicKeyBytes, SyncSelectionProof)>>,
 }
 
-impl Default for SyncDutiesMap {
-    fn default() -> Self {
+impl<E: EthSpec> SyncDutiesMap<E> {
+    pub fn new(distributed: bool) -> Self {
         Self {
             committees: RwLock::new(HashMap::new()),
+            distributed,
+            _phantom: PhantomData,
         }
     }
-}
 
-impl SyncDutiesMap {
     /// Check if duties are already known for all of the given validators for `committee_period`.
     fn all_duties_known(&self, committee_period: u64, validator_indices: &[u64]) -> bool {
         self.committees
@@ -104,22 +107,34 @@ impl<E: EthSpec> SyncDutiesMap<E> {
             })
     }
 
+    /// Number of slots in advance to compute selection proofs
+    fn aggregation_pre_compute_slots(&self) -> u64 {
+        if self.distributed {
+            AGGREGATION_PRE_COMPUTE_SLOTS_DISTRIBUTED
+        } else {
+            E::slots_per_epoch() * AGGREGATION_PRE_COMPUTE_EPOCHS
+        }
+    }
+
     /// Prepare for pre-computation of selection proofs for `committee_period`.
     ///
-    /// Return the epoch up to which proofs should be pre-computed, as well as a vec of
-    /// `(previous_pre_compute_epoch, sync_duty)` pairs for all validators which need to have proofs
+    /// Return the slot up to which proofs should be pre-computed, as well as a vec of
+    /// `(previous_pre_compute_slot, sync_duty)` pairs for all validators which need to have proofs
     /// computed. See `fill_in_aggregation_proofs` for the actual calculation.
     fn prepare_for_aggregator_pre_compute(
         &self,
         committee_period: u64,
-        current_epoch: Epoch,
+        current_slot: Slot,
         spec: &ChainSpec,
-    ) -> (Epoch, Vec<(Epoch, SyncDuty)>) {
-        let default_start_epoch =
-            std::cmp::max(current_epoch, first_epoch_of_period(committee_period, spec));
-        let pre_compute_epoch = std::cmp::min(
-            current_epoch + AGGREGATION_PRE_COMPUTE_EPOCHS,
-            last_epoch_of_period(committee_period, spec),
+    ) -> (Slot, Vec<(Slot, SyncDuty)>) {
+        let default_start_slot = std::cmp::max(
+            current_slot,
+            first_slot_of_period::<E>(committee_period, spec),
+        );
+        let pre_compute_lookahead_slots = self.aggregation_pre_compute_slots();
+        let pre_compute_slot = std::cmp::min(
+            current_slot + pre_compute_lookahead_slots,
+            last_slot_of_period::<E>(committee_period, spec),
         );
 
         let pre_compute_duties = self.committees.read().get(&committee_period).map_or_else(
@@ -130,18 +145,18 @@ impl<E: EthSpec> SyncDutiesMap<E> {
                     .values()
                     .filter_map(|maybe_duty| {
                         let duty = maybe_duty.as_ref()?;
-                        let old_pre_compute_epoch = duty
+                        let old_pre_compute_slot = duty
                             .aggregation_duties
-                            .pre_compute_epoch
+                            .pre_compute_slot
                             .write()
-                            .replace(pre_compute_epoch);
+                            .replace(pre_compute_slot);
 
-                        match old_pre_compute_epoch {
+                        match old_pre_compute_slot {
                             // No proofs pre-computed previously, compute all from the start of
-                            // the period or the current epoch (whichever is later).
-                            None => Some((default_start_epoch, duty.duty.clone())),
+                            // the period or the current slot (whichever is later).
+                            None => Some((default_start_slot, duty.duty.clone())),
                             // Proofs computed up to `prev`, start from the subsequent epoch.
-                            Some(prev) if prev < pre_compute_epoch => {
+                            Some(prev) if prev < pre_compute_slot => {
                                 Some((prev + 1, duty.duty.clone()))
                             }
                             // Proofs already known, no need to compute.
@@ -151,7 +166,7 @@ impl<E: EthSpec> SyncDutiesMap<E> {
                     .collect()
             },
         );
-        (pre_compute_epoch, pre_compute_duties)
+        (pre_compute_slot, pre_compute_duties)
     }
 
     fn get_or_create_committee_duties<'a, 'b>(
@@ -176,7 +191,7 @@ impl SyncDutiesMap {
     /// Get duties for all validators for the given `wall_clock_slot`.
     ///
     /// This is the entry-point for the sync committee service.
-    pub fn get_duties_for_slot<E: EthSpec>(
+    pub fn get_duties_for_slot(
         &self,
         wall_clock_slot: Slot,
         spec: &ChainSpec,
@@ -253,7 +268,7 @@ impl ValidatorDuties {
         Self {
             duty,
             aggregation_duties: AggregatorDuties {
-                pre_compute_epoch: RwLock::new(None),
+                pre_compute_slot: RwLock::new(None),
                 proofs: RwLock::new(HashMap::new()),
             },
         }
@@ -265,12 +280,12 @@ fn epoch_offset(spec: &ChainSpec) -> u64 {
     spec.epochs_per_sync_committee_period.as_u64() / 2
 }
 
-fn first_epoch_of_period(sync_committee_period: u64, spec: &ChainSpec) -> Epoch {
-    spec.epochs_per_sync_committee_period * sync_committee_period
+fn first_slot_of_period<E: EthSpec>(sync_committee_period: u64, spec: &ChainSpec) -> Slot {
+    (spec.epochs_per_sync_committee_period * sync_committee_period).start_slot(E::slots_per_epoch())
 }
 
-fn last_epoch_of_period(sync_committee_period: u64, spec: &ChainSpec) -> Epoch {
-    first_epoch_of_period(sync_committee_period + 1, spec) - 1
+fn last_slot_of_period<E: EthSpec>(sync_committee_period: u64, spec: &ChainSpec) -> Slot {
+    first_slot_of_period::<E>(sync_committee_period + 1, spec) - 1
 }
 
 pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
@@ -278,11 +293,11 @@ pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
 ) -> Result<(), Error> {
     let sync_duties = &duties_service.sync_duties;
     let spec = &duties_service.spec;
-    let current_epoch = duties_service
+    let current_slot = duties_service
         .slot_clock
         .now()
-        .ok_or(Error::UnableToReadSlotClock)?
-        .epoch(E::slots_per_epoch());
+        .ok_or(Error::UnableToReadSlotClock)?;
+    let current_epoch = current_slot.epoch(E::slots_per_epoch());
 
     // If the Altair fork is yet to be activated, do not attempt to poll for duties.
     if spec
@@ -330,8 +345,8 @@ pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
     }
 
     // Pre-compute aggregator selection proofs for the current period.
-    let (current_pre_compute_epoch, new_pre_compute_duties) = sync_duties
-        .prepare_for_aggregator_pre_compute(current_sync_committee_period, current_epoch, spec);
+    let (current_pre_compute_slot, new_pre_compute_duties) = sync_duties
+        .prepare_for_aggregator_pre_compute(current_sync_committee_period, current_slot, spec);
 
     if !new_pre_compute_duties.is_empty() {
         let sub_duties_service = duties_service.clone();
@@ -341,8 +356,8 @@ pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
                     sub_duties_service,
                     &new_pre_compute_duties,
                     current_sync_committee_period,
-                    current_epoch,
-                    current_pre_compute_epoch,
+                    current_slot,
+                    current_pre_compute_slot,
                 )
                 .await
             },
@@ -368,11 +383,14 @@ pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
     }
 
     // Pre-compute aggregator selection proofs for the next period.
-    if (current_epoch + AGGREGATION_PRE_COMPUTE_EPOCHS).sync_committee_period(spec)?
+    let aggregate_pre_compute_lookahead_slots = sync_duties.aggregation_pre_compute_slots();
+    if (current_slot + aggregate_pre_compute_lookahead_slots)
+        .epoch(E::slots_per_epoch())
+        .sync_committee_period(spec)?
         == next_sync_committee_period
     {
-        let (pre_compute_epoch, new_pre_compute_duties) = sync_duties
-            .prepare_for_aggregator_pre_compute(next_sync_committee_period, current_epoch, spec);
+        let (pre_compute_slot, new_pre_compute_duties) = sync_duties
+            .prepare_for_aggregator_pre_compute(next_sync_committee_period, current_slot, spec);
 
         if !new_pre_compute_duties.is_empty() {
             let sub_duties_service = duties_service.clone();
@@ -382,8 +400,8 @@ pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
                         sub_duties_service,
                         &new_pre_compute_duties,
                         next_sync_committee_period,
-                        current_epoch,
-                        pre_compute_epoch,
+                        current_slot,
+                        pre_compute_slot,
                     )
                     .await
                 },
@@ -495,10 +513,10 @@ pub async fn poll_sync_committee_duties_for_period<T: SlotClock + 'static, E: EthSpec>(
 pub async fn fill_in_aggregation_proofs<T: SlotClock + 'static, E: EthSpec>(
     duties_service: Arc<DutiesService<T, E>>,
-    pre_compute_duties: &[(Epoch, SyncDuty)],
+    pre_compute_duties: &[(Slot, SyncDuty)],
     sync_committee_period: u64,
-    current_epoch: Epoch,
-    pre_compute_epoch: Epoch,
+    current_slot: Slot,
+    pre_compute_slot: Slot,
 ) {
     let log = duties_service.context.log();
 
@@ -506,16 +524,16 @@ pub async fn fill_in_aggregation_proofs<T: SlotClock + 'static, E: EthSpec>(
     debug!(
         log,
         "Calculating sync selection proofs";
         "period" => sync_committee_period,
-        "current_epoch" => current_epoch,
-        "pre_compute_epoch" => pre_compute_epoch
+        "current_slot" => current_slot,
+        "pre_compute_slot" => pre_compute_slot
     );
 
-    // Generate selection proofs for each validator at each slot, one epoch at a time.
-    for epoch in (current_epoch.as_u64()..=pre_compute_epoch.as_u64()).map(Epoch::new) {
+    // Generate selection proofs for each validator at each slot, one slot at a time.
+    for slot in (current_slot.as_u64()..=pre_compute_slot.as_u64()).map(Slot::new) {
         let mut validator_proofs = vec![];
-        for (validator_start_epoch, duty) in pre_compute_duties {
-            // Proofs are already known at this epoch for this validator.
-            if epoch < *validator_start_epoch {
+        for (validator_start_slot, duty) in pre_compute_duties {
+            // Proofs are already known at this slot for this validator.
+            if slot < *validator_start_slot {
                 continue;
             }
 
@@ -533,67 +551,64 @@ pub async fn fill_in_aggregation_proofs<T: SlotClock + 'static, E: EthSpec>(
 
             // Create futures to produce proofs.
             let duties_service_ref = &duties_service;
-            let futures = epoch
-                .slot_iter(E::slots_per_epoch())
-                .cartesian_product(&subnet_ids)
-                .map(|(duty_slot, subnet_id)| async move {
-                    // Construct proof for prior slot.
-                    let slot = duty_slot - 1;
-
-                    let proof = match duties_service_ref
-                        .validator_store
-                        .produce_sync_selection_proof(&duty.pubkey, slot, *subnet_id)
-                        .await
-                    {
-                        Ok(proof) => proof,
-                        Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
-                            // A pubkey can be missing when a validator was recently
-                            // removed via the API.
-                            debug!(
-                                log,
-                                "Missing pubkey for sync selection proof";
-                                "pubkey" => ?pubkey,
-                                "pubkey" => ?duty.pubkey,
-                                "slot" => slot,
-                            );
-                            return None;
-                        }
-                        Err(e) => {
-                            warn!(
-                                log,
-                                "Unable to sign selection proof";
-                                "error" => ?e,
-                                "pubkey" => ?duty.pubkey,
-                                "slot" => slot,
-                            );
-                            return None;
-                        }
-                    };
-
-                    match proof.is_aggregator::<E>() {
-                        Ok(true) => {
-                            debug!(
-                                log,
-                                "Validator is sync aggregator";
-                                "validator_index" => duty.validator_index,
-                                "slot" => slot,
-                                "subnet_id" => %subnet_id,
-                            );
-                            Some(((slot, *subnet_id), proof))
-                        }
-                        Ok(false) => None,
-                        Err(e) => {
-                            warn!(
-                                log,
-                                "Error determining is_aggregator";
-                                "pubkey" => ?duty.pubkey,
-                                "slot" => slot,
-                                "error" => ?e,
-                            );
-                            None
-                        }
-                    }
-                });
+            let futures = subnet_ids.iter().map(|subnet_id| async move {
+                // Construct proof for prior slot.
+                let proof_slot = slot - 1;
+
+                let proof = match duties_service_ref
+                    .validator_store
+                    .produce_sync_selection_proof(&duty.pubkey, proof_slot, *subnet_id)
+                    .await
+                {
+                    Ok(proof) => proof,
+                    Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
+                        // A pubkey can be missing when a validator was recently
+                        // removed via the API.
+                        debug!(
+                            log,
+                            "Missing pubkey for sync selection proof";
+                            "pubkey" => ?pubkey,
+                            "pubkey" => ?duty.pubkey,
+                            "slot" => proof_slot,
+                        );
+                        return None;
+                    }
+                    Err(e) => {
+                        warn!(
+                            log,
+                            "Unable to sign selection proof";
+                            "error" => ?e,
+                            "pubkey" => ?duty.pubkey,
+                            "slot" => proof_slot,
+                        );
+                        return None;
+                    }
+                };
+
+                match proof.is_aggregator::<E>() {
+                    Ok(true) => {
+                        debug!(
+                            log,
+                            "Validator is sync aggregator";
+                            "validator_index" => duty.validator_index,
+                            "slot" => proof_slot,
+                            "subnet_id" => %subnet_id,
+                        );
+                        Some(((proof_slot, *subnet_id), proof))
+                    }
+                    Ok(false) => None,
+                    Err(e) => {
+                        warn!(
+                            log,
+                            "Error determining is_aggregator";
+                            "pubkey" => ?duty.pubkey,
+                            "slot" => proof_slot,
+                            "error" => ?e,
+                        );
+                        None
+                    }
+                }
+            });
 
             // Execute all the futures in parallel, collecting any successful results.
             let proofs = join_all(futures)
@@ -635,7 +650,7 @@ pub async fn fill_in_aggregation_proofs<T: SlotClock + 'static, E: EthSpec>(
         debug!(
             log,
             "Finished computing sync selection proofs";
-            "epoch" => epoch,
+            "slot" => slot,
             "updated_validators" => num_validators_updated,
         );
     }
diff --git a/validator_client/src/lib.rs b/validator_client/src/lib.rs
index 4828f43a0d..52de95a373 100644
--- a/validator_client/src/lib.rs
+++ b/validator_client/src/lib.rs
@@ -39,7 +39,7 @@ use account_utils::validator_definitions::ValidatorDefinitions;
 use attestation_service::{AttestationService, AttestationServiceBuilder};
 use block_service::{BlockService, BlockServiceBuilder};
 use clap::ArgMatches;
-use duties_service::DutiesService;
+use duties_service::{sync::SyncDutiesMap, DutiesService};
 use environment::RuntimeContext;
 use eth2::{reqwest::ClientBuilder, types::Graffiti, BeaconNodeHttpClient, StatusCode, Timeouts};
 use http_api::ApiSecret;
@@ -451,13 +451,14 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
         let duties_service = Arc::new(DutiesService {
             attesters: <_>::default(),
             proposers: <_>::default(),
-            sync_duties: <_>::default(),
+            sync_duties: SyncDutiesMap::new(config.distributed),
             slot_clock: slot_clock.clone(),
             beacon_nodes: beacon_nodes.clone(),
             validator_store: validator_store.clone(),
             spec: context.eth2_config.spec.clone(),
             context: duties_context,
             enable_high_validator_count_metrics: config.enable_high_validator_count_metrics,
+            distributed: config.distributed,
         });
 
         // Update the metrics server.
diff --git a/validator_client/src/sync_committee_service.rs b/validator_client/src/sync_committee_service.rs
index 90b62cd3b4..f7abb3855a 100644
--- a/validator_client/src/sync_committee_service.rs
+++ b/validator_client/src/sync_committee_service.rs
@@ -161,7 +161,7 @@ impl<T: SlotClock + 'static, E: EthSpec> SyncCommitteeService<T, E> {
         let Some(slot_duties) = self
             .duties_service
             .sync_duties
-            .get_duties_for_slot::<E>(slot, &self.duties_service.spec)
+            .get_duties_for_slot(slot, &self.duties_service.spec)
         else {
             debug!(log, "No duties known for slot {}", slot);
             return Ok(());
@@ -548,7 +548,7 @@ impl<T: SlotClock + 'static, E: EthSpec> SyncCommitteeService<T, E> {
             match self
                 .duties_service
                 .sync_duties
-                .get_duties_for_slot::<E>(duty_slot, spec)
+                .get_duties_for_slot(duty_slot, spec)
             {
                 Some(duties) => subscriptions.extend(subscriptions_from_sync_duties(
                     duties.duties,
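Switching the pre-compute bookkeeping from epochs to slots relies on the period/slot conversions added above. For mainnet values (256 epochs per period, 32 slots per epoch) each sync committee period spans 8192 slots; a minimal re-implementation of the two helpers with plain integers, as a sketch rather than the `types` API:

```rust
const SLOTS_PER_EPOCH: u64 = 32;
const EPOCHS_PER_SYNC_COMMITTEE_PERIOD: u64 = 256;

fn first_slot_of_period(period: u64) -> u64 {
    EPOCHS_PER_SYNC_COMMITTEE_PERIOD * period * SLOTS_PER_EPOCH
}

fn last_slot_of_period(period: u64) -> u64 {
    first_slot_of_period(period + 1) - 1
}

fn main() {
    // Period 1 covers slots [8192, 16383] on mainnet.
    assert_eq!(first_slot_of_period(1), 8192);
    assert_eq!(last_slot_of_period(1), 16383);

    // With --distributed, proofs are computed only 1 slot ahead, so the
    // pre-compute window is clamped to the period's last slot.
    let current_slot = 16383u64;
    let lookahead = 1;
    let pre_compute_slot = std::cmp::min(current_slot + lookahead, last_slot_of_period(1));
    assert_eq!(pre_compute_slot, 16383);
}
```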