diff --git a/agent/flow-trace/00_INDEX.md b/agent/flow-trace/00_INDEX.md index d7299a280..f4716b3e1 100644 --- a/agent/flow-trace/00_INDEX.md +++ b/agent/flow-trace/00_INDEX.md @@ -191,3 +191,4 @@ _Found during source-code cross-referencing of these trace documents._ | 4 | **`activate` CLI command is misleading** | Low | Named "activate" but actually calls "register" — will fail for already-registered operators. There's no standalone way to trigger re-evaluation of active status; instead, `_updateOperatorStatus()` runs automatically inside `addTicketBalance()`, `bondLicense()`, etc. | | 5 | **Active-job load balancing bug fixed** | Info | The Rust `NodeStateStore.available_tickets()` subtracts `active_jobs` from total tickets, reducing the chance of busy nodes being selected for new E3s. Previously, the `Sortition` actor's `Handler` was missing match arms for `E3Failed` and `E3StageChanged`, causing these events to fall to the default `_ => ()` — the typed handlers for decrementing jobs were dead code. This has been fixed: E3Failed and E3StageChanged are now routed to their handlers, and `finalized_committees` is cleaned up in `decrement_jobs_for_e3` to prevent unbounded memory growth. | | 6 | **Committee member expulsion** | Info | `SlashingManager` can call `expelCommitteeMember()` mid-DKG. The `Sortition` actor enriches the raw `CommitteeMemberExpelled` event with the expelled member's `party_id` (resolved from its stored `Committee` list) and re-publishes it. `ThresholdKeyshare` then uses the enriched `party_id` to update its collectors, potentially completing DKG with fewer parties. `ThresholdKeyshare` itself does not hold committee state. | +| 7 | **ProofRequestActor failure bridge fixed** | Info | `ProofRequestActor` no longer exits silently when a proof cannot be generated or signed; those paths previously only logged "will not be published" and returned. 
`ComputeRequestError` and local proof-signing failures for DKG-path proofs (`C0` through `C5`) now emit `E3Failed { failed_at_stage: CommitteeFinalized, reason: DKGInvalidShares }`, while decryption-path proofs (`C6` and `C7`) emit `E3Failed { failed_at_stage: CiphertextReady, reason: DecryptionInvalidShares }`. | diff --git a/agent/flow-trace/04_DKG_AND_COMPUTATION.md b/agent/flow-trace/04_DKG_AND_COMPUTATION.md index f4cc411a4..a16bf1cfb 100644 --- a/agent/flow-trace/04_DKG_AND_COMPUTATION.md +++ b/agent/flow-trace/04_DKG_AND_COMPUTATION.md @@ -281,6 +281,13 @@ implements `ZkRequest::NodeDkgFold` (full per-node pipeline to a `NodeFold` proo `PublicKeyAggregator` and `ThresholdPlaintextAggregator` dispatch the aggregator requests instead of pairwise folding. +**Failure bridge:** `ProofRequestActor` now converts proof-generation worker failures +and local proof-signing failures into terminal round failures instead of only +logging that the proof-bearing artifact will not be published. DKG-path proofs +(`C0` through `C5`) emit `E3Failed { failed_at_stage: CommitteeFinalized, +reason: DKGInvalidShares }`; decryption-path proofs (`C6` and `C7`) emit +`E3Failed { failed_at_stage: CiphertextReady, reason: DecryptionInvalidShares }`. 
+ ### Step 6: Collect All Threshold Shares (with C2/C3 Verification) ``` diff --git a/crates/zk-prover/src/actors/proof_request.rs b/crates/zk-prover/src/actors/proof_request.rs index 7895485bb..d62fd7e4f 100644 --- a/crates/zk-prover/src/actors/proof_request.rs +++ b/crates/zk-prover/src/actors/proof_request.rs @@ -685,6 +685,7 @@ impl ProofRequestActor { pending.sk_proof.expect("checked in is_complete"), ) else { error!("Failed to sign C4a SK proof — DecryptionKeyShared will not be published"); + self.fail_dkg_round(e3_id.clone(), &pending.ec, "C4a signing error"); return; }; @@ -702,6 +703,7 @@ impl ProofRequestActor { "Failed to sign C4b ESM proof [{}] — DecryptionKeyShared will not be published", idx ); + self.fail_dkg_round(e3_id.clone(), &pending.ec, "C4b signing error"); return; }; signed_esms.push(signed); @@ -802,6 +804,7 @@ impl ProofRequestActor { self.sign_proof(&e3_id, ProofType::C6ThresholdShareDecryption, proof) else { error!("Failed to sign C6 proof — DecryptionshareCreated will not be published"); + self.fail_decryption_round(e3_id.clone(), &pending.ec, "C6 signing error"); return; }; signed_proofs.push(signed); @@ -902,6 +905,7 @@ impl ProofRequestActor { let Some(signed) = self.sign_proof(&e3_id, ProofType::C5PkAggregation, proof) else { error!("Failed to sign C5 proof — PkAggregationProofSigned will not be published"); + self.fail_dkg_round(e3_id, &pending.ec, "C5 signing error"); return; }; @@ -985,6 +989,7 @@ impl ProofRequestActor { self.sign_proof(&e3_id, ProofType::C7DecryptedSharesAggregation, proof) else { error!("Failed to sign C7 proof — AggregationProofSigned will not be published"); + self.fail_decryption_round(e3_id.clone(), &pending.ec, "C7 signing error"); return; }; signed_proofs.push(signed); @@ -1085,6 +1090,38 @@ impl ProofRequestActor { } } + fn fail_dkg_round(&self, e3_id: E3id, ec: &EventContext, context: &str) { + if let Err(err) = self.bus.publish( + E3Failed { + e3_id: e3_id.clone(), + failed_at_stage: 
E3Stage::CommitteeFinalized, + reason: FailureReason::DKGInvalidShares, + }, + ec.clone(), + ) { + error!( + "Failed to publish E3Failed for {context} on E3 {}: {err}", + e3_id + ); + } + } + + fn fail_decryption_round(&self, e3_id: E3id, ec: &EventContext, context: &str) { + if let Err(err) = self.bus.publish( + E3Failed { + e3_id: e3_id.clone(), + failed_at_stage: E3Stage::CiphertextReady, + reason: FailureReason::DecryptionInvalidShares, + }, + ec.clone(), + ) { + error!( + "Failed to publish E3Failed for {context} on E3 {}: {err}", + e3_id + ); + } + } + fn sign_and_group_proofs( &self, e3_id: &E3id, @@ -1111,6 +1148,7 @@ impl ProofRequestActor { pending.pk_generation_proof.expect("checked"), ) else { error!("Failed to sign C1 proof — shares will not be published"); + self.fail_dkg_round(e3_id.clone(), ec, "C1 signing error"); return; }; @@ -1121,6 +1159,7 @@ impl ProofRequestActor { pending.sk_share_computation_proof.expect("checked"), ) else { error!("Failed to sign C2a proof — shares will not be published"); + self.fail_dkg_round(e3_id.clone(), ec, "C2a signing error"); return; }; @@ -1131,6 +1170,7 @@ impl ProofRequestActor { pending.e_sm_share_computation_proof.expect("checked"), ) else { error!("Failed to sign C2b proof — shares will not be published"); + self.fail_dkg_round(e3_id.clone(), ec, "C2b signing error"); return; }; @@ -1143,6 +1183,7 @@ impl ProofRequestActor { .map(|((recipient, _row), proof)| (*recipient, proof.clone())), ) else { error!("Failed to sign C3a proofs — shares will not be published"); + self.fail_dkg_round(e3_id.clone(), ec, "C3a signing error"); return; }; @@ -1155,6 +1196,7 @@ impl ProofRequestActor { .map(|((_esi, recipient, _row), proof)| (*recipient, proof.clone())), ) else { error!("Failed to sign C3b proofs — shares will not be published"); + self.fail_dkg_round(e3_id.clone(), ec, "C3b signing error"); return; }; @@ -1312,6 +1354,7 @@ impl ProofRequestActor { } Err(err) => { error!("Failed to sign C0 proof payload: {err} — 
proof will not be published"); + self.fail_dkg_round(e3_id, ec, "C0 signing error"); return; } } @@ -1395,6 +1438,7 @@ impl ProofRequestActor { "C0 proof request failed for E3 {}: {err} — key will not be published without proof", pending.e3_id ); + self.fail_dkg_round(pending.e3_id, &ec, "C0 proof request error"); return; } @@ -1406,6 +1450,7 @@ impl ProofRequestActor { self.threshold_correlation .retain(|_, (eid, _, _)| *eid != e3_id); self.pending_threshold.remove(&e3_id); + self.fail_dkg_round(e3_id, &ec, "DKG threshold proof request error"); return; } @@ -1418,6 +1463,7 @@ impl ProofRequestActor { self.decryption_correlation .retain(|_, (eid, _, _)| *eid != e3_id); self.pending_decryption.remove(&e3_id); + self.fail_dkg_round(e3_id, &ec, "C4 proof request error"); return; } @@ -1430,16 +1476,7 @@ impl ProofRequestActor { e3_id ); self.pending_share_decryption.remove(&e3_id); - if let Err(e) = self.bus.publish( - E3Failed { - e3_id, - failed_at_stage: E3Stage::CiphertextReady, - reason: FailureReason::DecryptionInvalidShares, - }, - ec.clone(), - ) { - error!("Failed to publish E3Failed for C6 error: {e}"); - } + self.fail_decryption_round(e3_id, &ec, "C6 proof request error"); return; } @@ -1450,16 +1487,7 @@ impl ProofRequestActor { ); self.pending_pk_aggregation.remove(&e3_id); - if let Err(e) = self.bus.publish( - E3Failed { - e3_id, - failed_at_stage: E3Stage::CommitteeFinalized, - reason: FailureReason::DKGInvalidShares, - }, - ec.clone(), - ) { - error!("Failed to publish E3Failed for C5 error: {e}"); - } + self.fail_dkg_round(e3_id, &ec, "C5 proof request error"); return; } @@ -1469,20 +1497,108 @@ impl ProofRequestActor { e3_id ); self.pending_aggregation.remove(&e3_id); - if let Err(e) = self.bus.publish( - E3Failed { - e3_id, - failed_at_stage: E3Stage::CiphertextReady, - reason: FailureReason::DecryptionInvalidShares, - }, - ec, - ) { - error!("Failed to publish E3Failed for C7 error: {e}"); - } + self.fail_decryption_round(e3_id, &ec, "C7 proof 
request error"); } } } +#[cfg(test)] +mod tests { + use super::*; + use alloy::signers::local::PrivateKeySigner; + use anyhow::Result; + use e3_events::{ + ComputeRequestErrorKind, Event, HistoryCollector, TakeEvents, Unsequenced, ZkError, + }; + use e3_test_helpers::get_common_setup; + use e3_utils::utility_types::ArcBytes; + + fn test_ctx(data: impl Into) -> EventContext { + EventContext::::from(data.into()).sequence(0) + } + + async fn next_event(history: &Addr>) -> Result { + let mut result = history.send(TakeEvents::::new(1)).await?; + assert!(!result.timed_out, "timed out waiting for an event"); + Ok(result.events.pop().expect("expected one event")) + } + + #[actix::test] + async fn c0_compute_error_emits_e3_failed() -> Result<()> { + let (bus, _rng, _seed, _params, _crp, _errors, history) = get_common_setup(None)?; + let mut actor = ProofRequestActor::new(&bus, PrivateKeySigner::random()); + let e3_id = E3id::new("44", 1); + let correlation_id = CorrelationId::new(); + + actor.pending.insert( + correlation_id, + PendingProofRequest { + e3_id: e3_id.clone(), + key: Arc::new(EncryptionKey::new(7, ArcBytes::from_bytes(&[1]))), + }, + ); + + actor.handle_compute_request_error(TypedEvent::new( + ComputeRequestError::new( + ComputeRequestErrorKind::Zk(ZkError::ProofGenerationFailed("boom".to_string())), + ComputeRequest::zk( + ZkRequest::PkBfv(PkBfvProofRequest::new( + ArcBytes::from_bytes(&[1]), + e3_fhe_params::BfvPreset::InsecureThreshold512, + )), + correlation_id, + e3_id.clone(), + ), + ), + test_ctx(E3Failed { + e3_id: e3_id.clone(), + failed_at_stage: E3Stage::CommitteeFinalized, + reason: FailureReason::DKGInvalidShares, + }), + )); + + let event = next_event(&history).await?; + assert!(matches!( + event.into_data(), + EnclaveEventData::E3Failed(data) + if data.e3_id == e3_id + && data.failed_at_stage == E3Stage::CommitteeFinalized + && data.reason == FailureReason::DKGInvalidShares + )); + assert!(actor.pending.is_empty()); + + Ok(()) + } + + 
#[actix::test] + async fn decryption_failure_helper_emits_e3_failed() -> Result<()> { + let (bus, _rng, _seed, _params, _crp, _errors, history) = get_common_setup(None)?; + let actor = ProofRequestActor::new(&bus, PrivateKeySigner::random()); + let e3_id = E3id::new("45", 1); + + actor.fail_decryption_round( + e3_id.clone(), + &test_ctx(E3Failed { + e3_id: e3_id.clone(), + failed_at_stage: E3Stage::CiphertextReady, + reason: FailureReason::DecryptionInvalidShares, + }), + "test decryption failure", + ); + + let event = next_event(&history).await?; + assert!(matches!( + event.into_data(), + EnclaveEventData::E3Failed(data) + if data.e3_id == e3_id + && data.failed_at_stage == E3Stage::CiphertextReady + && data.reason == FailureReason::DecryptionInvalidShares + )); + + Ok(()) + } +} + impl Actor for ProofRequestActor { type Context = Context; }