Skip to content
This repository was archived by the owner on Jan 22, 2025. It is now read-only.

Commit 96b4a38

Browse files
jstarrymergify[bot]
authored andcommitted
Allow validators to reset to the slot which matches their last voted slot (#28172)
* Add failing test * Allow resetting to duplicate was confirmed * feedback * feedback * bump * simplify change * Revert "simplify change" This reverts commit 72e5de3. * update comment * Update core/src/replay_stage.rs (cherry picked from commit c2bb2b8)
1 parent 411f0a3 commit 96b4a38

File tree

2 files changed

+174
-37
lines changed

2 files changed

+174
-37
lines changed

core/src/heaviest_subtree_fork_choice.rs

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -858,37 +858,32 @@ impl HeaviestSubtreeForkChoice {
858858
tower
859859
.last_voted_slot_hash()
860860
.and_then(|last_voted_slot_hash| {
861-
let heaviest_slot_hash_on_same_voted_fork = self.best_slot(&last_voted_slot_hash);
862-
if heaviest_slot_hash_on_same_voted_fork.is_none() {
863-
if !tower.is_stray_last_vote() {
864-
// Unless last vote is stray and stale, self.bast_slot(last_voted_slot) must return
865-
// Some(_), justifying to panic! here.
866-
// Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
867-
// if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
868-
// touched in that case as well.
869-
// In other words, except being stray, all other slots have been voted on while this
870-
// validator has been running, so we must be able to fetch best_slots for all of
871-
// them.
872-
panic!(
873-
"a bank at last_voted_slot({:?}) is a frozen bank so must have been \
874-
added to heaviest_subtree_fork_choice at time of freezing",
875-
last_voted_slot_hash,
876-
)
877-
} else {
878-
// fork_infos doesn't have corresponding data for the stale stray last vote,
879-
// meaning some inconsistency between saved tower and ledger.
880-
// (newer snapshot, or only a saved tower is moved over to new setup?)
881-
return None;
861+
match self.is_candidate(&last_voted_slot_hash) {
862+
Some(true) => self.best_slot(&last_voted_slot_hash),
863+
Some(false) => None,
864+
None => {
865+
if !tower.is_stray_last_vote() {
866+
// Unless last vote is stray and stale, self.is_candidate(last_voted_slot_hash) must return
867+
// Some(_), justifying to panic! here.
868+
// Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
869+
// if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
870+
// touched in that case as well.
871+
// In other words, except being stray, all other slots have been voted on while this
872+
// validator has been running, so we must be able to fetch best_slots for all of
873+
// them.
874+
panic!(
875+
"a bank at last_voted_slot({:?}) is a frozen bank so must have been \
876+
added to heaviest_subtree_fork_choice at time of freezing",
877+
last_voted_slot_hash,
878+
)
879+
} else {
880+
// fork_infos doesn't have corresponding data for the stale stray last vote,
881+
// meaning some inconsistency between saved tower and ledger.
882+
// (newer snapshot, or only a saved tower is moved over to new setup?)
883+
None
884+
}
882885
}
883886
}
884-
let heaviest_slot_hash_on_same_voted_fork =
885-
heaviest_slot_hash_on_same_voted_fork.unwrap();
886-
887-
if heaviest_slot_hash_on_same_voted_fork == last_voted_slot_hash {
888-
None
889-
} else {
890-
Some(heaviest_slot_hash_on_same_voted_fork)
891-
}
892887
})
893888
}
894889

@@ -2957,9 +2952,10 @@ mod test {
29572952

29582953
// After marking the last vote in the tower as invalid, `heaviest_slot_on_same_voted_fork()`
29592954
// should disregard all descendants of that invalid vote
2960-
assert!(heaviest_subtree_fork_choice
2961-
.heaviest_slot_on_same_voted_fork(&tower)
2962-
.is_none());
2955+
assert_eq!(
2956+
heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower),
2957+
None
2958+
);
29632959

29642960
// Adding another descendant to the invalid candidate won't
29652961
// update the best slot, even if it contains votes

core/src/replay_stage.rs

Lines changed: 146 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,9 +2579,18 @@ impl ReplayStage {
25792579
);
25802580
}
25812581

2582-
// Given a heaviest bank, `heaviest_bank` and the next votable bank
2583-
// `heaviest_bank_on_same_voted_fork` as the validator's last vote, return
2584-
// a bank to vote on, a bank to reset to,
2582+
/// Given a `heaviest_bank` and a `heaviest_bank_on_same_voted_fork`, return
2583+
/// a bank to vote on, a bank to reset to, and a list of switch failure
2584+
/// reasons.
2585+
///
2586+
/// If `heaviest_bank_on_same_voted_fork` is `None` due to that fork no
2587+
/// longer being valid to vote on, it's possible that a validator will not
2588+
/// be able to reset away from the invalid fork that they last voted on. To
2589+
/// resolve this scenario, validators need to wait until they can create a
2590+
/// switch proof for another fork or until the invalid fork is be marked
2591+
/// valid again if it was confirmed by the cluster.
2592+
/// Until this is resolved, leaders will build each of their
2593+
/// blocks from the last reset bank on the invalid fork.
25852594
pub fn select_vote_and_reset_forks(
25862595
heaviest_bank: &Arc<Bank>,
25872596
// Should only be None if there was no previous vote
@@ -5260,6 +5269,139 @@ pub mod tests {
52605269
);
52615270
}
52625271

5272+
#[test]
5273+
fn test_unconfirmed_duplicate_slots_and_lockouts_for_non_heaviest_fork() {
5274+
/*
5275+
Build fork structure:
5276+
5277+
slot 0
5278+
|
5279+
slot 1
5280+
/ \
5281+
slot 2 |
5282+
| |
5283+
slot 3 |
5284+
| |
5285+
slot 4 |
5286+
slot 5
5287+
*/
5288+
let forks = tr(0) / (tr(1) / (tr(2) / (tr(3) / (tr(4)))) / tr(5));
5289+
5290+
let mut vote_simulator = VoteSimulator::new(1);
5291+
vote_simulator.fill_bank_forks(forks, &HashMap::<Pubkey, Vec<u64>>::new(), true);
5292+
let (bank_forks, mut progress) = (vote_simulator.bank_forks, vote_simulator.progress);
5293+
let ledger_path = get_tmp_ledger_path!();
5294+
let blockstore = Arc::new(
5295+
Blockstore::open(&ledger_path).expect("Expected to be able to open database ledger"),
5296+
);
5297+
let mut tower = Tower::new_for_tests(8, 2.0 / 3.0);
5298+
5299+
// All forks have same weight so heaviest bank to vote/reset on should be the tip of
5300+
// the fork with the lower slot
5301+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5302+
&bank_forks,
5303+
&mut progress,
5304+
&mut tower,
5305+
&mut vote_simulator.heaviest_subtree_fork_choice,
5306+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5307+
);
5308+
assert_eq!(vote_fork.unwrap(), 4);
5309+
assert_eq!(reset_fork.unwrap(), 4);
5310+
5311+
// Record the vote for 5 which is not on the heaviest fork.
5312+
tower.record_bank_vote(
5313+
&bank_forks.read().unwrap().get(5).unwrap(),
5314+
&Pubkey::default(),
5315+
);
5316+
5317+
// 4 should be the heaviest slot, but should not be votable
5318+
// because of lockout. 5 is the heaviest slot on the same fork as the last vote.
5319+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5320+
&bank_forks,
5321+
&mut progress,
5322+
&mut tower,
5323+
&mut vote_simulator.heaviest_subtree_fork_choice,
5324+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5325+
);
5326+
assert!(vote_fork.is_none());
5327+
assert_eq!(reset_fork, Some(5));
5328+
5329+
// Mark 5 as duplicate
5330+
blockstore.store_duplicate_slot(5, vec![], vec![]).unwrap();
5331+
let mut duplicate_slots_tracker = DuplicateSlotsTracker::default();
5332+
let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default();
5333+
let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default();
5334+
let bank5_hash = bank_forks.read().unwrap().bank_hash(5).unwrap();
5335+
assert_ne!(bank5_hash, Hash::default());
5336+
let duplicate_state = DuplicateState::new_from_state(
5337+
5,
5338+
&gossip_duplicate_confirmed_slots,
5339+
&mut vote_simulator.heaviest_subtree_fork_choice,
5340+
|| progress.is_dead(5).unwrap_or(false),
5341+
|| Some(bank5_hash),
5342+
);
5343+
let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) =
5344+
unbounded();
5345+
check_slot_agrees_with_cluster(
5346+
5,
5347+
bank_forks.read().unwrap().root(),
5348+
&blockstore,
5349+
&mut duplicate_slots_tracker,
5350+
&mut epoch_slots_frozen_slots,
5351+
&mut vote_simulator.heaviest_subtree_fork_choice,
5352+
&mut DuplicateSlotsToRepair::default(),
5353+
&ancestor_hashes_replay_update_sender,
5354+
SlotStateUpdate::Duplicate(duplicate_state),
5355+
);
5356+
5357+
// 4 should be the heaviest slot, but should not be votable
5358+
// because of lockout. 5 is no longer valid due to it being a duplicate.
5359+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5360+
&bank_forks,
5361+
&mut progress,
5362+
&mut tower,
5363+
&mut vote_simulator.heaviest_subtree_fork_choice,
5364+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5365+
);
5366+
assert!(vote_fork.is_none());
5367+
assert!(reset_fork.is_none());
5368+
5369+
// If slot 5 is marked as confirmed, it becomes the heaviest bank on same slot again
5370+
let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default();
5371+
gossip_duplicate_confirmed_slots.insert(5, bank5_hash);
5372+
let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state(
5373+
bank5_hash,
5374+
|| progress.is_dead(5).unwrap_or(false),
5375+
|| Some(bank5_hash),
5376+
);
5377+
check_slot_agrees_with_cluster(
5378+
5,
5379+
bank_forks.read().unwrap().root(),
5380+
&blockstore,
5381+
&mut duplicate_slots_tracker,
5382+
&mut epoch_slots_frozen_slots,
5383+
&mut vote_simulator.heaviest_subtree_fork_choice,
5384+
&mut duplicate_slots_to_repair,
5385+
&ancestor_hashes_replay_update_sender,
5386+
SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state),
5387+
);
5388+
// The confirmed hash is detected in `progress`, which means
5389+
// it's confirmation on the replayed block. This means we have
5390+
// the right version of the block, so `duplicate_slots_to_repair`
5391+
// should be empty
5392+
assert!(duplicate_slots_to_repair.is_empty());
5393+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5394+
&bank_forks,
5395+
&mut progress,
5396+
&mut tower,
5397+
&mut vote_simulator.heaviest_subtree_fork_choice,
5398+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5399+
);
5400+
// Should now pick 5 as the heaviest fork from last vote again.
5401+
assert!(vote_fork.is_none());
5402+
assert_eq!(reset_fork.unwrap(), 5);
5403+
}
5404+
52635405
#[test]
52645406
fn test_unconfirmed_duplicate_slots_and_lockouts() {
52655407
/*
@@ -5352,7 +5494,7 @@ pub mod tests {
53525494
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
53535495
);
53545496
assert!(vote_fork.is_none());
5355-
assert_eq!(reset_fork.unwrap(), 3);
5497+
assert_eq!(reset_fork, Some(3));
53565498

53575499
// Now mark 2, an ancestor of 4, as duplicate
53585500
blockstore.store_duplicate_slot(2, vec![], vec![]).unwrap();
@@ -6427,7 +6569,6 @@ pub mod tests {
64276569
);
64286570
let (heaviest_bank, heaviest_bank_on_same_fork) = heaviest_subtree_fork_choice
64296571
.select_forks(&frozen_banks, tower, progress, ancestors, bank_forks);
6430-
assert!(heaviest_bank_on_same_fork.is_none());
64316572
let SelectVoteAndResetForkResult {
64326573
vote_bank,
64336574
reset_bank,

0 commit comments

Comments
 (0)