Skip to content
This repository was archived by the owner on Jan 22, 2025. It is now read-only.

Commit c2bb2b8

Browse files
authored
Allow validators to reset to the slot which matches their last voted slot (#28172)
* Add failing test * Allow resetting to duplicate was confirmed * feedback * feedback * bump * simplify change * Revert "simplify change" This reverts commit 72e5de3. * update comment * Update core/src/replay_stage.rs
1 parent 47b3ca5 commit c2bb2b8

File tree

2 files changed

+174
-37
lines changed

2 files changed

+174
-37
lines changed

core/src/heaviest_subtree_fork_choice.rs

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -858,37 +858,32 @@ impl HeaviestSubtreeForkChoice {
858858
tower
859859
.last_voted_slot_hash()
860860
.and_then(|last_voted_slot_hash| {
861-
let heaviest_slot_hash_on_same_voted_fork = self.best_slot(&last_voted_slot_hash);
862-
if heaviest_slot_hash_on_same_voted_fork.is_none() {
863-
if !tower.is_stray_last_vote() {
864-
// Unless last vote is stray and stale, self.bast_slot(last_voted_slot) must return
865-
// Some(_), justifying to panic! here.
866-
// Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
867-
// if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
868-
// touched in that case as well.
869-
// In other words, except being stray, all other slots have been voted on while this
870-
// validator has been running, so we must be able to fetch best_slots for all of
871-
// them.
872-
panic!(
873-
"a bank at last_voted_slot({:?}) is a frozen bank so must have been \
874-
added to heaviest_subtree_fork_choice at time of freezing",
875-
last_voted_slot_hash,
876-
)
877-
} else {
878-
// fork_infos doesn't have corresponding data for the stale stray last vote,
879-
// meaning some inconsistency between saved tower and ledger.
880-
// (newer snapshot, or only a saved tower is moved over to new setup?)
881-
return None;
861+
match self.is_candidate(&last_voted_slot_hash) {
862+
Some(true) => self.best_slot(&last_voted_slot_hash),
863+
Some(false) => None,
864+
None => {
865+
if !tower.is_stray_last_vote() {
866+
// Unless last vote is stray and stale, self.is_candidate(last_voted_slot_hash) must return
867+
// Some(_), justifying to panic! here.
868+
// Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
869+
// if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
870+
// touched in that case as well.
871+
// In other words, except being stray, all other slots have been voted on while this
872+
// validator has been running, so we must be able to fetch best_slots for all of
873+
// them.
874+
panic!(
875+
"a bank at last_voted_slot({:?}) is a frozen bank so must have been \
876+
added to heaviest_subtree_fork_choice at time of freezing",
877+
last_voted_slot_hash,
878+
)
879+
} else {
880+
// fork_infos doesn't have corresponding data for the stale stray last vote,
881+
// meaning some inconsistency between saved tower and ledger.
882+
// (newer snapshot, or only a saved tower is moved over to new setup?)
883+
None
884+
}
882885
}
883886
}
884-
let heaviest_slot_hash_on_same_voted_fork =
885-
heaviest_slot_hash_on_same_voted_fork.unwrap();
886-
887-
if heaviest_slot_hash_on_same_voted_fork == last_voted_slot_hash {
888-
None
889-
} else {
890-
Some(heaviest_slot_hash_on_same_voted_fork)
891-
}
892887
})
893888
}
894889

@@ -2957,9 +2952,10 @@ mod test {
29572952

29582953
// After marking the last vote in the tower as invalid, `heaviest_slot_on_same_voted_fork()`
29592954
// should disregard all descendants of that invalid vote
2960-
assert!(heaviest_subtree_fork_choice
2961-
.heaviest_slot_on_same_voted_fork(&tower)
2962-
.is_none());
2955+
assert_eq!(
2956+
heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower),
2957+
None
2958+
);
29632959

29642960
// Adding another descendant to the invalid candidate won't
29652961
// update the best slot, even if it contains votes

core/src/replay_stage.rs

Lines changed: 146 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2901,9 +2901,18 @@ impl ReplayStage {
29012901
);
29022902
}
29032903

2904-
// Given a heaviest bank, `heaviest_bank` and the next votable bank
2905-
// `heaviest_bank_on_same_voted_fork` as the validator's last vote, return
2906-
// a bank to vote on, a bank to reset to,
2904+
/// Given a `heaviest_bank` and a `heaviest_bank_on_same_voted_fork`, return
2905+
/// a bank to vote on, a bank to reset to, and a list of switch failure
2906+
/// reasons.
2907+
///
2908+
/// If `heaviest_bank_on_same_voted_fork` is `None` due to that fork no
2909+
/// longer being valid to vote on, it's possible that a validator will not
2910+
/// be able to reset away from the invalid fork that they last voted on. To
2911+
/// resolve this scenario, validators need to wait until they can create a
2912+
/// switch proof for another fork or until the invalid fork is be marked
2913+
/// valid again if it was confirmed by the cluster.
2914+
/// Until this is resolved, leaders will build each of their
2915+
/// blocks from the last reset bank on the invalid fork.
29072916
pub fn select_vote_and_reset_forks(
29082917
heaviest_bank: &Arc<Bank>,
29092918
// Should only be None if there was no previous vote
@@ -5605,6 +5614,139 @@ pub(crate) mod tests {
56055614
);
56065615
}
56075616

5617+
#[test]
5618+
fn test_unconfirmed_duplicate_slots_and_lockouts_for_non_heaviest_fork() {
5619+
/*
5620+
Build fork structure:
5621+
5622+
slot 0
5623+
|
5624+
slot 1
5625+
/ \
5626+
slot 2 |
5627+
| |
5628+
slot 3 |
5629+
| |
5630+
slot 4 |
5631+
slot 5
5632+
*/
5633+
let forks = tr(0) / (tr(1) / (tr(2) / (tr(3) / (tr(4)))) / tr(5));
5634+
5635+
let mut vote_simulator = VoteSimulator::new(1);
5636+
vote_simulator.fill_bank_forks(forks, &HashMap::<Pubkey, Vec<u64>>::new(), true);
5637+
let (bank_forks, mut progress) = (vote_simulator.bank_forks, vote_simulator.progress);
5638+
let ledger_path = get_tmp_ledger_path!();
5639+
let blockstore = Arc::new(
5640+
Blockstore::open(&ledger_path).expect("Expected to be able to open database ledger"),
5641+
);
5642+
let mut tower = Tower::new_for_tests(8, 2.0 / 3.0);
5643+
5644+
// All forks have same weight so heaviest bank to vote/reset on should be the tip of
5645+
// the fork with the lower slot
5646+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5647+
&bank_forks,
5648+
&mut progress,
5649+
&mut tower,
5650+
&mut vote_simulator.heaviest_subtree_fork_choice,
5651+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5652+
);
5653+
assert_eq!(vote_fork.unwrap(), 4);
5654+
assert_eq!(reset_fork.unwrap(), 4);
5655+
5656+
// Record the vote for 5 which is not on the heaviest fork.
5657+
tower.record_bank_vote(
5658+
&bank_forks.read().unwrap().get(5).unwrap(),
5659+
&Pubkey::default(),
5660+
);
5661+
5662+
// 4 should be the heaviest slot, but should not be votable
5663+
// because of lockout. 5 is the heaviest slot on the same fork as the last vote.
5664+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5665+
&bank_forks,
5666+
&mut progress,
5667+
&mut tower,
5668+
&mut vote_simulator.heaviest_subtree_fork_choice,
5669+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5670+
);
5671+
assert!(vote_fork.is_none());
5672+
assert_eq!(reset_fork, Some(5));
5673+
5674+
// Mark 5 as duplicate
5675+
blockstore.store_duplicate_slot(5, vec![], vec![]).unwrap();
5676+
let mut duplicate_slots_tracker = DuplicateSlotsTracker::default();
5677+
let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default();
5678+
let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default();
5679+
let bank5_hash = bank_forks.read().unwrap().bank_hash(5).unwrap();
5680+
assert_ne!(bank5_hash, Hash::default());
5681+
let duplicate_state = DuplicateState::new_from_state(
5682+
5,
5683+
&gossip_duplicate_confirmed_slots,
5684+
&mut vote_simulator.heaviest_subtree_fork_choice,
5685+
|| progress.is_dead(5).unwrap_or(false),
5686+
|| Some(bank5_hash),
5687+
);
5688+
let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) =
5689+
unbounded();
5690+
check_slot_agrees_with_cluster(
5691+
5,
5692+
bank_forks.read().unwrap().root(),
5693+
&blockstore,
5694+
&mut duplicate_slots_tracker,
5695+
&mut epoch_slots_frozen_slots,
5696+
&mut vote_simulator.heaviest_subtree_fork_choice,
5697+
&mut DuplicateSlotsToRepair::default(),
5698+
&ancestor_hashes_replay_update_sender,
5699+
SlotStateUpdate::Duplicate(duplicate_state),
5700+
);
5701+
5702+
// 4 should be the heaviest slot, but should not be votable
5703+
// because of lockout. 5 is no longer valid due to it being a duplicate.
5704+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5705+
&bank_forks,
5706+
&mut progress,
5707+
&mut tower,
5708+
&mut vote_simulator.heaviest_subtree_fork_choice,
5709+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5710+
);
5711+
assert!(vote_fork.is_none());
5712+
assert!(reset_fork.is_none());
5713+
5714+
// If slot 5 is marked as confirmed, it becomes the heaviest bank on same slot again
5715+
let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default();
5716+
gossip_duplicate_confirmed_slots.insert(5, bank5_hash);
5717+
let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state(
5718+
bank5_hash,
5719+
|| progress.is_dead(5).unwrap_or(false),
5720+
|| Some(bank5_hash),
5721+
);
5722+
check_slot_agrees_with_cluster(
5723+
5,
5724+
bank_forks.read().unwrap().root(),
5725+
&blockstore,
5726+
&mut duplicate_slots_tracker,
5727+
&mut epoch_slots_frozen_slots,
5728+
&mut vote_simulator.heaviest_subtree_fork_choice,
5729+
&mut duplicate_slots_to_repair,
5730+
&ancestor_hashes_replay_update_sender,
5731+
SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state),
5732+
);
5733+
// The confirmed hash is detected in `progress`, which means
5734+
// it's confirmation on the replayed block. This means we have
5735+
// the right version of the block, so `duplicate_slots_to_repair`
5736+
// should be empty
5737+
assert!(duplicate_slots_to_repair.is_empty());
5738+
let (vote_fork, reset_fork) = run_compute_and_select_forks(
5739+
&bank_forks,
5740+
&mut progress,
5741+
&mut tower,
5742+
&mut vote_simulator.heaviest_subtree_fork_choice,
5743+
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
5744+
);
5745+
// Should now pick 5 as the heaviest fork from last vote again.
5746+
assert!(vote_fork.is_none());
5747+
assert_eq!(reset_fork.unwrap(), 5);
5748+
}
5749+
56085750
#[test]
56095751
fn test_unconfirmed_duplicate_slots_and_lockouts() {
56105752
/*
@@ -5697,7 +5839,7 @@ pub(crate) mod tests {
56975839
&mut vote_simulator.latest_validator_votes_for_frozen_banks,
56985840
);
56995841
assert!(vote_fork.is_none());
5700-
assert_eq!(reset_fork.unwrap(), 3);
5842+
assert_eq!(reset_fork, Some(3));
57015843

57025844
// Now mark 2, an ancestor of 4, as duplicate
57035845
blockstore.store_duplicate_slot(2, vec![], vec![]).unwrap();
@@ -6778,7 +6920,6 @@ pub(crate) mod tests {
67786920
);
67796921
let (heaviest_bank, heaviest_bank_on_same_fork) = heaviest_subtree_fork_choice
67806922
.select_forks(&frozen_banks, tower, progress, ancestors, bank_forks);
6781-
assert!(heaviest_bank_on_same_fork.is_none());
67826923
let SelectVoteAndResetForkResult {
67836924
vote_bank,
67846925
reset_bank,

0 commit comments

Comments
 (0)