Skip to content

Commit 2ed9da7

Browse files
committed
Merge branch 'develop' of https://github.com/stacks-network/stacks-core into feat/use-burnchain-timeout
2 parents d0bdfe3 + 1fa594a commit 2ed9da7

51 files changed

Lines changed: 3022 additions & 1174 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/bitcoin-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,11 @@ jobs:
110110
- tests::nakamoto_integrations::continue_tenure_extend
111111
- tests::nakamoto_integrations::mock_mining
112112
- tests::nakamoto_integrations::multiple_miners
113+
- tests::nakamoto_integrations::follower_bootup_across_multiple_cycles
113114
- tests::nakamoto_integrations::utxo_check_on_startup_panic
114115
- tests::nakamoto_integrations::utxo_check_on_startup_recover
116+
- tests::signer::v0::multiple_miners_with_nakamoto_blocks
117+
- tests::signer::v0::partial_tenure_fork
115118
# Do not run this one until we figure out why it fails in CI
116119
# - tests::neon_integrations::bitcoin_reorg_flap
117120
# - tests::neon_integrations::bitcoin_reorg_flap_with_follower

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,28 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
1717
- `get-tenure-info?` added
1818
- `get-block-info?` removed
1919

20+
## [2.5.0.0.7]
21+
22+
### Added
23+
24+
- Add warn logs for block validate rejections (#5079)
25+
- Neon mock miner replay (#5060)
26+
27+
### Changed
28+
29+
- Revert BurnchainHeaderHash serialization change (#5094)
30+
- boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
31+
- Fix block proposal rejection test (#5084)
32+
- Mock signing revamp (#5070)
33+
- Multi miner fixes jude (#5040)
34+
- Remove spurious deadlock condition whenever the sortition DB is opened
35+
36+
## [2.5.0.0.6]
37+
38+
### Changed
39+
40+
- If there is a getchunk/putchunk that fails due to a stale (or future) version NACK, the StackerDB sync state machine should immediately retry sync (#5066)
41+
2042
## [2.5.0.0.5]
2143

2244
### Added

clarity/src/vm/database/sqlite.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use rusqlite::{
2121
};
2222
use stacks_common::types::chainstate::{BlockHeaderHash, StacksBlockId};
2323
use stacks_common::types::sqlite::NO_PARAMS;
24-
use stacks_common::util::db_common::tx_busy_handler;
24+
use stacks_common::util::db::tx_busy_handler;
2525
use stacks_common::util::hash::Sha512Trunc256Sum;
2626

2727
use super::clarity_store::{make_contract_hash_key, ContractCommitment};

libstackerdb/src/libstackerdb.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ pub struct SlotMetadata {
8282
}
8383

8484
/// Stacker DB chunk (i.e. as a reply to a chunk request)
85-
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
85+
#[derive(Clone, PartialEq, Serialize, Deserialize)]
8686
pub struct StackerDBChunkData {
8787
/// slot ID
8888
pub slot_id: u32,
@@ -98,6 +98,31 @@ pub struct StackerDBChunkData {
9898
pub data: Vec<u8>,
9999
}
100100

101+
impl fmt::Debug for StackerDBChunkData {
102+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103+
if self.data.len() < 128 {
104+
write!(
105+
f,
106+
"StackerDBChunkData({},{},{},{})",
107+
self.slot_id,
108+
self.slot_version,
109+
&self.sig,
110+
&to_hex(&self.data)
111+
)
112+
} else {
113+
write!(
114+
f,
115+
"StackerDBChunkData({},{},{},{}...({}))",
116+
self.slot_id,
117+
self.slot_version,
118+
&self.sig,
119+
&to_hex(&self.data[..128]),
120+
self.data.len()
121+
)
122+
}
123+
}
124+
}
125+
101126
/// StackerDB post chunk acknowledgement
102127
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
103128
pub struct StackerDBChunkAckData {

stacks-common/src/util/db.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation
2+
// Copyright (C) 2020 Stacks Open Internet Foundation
3+
//
4+
// This program is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// This program is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU General Public License
15+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
use std::backtrace::Backtrace;
18+
use std::sync::{LazyLock, Mutex};
19+
use std::thread;
20+
use std::time::{Duration, Instant};
21+
22+
use hashbrown::HashMap;
23+
use rand::{thread_rng, Rng};
24+
use rusqlite::Connection;
25+
26+
use crate::util::sleep_ms;
27+
28+
/// Keep track of DB locks, for deadlock debugging
29+
/// - **key:** `rusqlite::Connection` debug print
30+
/// - **value:** Lock holder (thread name + timestamp)
31+
///
32+
/// This uses a `Mutex` inside of `LazyLock` because:
33+
/// - Using `Mutex` alone, it can't be statically initialized because `HashMap::new()` isn't `const`
34+
/// - Using `LazyLock` alone doesn't allow interior mutability
35+
static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> =
36+
LazyLock::new(|| Mutex::new(HashMap::new()));
37+
/// Generate timestamps for use in `LOCK_TABLE`
38+
/// `Instant` is preferable to `SystemTime` because it uses `CLOCK_MONOTONIC` and is not affected by NTP adjustments
39+
static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
40+
41+
/// Call when using an operation which locks a database
42+
/// Updates `LOCK_TABLE`
43+
pub fn update_lock_table(conn: &Connection) {
44+
let timestamp = LOCK_TABLE_TIMER.elapsed().as_millis();
45+
// The debug format for `Connection` includes the path
46+
let k = format!("{conn:?}");
47+
let v = format!("{:?}@{timestamp}", thread::current().name());
48+
LOCK_TABLE.lock().unwrap().insert(k, v);
49+
}
50+
51+
/// Called by `rusqlite` if we are waiting too long on a database lock
52+
/// If called too many times, will assume a deadlock and panic
53+
pub fn tx_busy_handler(run_count: i32) -> bool {
54+
const TIMEOUT: Duration = Duration::from_secs(300);
55+
const AVG_SLEEP_TIME_MS: u64 = 100;
56+
57+
// First, check if this is taking unreasonably long. If so, it's probably a deadlock
58+
let run_count = run_count.unsigned_abs();
59+
let approx_time_elapsed =
60+
Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
61+
if approx_time_elapsed > TIMEOUT {
62+
error!("Deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
63+
"run_count" => run_count,
64+
"backtrace" => ?Backtrace::capture()
65+
);
66+
for (k, v) in LOCK_TABLE.lock().unwrap().iter() {
67+
error!("Database '{k}' last locked by {v}");
68+
}
69+
panic!("Deadlock in thread {:?}", thread::current().name());
70+
}
71+
72+
let mut sleep_time_ms = 2u64.saturating_pow(run_count);
73+
74+
sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
75+
76+
if sleep_time_ms > AVG_SLEEP_TIME_MS {
77+
let jitter = 10;
78+
sleep_time_ms =
79+
thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter));
80+
}
81+
82+
let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");
83+
if run_count > 10 && run_count % 10 == 0 {
84+
warn!("{msg}";
85+
"run_count" => run_count,
86+
"backtrace" => ?Backtrace::capture()
87+
);
88+
} else {
89+
debug!("{msg}");
90+
}
91+
92+
sleep_ms(sleep_time_ms);
93+
true
94+
}

stacks-common/src/util/mod.rs

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ pub mod log;
1919
#[macro_use]
2020
pub mod macros;
2121
pub mod chunked_encoding;
22+
pub mod db;
2223
pub mod hash;
2324
pub mod pair;
2425
pub mod pipe;
@@ -85,32 +86,6 @@ impl error::Error for HexError {
8586
}
8687
}
8788

88-
pub mod db_common {
89-
use std::{thread, time};
90-
91-
use rand::{thread_rng, Rng};
92-
93-
pub fn tx_busy_handler(run_count: i32) -> bool {
94-
let mut sleep_count = 10;
95-
if run_count > 0 {
96-
sleep_count = 2u64.saturating_pow(run_count as u32);
97-
}
98-
sleep_count = sleep_count.saturating_add(thread_rng().gen::<u64>() % sleep_count);
99-
100-
if sleep_count > 5000 {
101-
sleep_count = 5000;
102-
}
103-
104-
debug!(
105-
"Database is locked; sleeping {}ms and trying again",
106-
&sleep_count
107-
);
108-
109-
thread::sleep(time::Duration::from_millis(sleep_count));
110-
true
111-
}
112-
}
113-
11489
/// Write any `serde_json` object directly to a file
11590
pub fn serialize_json_to_file<J, P>(json: &J, path: P) -> Result<(), std::io::Error>
11691
where

stackslib/src/burnchains/db.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,8 @@ impl BurnchainDB {
14111411
Ok(())
14121412
}
14131413

1414+
/// Stores a newly-parsed burnchain block's relevant data into the DB.
1415+
/// The given block's operations will be validated.
14141416
pub fn store_new_burnchain_block<B: BurnchainHeaderReader>(
14151417
&mut self,
14161418
burnchain: &Burnchain,

stackslib/src/chainstate/burn/db/processing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ impl<'a> SortitionHandleTx<'a> {
211211
"SORTITION-HASH({}): {}",
212212
this_block_height, &snapshot.sortition_hash
213213
);
214-
debug!(
214+
info!(
215215
"CONSENSUS({}): {}",
216216
this_block_height, &snapshot.consensus_hash
217217
);

stackslib/src/chainstate/burn/db/sortdb.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3456,6 +3456,14 @@ impl SortitionDB {
34563456
SortitionDB::apply_schema_9(&tx.deref(), epochs)?;
34573457
tx.commit()?;
34583458
} else if version == expected_version {
3459+
// this transaction is almost never needed
3460+
let validated_epochs = StacksEpoch::validate_epochs(epochs);
3461+
let existing_epochs = Self::get_stacks_epochs(self.conn())?;
3462+
if existing_epochs == validated_epochs {
3463+
return Ok(());
3464+
}
3465+
3466+
// epochs are out of date
34593467
let tx = self.tx_begin()?;
34603468
SortitionDB::validate_and_replace_epochs(&tx, epochs)?;
34613469
tx.commit()?;

stackslib/src/chainstate/coordinator/mod.rs

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -543,20 +543,24 @@ impl<
543543
in_nakamoto_epoch: false,
544544
};
545545

546-
let mut nakamoto_available = false;
547546
loop {
548-
if nakamoto_available
549-
|| inst
550-
.can_process_nakamoto()
551-
.expect("FATAL: could not determine if Nakamoto is available")
552-
{
553-
// short-circuit to avoid gratuitous I/O
554-
nakamoto_available = true;
555-
if !inst.handle_comms_nakamoto(&comms, miner_status.clone()) {
547+
let bits = comms.wait_on();
548+
if inst.in_subsequent_nakamoto_reward_cycle() {
549+
debug!("Coordinator: in subsequent Nakamoto reward cycle");
550+
if !inst.handle_comms_nakamoto(bits, miner_status.clone()) {
551+
return;
552+
}
553+
} else if inst.in_first_nakamoto_reward_cycle() {
554+
debug!("Coordinator: in first Nakamoto reward cycle");
555+
if !inst.handle_comms_nakamoto(bits, miner_status.clone()) {
556+
return;
557+
}
558+
if !inst.handle_comms_epoch2(bits, miner_status.clone()) {
556559
return;
557560
}
558561
} else {
559-
if !inst.handle_comms_epoch2(&comms, miner_status.clone()) {
562+
debug!("Coordinator: in epoch2 reward cycle");
563+
if !inst.handle_comms_epoch2(bits, miner_status.clone()) {
560564
return;
561565
}
562566
}
@@ -566,13 +570,8 @@ impl<
566570
/// This is the Stacks 2.x coordinator loop body, which handles communications
567571
/// from the given `comms`. It returns `true` if the coordinator is still running, and `false`
568572
/// if not.
569-
pub fn handle_comms_epoch2(
570-
&mut self,
571-
comms: &CoordinatorReceivers,
572-
miner_status: Arc<Mutex<MinerStatus>>,
573-
) -> bool {
573+
pub fn handle_comms_epoch2(&mut self, bits: u8, miner_status: Arc<Mutex<MinerStatus>>) -> bool {
574574
// timeout so that we handle Ctrl-C a little gracefully
575-
let bits = comms.wait_on();
576575
if (bits & (CoordinatorEvents::NEW_STACKS_BLOCK as u8)) != 0 {
577576
signal_mining_blocked(miner_status.clone());
578577
debug!("Received new stacks block notice");

0 commit comments

Comments
 (0)