From 16597ebba56a8e7db16e11582cde3199584fa687 Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Thu, 27 Feb 2025 14:58:32 +0800
Subject: [PATCH 01/21] fix(test): Use a timer in
 test_checkpoint_executor_crash_recovery

---
 .../checkpoints/checkpoint_executor/tests.rs  | 53 +++++++++++++------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
index aba6719606e..511a5e0f80b 100644
--- a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
+++ b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
@@ -64,19 +64,30 @@ pub async fn test_checkpoint_executor_crash_recovery() {
     let epoch_store = state.epoch_store_for_testing().clone();
     let executor_handle =
         spawn_monitored_task!(async move { executor.run_epoch(epoch_store, None).await });
-    tokio::time::sleep(Duration::from_secs(5)).await;
 
-    // ensure we executed all synced checkpoints
-    let highest_executed = checkpoint_store
-        .get_highest_executed_checkpoint_seq_number()
-        .unwrap()
-        .expect("Expected highest executed to not be None");
-    assert_eq!(highest_executed, 2 * (buffer_size as u64) - 1,);
+    // Use a timer to ensure all checkpoints are executed
+    let timeout_duration = Duration::from_secs(20);
+    tokio::time::timeout(timeout_duration, async {
+        loop {
+            let highest_executed = checkpoint_store
+                .get_highest_executed_checkpoint_seq_number()
+                .unwrap()
+                .unwrap_or_default();
+
+            if highest_executed == 2 * (buffer_size as u64) - 1 {
+                break;
+            }
+
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
+    })
+    .await
+    .expect("Timeout waiting for checkpoints to be executed");
 
     // Simulate node restart
     executor_handle.abort();
 
-    // sync more checkpoints in the meantime
+    // Sync more checkpoints in the meantime
     let _ = sync_new_checkpoints(
         &checkpoint_store,
         &checkpoint_sender,
@@ -85,8 +96,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
         &committee,
     );
 
-    // restart checkpoint executor and ensure that it picks
-    // up where it left off
+    // Restart checkpoint executor and ensure that it picks up where it left off
     let mut executor = CheckpointExecutor::new_for_tests(
         checkpoint_sender.subscribe(),
         checkpoint_store.clone(),
@@ -97,13 +107,24 @@ pub async fn test_checkpoint_executor_crash_recovery() {
     let epoch_store = state.epoch_store_for_testing().clone();
     let executor_handle =
         spawn_monitored_task!(async move { executor.run_epoch(epoch_store, None).await });
-    tokio::time::sleep(Duration::from_secs(15)).await;
 
-    let highest_executed = checkpoint_store
-        .get_highest_executed_checkpoint_seq_number()
-        .unwrap()
-        .expect("Expected highest executed to not be None");
-    assert_eq!(highest_executed, 4 * (buffer_size as u64) - 1);
+    // Use a timer to ensure all checkpoints are executed
+    tokio::time::timeout(timeout_duration, async {
+        loop {
+            let highest_executed = checkpoint_store
+                .get_highest_executed_checkpoint_seq_number()
+                .unwrap()
+                .unwrap_or_default();
+
+            if highest_executed == 4 * (buffer_size as u64) - 1 {
+                break;
+            }
+
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
+    })
+    .await
+    .expect("Timeout waiting for checkpoints to be executed after restart");
 
     executor_handle.abort();
 }

From 208299d06f8be17ce7e08273186e6c3d641582de Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Thu, 27 Feb 2025 19:50:12 +0800
Subject: [PATCH 02/21] fix(test): Reduce number of synced checkpoints in
 test_checkpoint_executor_crash_recovery

---
 .../src/checkpoints/checkpoint_executor/tests.rs          | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
index 511a5e0f80b..3fa2225b4c7 100644
--- a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
+++ b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
@@ -56,7 +56,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
     let checkpoints = sync_new_checkpoints(
         &checkpoint_store,
         &checkpoint_sender,
-        2 * buffer_size,
+        buffer_size,
         None,
         &committee,
     );
@@ -74,7 +74,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
                 .unwrap()
                 .unwrap_or_default();
 
-            if highest_executed == 2 * (buffer_size as u64) - 1 {
+            if highest_executed == (buffer_size as u64) - 1 {
                 break;
             }
 
@@ -91,7 +91,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
     let _ = sync_new_checkpoints(
         &checkpoint_store,
         &checkpoint_sender,
-        2 * buffer_size,
+        buffer_size,
         Some(checkpoints.last().cloned().unwrap()),
         &committee,
     );
@@ -116,7 +116,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
                 .unwrap()
                 .unwrap_or_default();
 
-            if highest_executed == 4 * (buffer_size as u64) - 1 {
+            if highest_executed == 2 * (buffer_size as u64) - 1 {
                 break;
             }
 

From 492c8775eda548c6db9c2ce401ab30c55460a4c4 Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Thu, 27 Feb 2025 20:44:08 +0800
Subject: [PATCH 03/21] fix(test): Increase timeout in
 test_checkpoint_executor_crash_recovery

---
 crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
index 3fa2225b4c7..7c895699a99 100644
--- a/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
+++ b/crates/iota-core/src/checkpoints/checkpoint_executor/tests.rs
@@ -66,7 +66,7 @@ pub async fn test_checkpoint_executor_crash_recovery() {
         spawn_monitored_task!(async move { executor.run_epoch(epoch_store, None).await });
 
     // Use a timer to ensure all checkpoints are executed
-    let timeout_duration = Duration::from_secs(20);
+    let timeout_duration = Duration::from_secs(60);
     tokio::time::timeout(timeout_duration, async {
         loop {
             let highest_executed = checkpoint_store

From e6f13ac36858db232f56bff5280f4058d6d629b6 Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Mon, 3 Mar 2025 21:15:12 +0800
Subject: [PATCH 04/21] fix(network): Add timeout to recv() in
 test_byzantine_peer_handling

---
 crates/iota-network/src/randomness/tests.rs | 30 +++++++++++++++------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index ff6906369bb..58aac6014d1 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -438,12 +438,29 @@ async fn test_byzantine_peer_handling() {
             None,
         );
     }
+
+    // Use tokio timeout to ensure the test has sometime to meet expected results.
+    async fn receive_with_timeout(
+        rx: &mut mpsc::Receiver<(u64, RandomnessRound, Vec<u8>)>,
+        expected_epoch: u64,
+        expected_round: u64,
+    ) -> Result<(), ()> {
+        let timeout = std::time::Duration::from_secs(30);
+        let start = std::time::Instant::now();
+        while start.elapsed() < timeout {
+            if let Some((epoch, round, bytes)) = rx.recv().await {
+                if epoch == expected_epoch && round.0 == expected_round && !bytes.is_empty() {
+                    return Ok(());
+                }
+            }
+            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+        }
+        Err(())
+    }
+
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) can communicate normally.
-        let (epoch, round, bytes) = rx.recv().await.unwrap();
-        assert_eq!(0, epoch);
-        assert_eq!(0, round.0);
-        assert_ne!(0, bytes.len());
+        receive_with_timeout(rx, 0, 0).await.unwrap();
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -473,10 +490,7 @@ async fn test_byzantine_peer_handling() {
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) can communicate normally in new epoch.
-        let (epoch, round, bytes) = rx.recv().await.unwrap();
-        assert_eq!(1, epoch);
-        assert_eq!(0, round.0);
-        assert_ne!(0, bytes.len());
+        receive_with_timeout(rx, 1, 0).await.unwrap();
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From 6d15e20e4586ad3830960623d15e5b0ce04d04cd Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Tue, 4 Mar 2025 14:32:41 +0800
Subject: [PATCH 05/21] fix: Resolve comments

---
 crates/iota-network/src/randomness/tests.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 58aac6014d1..a538250e080 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -439,7 +439,7 @@ async fn test_byzantine_peer_handling() {
         );
     }
 
-    // Use tokio timeout to ensure the test has sometime to meet expected results.
+    // Use tokio timeout to ensure the test has some time to meet expected results.
     async fn receive_with_timeout(
         rx: &mut mpsc::Receiver<(u64, RandomnessRound, Vec<u8>)>,
         expected_epoch: u64,
@@ -460,7 +460,9 @@ async fn test_byzantine_peer_handling() {
 
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) can communicate normally.
-        receive_with_timeout(rx, 0, 0).await.unwrap();
+        receive_with_timeout(rx, 0, 0)
+            .await
+            .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -490,7 +492,9 @@ async fn test_byzantine_peer_handling() {
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) can communicate normally in new epoch.
-        receive_with_timeout(rx, 1, 0).await.unwrap();
+        receive_with_timeout(rx, 1, 0)
+            .await
+            .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From 09a2989ddbf99ff11d0949853d8a447f7e8dd6b2 Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Wed, 5 Mar 2025 14:42:23 +0800
Subject: [PATCH 06/21] fix: Check the first received result in
 receive_with_timeout

---
 crates/iota-network/src/randomness/tests.rs | 22 +++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index a538250e080..31aa337c8f2 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -445,17 +445,23 @@ async fn test_byzantine_peer_handling() {
         expected_epoch: u64,
         expected_round: u64,
     ) -> Result<(), ()> {
-        let timeout = std::time::Duration::from_secs(30);
-        let start = std::time::Instant::now();
-        while start.elapsed() < timeout {
-            if let Some((epoch, round, bytes)) = rx.recv().await {
-                if epoch == expected_epoch && round.0 == expected_round && !bytes.is_empty() {
-                    return Ok(());
+        loop {
+            tokio::select! {
+                received = rx.recv() => match received {
+                    Some((epoch, round, bytes)) => {
+                        assert_eq!(expected_epoch, epoch);
+                        assert_eq!(expected_round, round.0);
+                        assert_ne!(0, bytes.len());
+
+                        return Ok(());
+                    },
+                    None => tokio::time::sleep(std::time::Duration::from_millis(100)).await,
+                },
+                _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
+                    return Err(());
                 }
             }
-            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
         }
-        Err(())
     }
 
     for rx in &mut randomness_rxs[2..] {

From 0d711edfdc77d6bb56ab154b1d1612e2e480925d Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Wed, 5 Mar 2025 15:14:44 +0800
Subject: [PATCH 07/21] fix: Add err message to receive_with_timeout

---
 crates/iota-network/src/randomness/tests.rs | 30 ++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 31aa337c8f2..41e0eb81b4b 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -444,22 +444,22 @@ async fn test_byzantine_peer_handling() {
         rx: &mut mpsc::Receiver<(u64, RandomnessRound, Vec<u8>)>,
         expected_epoch: u64,
         expected_round: u64,
-    ) -> Result<(), ()> {
-        loop {
-            tokio::select! {
-                received = rx.recv() => match received {
-                    Some((epoch, round, bytes)) => {
-                        assert_eq!(expected_epoch, epoch);
-                        assert_eq!(expected_round, round.0);
-                        assert_ne!(0, bytes.len());
-
-                        return Ok(());
-                    },
-                    None => tokio::time::sleep(std::time::Duration::from_millis(100)).await,
+    ) -> Result<(), String> {
+        tokio::select! {
+            received = rx.recv() => match received {
+                Some((epoch, round, bytes)) => {
+                    assert_eq!(expected_epoch, epoch);
+                    assert_eq!(expected_round, round.0);
+                    assert_ne!(0, bytes.len());
+
+                    Ok(())
                 },
-                _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
-                    return Err(());
-                }
+                None => {
+                    Err("Randomness channels has been closed".to_string())
+                },
+            },
+            _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
+                return Err("Timeout expired to receive randomness".to_string());
             }
         }
     }

From 9e44698a5f761958ab32750c952eaca4a782b4ab Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Wed, 5 Mar 2025 15:48:54 +0800
Subject: [PATCH 08/21] fix: Fix clippy errors

---
 crates/iota-network/src/randomness/tests.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 41e0eb81b4b..11198fa3f77 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -459,7 +459,7 @@ async fn test_byzantine_peer_handling() {
                 },
             },
             _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
-                return Err("Timeout expired to receive randomness".to_string());
+                Err("Timeout expired to receive randomness".to_string())
             }
         }
     }

From 03348694ee0ce39105ec11b285b35c9ffb2058a9 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Thu, 6 Mar 2025 22:08:59 +0300
Subject: [PATCH 09/21] fix: increase client send_signatures request timeout

---
 crates/iota-network/src/randomness/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/mod.rs b/crates/iota-network/src/randomness/mod.rs
index 3ada502b18e..06690023998 100644
--- a/crates/iota-network/src/randomness/mod.rs
+++ b/crates/iota-network/src/randomness/mod.rs
@@ -982,7 +982,7 @@ impl RandomnessEventLoop {
                     continue; // don't send partial sigs to self
                 }
                 let mut client = RandomnessClient::new(peer.clone());
-                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(10);
+                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(30);
                 let full_sig = full_sig.get().cloned();
                 let request = anemo::Request::new(SendSignaturesRequest {
                     epoch,

From c927c33f51b47652044a6fb84558e7eccbef0541 Mon Sep 17 00:00:00 2001
From: jkrvivian <jkrvivian@gmail.com>
Date: Fri, 7 Mar 2025 18:35:01 +0800
Subject: [PATCH 10/21] revert: Revert adding receive_with_timeout

---
 crates/iota-network/src/randomness/tests.rs | 37 ++++++---------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 11198fa3f77..46b6aaced88 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -439,36 +439,15 @@ async fn test_byzantine_peer_handling() {
         );
     }
 
-    // Use tokio timeout to ensure the test has some time to meet expected results.
-    async fn receive_with_timeout(
-        rx: &mut mpsc::Receiver<(u64, RandomnessRound, Vec<u8>)>,
-        expected_epoch: u64,
-        expected_round: u64,
-    ) -> Result<(), String> {
-        tokio::select! {
-            received = rx.recv() => match received {
-                Some((epoch, round, bytes)) => {
-                    assert_eq!(expected_epoch, epoch);
-                    assert_eq!(expected_round, round.0);
-                    assert_ne!(0, bytes.len());
-
-                    Ok(())
-                },
-                None => {
-                    Err("Randomness channels has been closed".to_string())
-                },
-            },
-            _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
-                Err("Timeout expired to receive randomness".to_string())
-            }
-        }
-    }
-
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) can communicate normally.
-        receive_with_timeout(rx, 0, 0)
+        let (epoch, round, bytes) = rx
+            .recv()
             .await
             .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
+        assert_eq!(0, epoch);
+        assert_eq!(0, round.0);
+        assert_ne!(0, bytes.len());
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -498,9 +477,13 @@ async fn test_byzantine_peer_handling() {
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) can communicate normally in new epoch.
-        receive_with_timeout(rx, 1, 0)
+        let (epoch, round, bytes) = rx
+            .recv()
             .await
             .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
+        assert_eq!(1, epoch);
+        assert_eq!(0, round.0);
+        assert_ne!(0, bytes.len());
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From 21260a2db1c0f1669d369b7a3e2f15df131c42dd Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Fri, 7 Mar 2025 14:55:04 +0300
Subject: [PATCH 11/21] fix: add safety timeouts as
 test_byzantine_peer_handling can loop forever

---
 crates/iota-network/src/randomness/tests.rs | 49 ++++++++++++++-------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 46b6aaced88..8606ec7da59 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -439,15 +439,25 @@ async fn test_byzantine_peer_handling() {
         );
     }
 
-    for rx in &mut randomness_rxs[2..] {
+    // This test can just deadlock, ie. run indefinitely without making any progress.
+    // We can control it by waiting on expected randomness for `timeout` secs.
+    // For some reason it takes so much time for honest peers to produce randomness in presence of byzantine peers.
+    let timeout = std::time::Duration::from_secs(60);
+    let (rx2_mut, rx3_mut) = randomness_rxs.split_at_mut(3);
+    let rnd2_fut = rx2_mut[2].recv();
+    let rnd3_fut = rx3_mut[0].recv();
+    tokio::select! {
         // Validators (2, 3) can communicate normally.
-        let (epoch, round, bytes) = rx
-            .recv()
-            .await
-            .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
-        assert_eq!(0, epoch);
-        assert_eq!(0, round.0);
-        assert_ne!(0, bytes.len());
+        rnds = futures::future::join_all([rnd2_fut, rnd3_fut]) => {
+            for rnd in rnds {
+                let (epoch, round, bytes) = rnd
+                    .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
+                assert_eq!(0, epoch);
+                assert_eq!(0, round.0);
+                assert_ne!(0, bytes.len());
+            }
+        },
+        _ = tokio::time::sleep(timeout) => panic!("Timeout expired to receive randomness"),
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -475,15 +485,22 @@ async fn test_byzantine_peer_handling() {
             None,
         );
     }
-    for rx in &mut randomness_rxs[..2] {
+
+    let (rx0_mut, rx1_mut) = randomness_rxs.split_at_mut(1);
+    let rnd0_fut = rx0_mut[0].recv();
+    let rnd1_fut = rx1_mut[0].recv();
+    tokio::select! {
         // Validators (0, 1) can communicate normally in new epoch.
-        let (epoch, round, bytes) = rx
-            .recv()
-            .await
-            .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
-        assert_eq!(1, epoch);
-        assert_eq!(0, round.0);
-        assert_ne!(0, bytes.len());
+        rnds = futures::future::join_all([rnd0_fut, rnd1_fut]) => {
+            for rnd in rnds {
+                let (epoch, round, bytes) = rnd
+                    .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
+                assert_eq!(1, epoch);
+                assert_eq!(0, round.0);
+                assert_ne!(0, bytes.len());
+            }
+        },
+        _ = tokio::time::sleep(timeout) => panic!("Timeout expired to receive randomness"),
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From 5dbe3840d2329355b6dbf0237d6660bed61ff96b Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Fri, 7 Mar 2025 15:32:53 +0300
Subject: [PATCH 12/21] fix: ci-fmt

---
 crates/iota-network/src/randomness/tests.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 8606ec7da59..e75df778af9 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -439,9 +439,10 @@ async fn test_byzantine_peer_handling() {
         );
     }
 
-    // This test can just deadlock, ie. run indefinitely without making any progress.
-    // We can control it by waiting on expected randomness for `timeout` secs.
-    // For some reason it takes so much time for honest peers to produce randomness in presence of byzantine peers.
+    // This test can just deadlock, ie. run indefinitely without making any
+    // progress. We can control it by waiting on expected randomness for
+    // `timeout` secs. For some reason it takes so much time for honest peers to
+    // produce randomness in presence of byzantine peers.
     let timeout = std::time::Duration::from_secs(60);
     let (rx2_mut, rx3_mut) = randomness_rxs.split_at_mut(3);
     let rnd2_fut = rx2_mut[2].recv();

From 81f71e6b9e4a906c07459b6659fe6ca23a5fcdd8 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Fri, 7 Mar 2025 15:55:13 +0300
Subject: [PATCH 13/21] fix: use timeout function

---
 crates/iota-network/src/randomness/tests.rs | 46 ++++++++++-----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index e75df778af9..6ddc6d75a2e 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -447,18 +447,15 @@ async fn test_byzantine_peer_handling() {
     let (rx2_mut, rx3_mut) = randomness_rxs.split_at_mut(3);
     let rnd2_fut = rx2_mut[2].recv();
     let rnd3_fut = rx3_mut[0].recv();
-    tokio::select! {
-        // Validators (2, 3) can communicate normally.
-        rnds = futures::future::join_all([rnd2_fut, rnd3_fut]) => {
-            for rnd in rnds {
-                let (epoch, round, bytes) = rnd
-                    .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
-                assert_eq!(0, epoch);
-                assert_eq!(0, round.0);
-                assert_ne!(0, bytes.len());
-            }
-        },
-        _ = tokio::time::sleep(timeout) => panic!("Timeout expired to receive randomness"),
+    // Validators (2, 3) can communicate normally.
+    let rnds = tokio::time::timeout(timeout, futures::future::join_all([rnd2_fut, rnd3_fut]))
+        .await
+        .expect("Honest peers (2, 3) should produce randomness in time");
+    for rnd in rnds {
+        let (epoch, round, bytes) = rnd.expect("Channel is not closed and randomness is produced");
+        assert_eq!(0, epoch, "Honest peers produce randomness in epoch 0");
+        assert_eq!(0, round.0, "Honest peers produce randomness in round 0");
+        assert_ne!(0, bytes.len(), "Honest peers produce non-empty randomness");
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -490,18 +487,19 @@ async fn test_byzantine_peer_handling() {
     let (rx0_mut, rx1_mut) = randomness_rxs.split_at_mut(1);
     let rnd0_fut = rx0_mut[0].recv();
     let rnd1_fut = rx1_mut[0].recv();
-    tokio::select! {
-        // Validators (0, 1) can communicate normally in new epoch.
-        rnds = futures::future::join_all([rnd0_fut, rnd1_fut]) => {
-            for rnd in rnds {
-                let (epoch, round, bytes) = rnd
-                    .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
-                assert_eq!(1, epoch);
-                assert_eq!(0, round.0);
-                assert_ne!(0, bytes.len());
-            }
-        },
-        _ = tokio::time::sleep(timeout) => panic!("Timeout expired to receive randomness"),
+    // Validators (0, 1) can communicate normally in new epoch.
+    let rnds = tokio::time::timeout(timeout, futures::future::join_all([rnd0_fut, rnd1_fut]))
+        .await
+        .expect("Byzantine peers (0, 1) should produce randomness in time");
+    for rnd in rnds {
+        let (epoch, round, bytes) = rnd.expect("Channel is not closed and randomness is produced");
+        assert_eq!(1, epoch, "Byzantine peers produce randomness in epoch 1");
+        assert_eq!(0, round.0, "Byzantine peers produce randomness in round 0");
+        assert_ne!(
+            0,
+            bytes.len(),
+            "Byzantine peers produce non-empty randomness"
+        );
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From 1666d916443e8b8ca8d26cb00323f37bca119074 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Fri, 7 Mar 2025 21:41:07 +0300
Subject: [PATCH 14/21] fix: increase timeout for tests only

---
 crates/iota-network/src/randomness/mod.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/mod.rs b/crates/iota-network/src/randomness/mod.rs
index 06690023998..7006f87c12a 100644
--- a/crates/iota-network/src/randomness/mod.rs
+++ b/crates/iota-network/src/randomness/mod.rs
@@ -982,7 +982,15 @@ impl RandomnessEventLoop {
                     continue; // don't send partial sigs to self
                 }
                 let mut client = RandomnessClient::new(peer.clone());
-                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(30);
+                // `test_byzantine_peer_handling` built in debug mode takes
+                // longer to verify invalid signatures and thus needs larger
+                // timeouts.
+                #[cfg(test)]
+                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(100);
+                // In release signature verification should take less, so
+                // smaller timeout should be enough.
+                #[cfg(not(test))]
+                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(10);
                 let full_sig = full_sig.get().cloned();
                 let request = anemo::Request::new(SendSignaturesRequest {
                     epoch,

From 2d1f0b5b26b41f22cc410c15116c5a56b3242530 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Mon, 10 Mar 2025 16:07:51 +0300
Subject: [PATCH 15/21] fix: revert test timeout overkill

---
 crates/iota-network/src/randomness/tests.rs | 52 +++++++--------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 6ddc6d75a2e..46b6aaced88 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -439,23 +439,15 @@ async fn test_byzantine_peer_handling() {
         );
     }
 
-    // This test can just deadlock, ie. run indefinitely without making any
-    // progress. We can control it by waiting on expected randomness for
-    // `timeout` secs. For some reason it takes so much time for honest peers to
-    // produce randomness in presence of byzantine peers.
-    let timeout = std::time::Duration::from_secs(60);
-    let (rx2_mut, rx3_mut) = randomness_rxs.split_at_mut(3);
-    let rnd2_fut = rx2_mut[2].recv();
-    let rnd3_fut = rx3_mut[0].recv();
-    // Validators (2, 3) can communicate normally.
-    let rnds = tokio::time::timeout(timeout, futures::future::join_all([rnd2_fut, rnd3_fut]))
-        .await
-        .expect("Honest peers (2, 3) should produce randomness in time");
-    for rnd in rnds {
-        let (epoch, round, bytes) = rnd.expect("Channel is not closed and randomness is produced");
-        assert_eq!(0, epoch, "Honest peers produce randomness in epoch 0");
-        assert_eq!(0, round.0, "Honest peers produce randomness in round 0");
-        assert_ne!(0, bytes.len(), "Honest peers produce non-empty randomness");
+    for rx in &mut randomness_rxs[2..] {
+        // Validators (2, 3) can communicate normally.
+        let (epoch, round, bytes) = rx
+            .recv()
+            .await
+            .expect("Validators (2, 3) should receive randomness in epoch 0, round 0");
+        assert_eq!(0, epoch);
+        assert_eq!(0, round.0);
+        assert_ne!(0, bytes.len());
     }
     for rx in &mut randomness_rxs[..2] {
         // Validators (0, 1) are byzantine.
@@ -483,23 +475,15 @@ async fn test_byzantine_peer_handling() {
             None,
         );
     }
-
-    let (rx0_mut, rx1_mut) = randomness_rxs.split_at_mut(1);
-    let rnd0_fut = rx0_mut[0].recv();
-    let rnd1_fut = rx1_mut[0].recv();
-    // Validators (0, 1) can communicate normally in new epoch.
-    let rnds = tokio::time::timeout(timeout, futures::future::join_all([rnd0_fut, rnd1_fut]))
-        .await
-        .expect("Byzantine peers (0, 1) should produce randomness in time");
-    for rnd in rnds {
-        let (epoch, round, bytes) = rnd.expect("Channel is not closed and randomness is produced");
-        assert_eq!(1, epoch, "Byzantine peers produce randomness in epoch 1");
-        assert_eq!(0, round.0, "Byzantine peers produce randomness in round 0");
-        assert_ne!(
-            0,
-            bytes.len(),
-            "Byzantine peers produce non-empty randomness"
-        );
+    for rx in &mut randomness_rxs[..2] {
+        // Validators (0, 1) can communicate normally in new epoch.
+        let (epoch, round, bytes) = rx
+            .recv()
+            .await
+            .expect("Validators (0, 1) should receive randomness in epoch 1, round 0");
+        assert_eq!(1, epoch);
+        assert_eq!(0, round.0);
+        assert_ne!(0, bytes.len());
     }
     for rx in &mut randomness_rxs[2..] {
         // Validators (2, 3) are still on old epoch.

From c859223ac8ad19cfaeb69681418f6bb4f4f673c2 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Mon, 10 Mar 2025 16:09:21 +0300
Subject: [PATCH 16/21] test(ignore): test_byzantine_peer_handling still fails
 on arm64, needs investigation

---
 crates/iota-network/src/randomness/tests.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 46b6aaced88..058061c7bd5 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -371,6 +371,7 @@ async fn test_restart_recovery() {
 }
 
 #[tokio::test]
+#[ignore = "https://github.com/iotaledger/iota/issues/5620"]
 async fn test_byzantine_peer_handling() {
     telemetry_subscribers::init_for_testing();
     let committee_fixture = CommitteeFixture::generate(rand::rngs::OsRng, 0, 4);

From 39237fe7cb66785740eadb15f14af73cb2b76747 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Mon, 10 Mar 2025 16:41:03 +0300
Subject: [PATCH 17/21] fix: increase timeout value just for arm64 runners to
 check if it works

---
 crates/iota-network/src/randomness/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/mod.rs b/crates/iota-network/src/randomness/mod.rs
index 7006f87c12a..997e9f1e823 100644
--- a/crates/iota-network/src/randomness/mod.rs
+++ b/crates/iota-network/src/randomness/mod.rs
@@ -986,7 +986,7 @@ impl RandomnessEventLoop {
                 // longer to verify invalid signatures and thus needs larger
                 // timeouts.
                 #[cfg(test)]
-                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(100);
+                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(500);
                 // In release signature verification should take less, so
                 // smaller timeout should be enough.
                 #[cfg(not(test))]

From d897114787a911f02d5ab4c932d2cbcd59cb5544 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Mon, 10 Mar 2025 18:43:28 +0300
Subject: [PATCH 18/21] Revert "test(ignore): test_byzantine_peer_handling
 still fails on arm64, needs investigation"

This reverts commit 91be397ff8c7171161a0f1a2a8c60aaee8166bde.
---
 crates/iota-network/src/randomness/tests.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/tests.rs b/crates/iota-network/src/randomness/tests.rs
index 058061c7bd5..46b6aaced88 100644
--- a/crates/iota-network/src/randomness/tests.rs
+++ b/crates/iota-network/src/randomness/tests.rs
@@ -371,7 +371,6 @@ async fn test_restart_recovery() {
 }
 
 #[tokio::test]
-#[ignore = "https://github.com/iotaledger/iota/issues/5620"]
 async fn test_byzantine_peer_handling() {
     telemetry_subscribers::init_for_testing();
     let committee_fixture = CommitteeFixture::generate(rand::rngs::OsRng, 0, 4);

From 9b54c4a4408503a668b493b9fa0ff04b9d594507 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Tue, 11 Mar 2025 18:14:11 +0300
Subject: [PATCH 19/21] ci: run rust tests on self-hosted-arm64 to test if
 randomness timeout fix works

---
 .github/workflows/_rust_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/_rust_tests.yml b/.github/workflows/_rust_tests.yml
index 22903ca8b9c..824cd858bda 100644
--- a/.github/workflows/_rust_tests.yml
+++ b/.github/workflows/_rust_tests.yml
@@ -61,7 +61,7 @@ jobs:
     if: |
       !cancelled() && (inputs.isRust || inputs.isPgIntegration || inputs.isMoveExampleUsedByOthers)
     timeout-minutes: 90
-    runs-on: [self-hosted-x64]
+    runs-on: [self-hosted-arm64]
     env:
       POSTGRES_USER: postgres
       POSTGRES_PASSWORD: postgrespw

From 69163f262169b447147dac811d9d527af83b59f6 Mon Sep 17 00:00:00 2001
From: Vlad Semenov <vlad.semenov@iota.org>
Date: Tue, 11 Mar 2025 19:46:37 +0300
Subject: [PATCH 20/21] Revert "ci: run rust tests on self-hosted-arm64 to test
 if randomness timeout fix works"

test_byzantine_peer_handling succeeded on self-hosted-arm64 with a 500-second timeout:
https://github.com/iotaledger/iota/actions/runs/13791724155/job/38573592500#step:9:2993

This reverts commit a4511f2c08ba0366774e142567ca56f43a35b3f7.
---
 .github/workflows/_rust_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/_rust_tests.yml b/.github/workflows/_rust_tests.yml
index 824cd858bda..22903ca8b9c 100644
--- a/.github/workflows/_rust_tests.yml
+++ b/.github/workflows/_rust_tests.yml
@@ -61,7 +61,7 @@ jobs:
     if: |
       !cancelled() && (inputs.isRust || inputs.isPgIntegration || inputs.isMoveExampleUsedByOthers)
     timeout-minutes: 90
-    runs-on: [self-hosted-arm64]
+    runs-on: [self-hosted-x64]
     env:
       POSTGRES_USER: postgres
       POSTGRES_PASSWORD: postgrespw

From c4f4882e8e15620fddac6c28bc99798054ea182e Mon Sep 17 00:00:00 2001
From: muXxer <git@muxxer.de>
Date: Wed, 12 Mar 2025 12:42:00 +0100
Subject: [PATCH 21/21] fix: align SEND_PARTIAL_SIGNATURES_TIMEOUT to nextest
 timeout

---
 crates/iota-network/src/randomness/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/iota-network/src/randomness/mod.rs b/crates/iota-network/src/randomness/mod.rs
index 997e9f1e823..30085346d0a 100644
--- a/crates/iota-network/src/randomness/mod.rs
+++ b/crates/iota-network/src/randomness/mod.rs
@@ -986,7 +986,7 @@ impl RandomnessEventLoop {
                 // longer to verify invalid signatures and thus needs larger
                 // timeouts.
                 #[cfg(test)]
-                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(500);
+                const SEND_PARTIAL_SIGNATURES_TIMEOUT: Duration = Duration::from_secs(300);
                 // In release signature verification should take less, so
                 // smaller timeout should be enough.
                 #[cfg(not(test))]