Skip to content

Commit

Permalink
fix(ci): split NU5 sync into two GitHub actions jobs (#4840)
Browse files Browse the repository at this point in the history
* Split the NU5 job at block 1,740,000

* Fix the split regex

* Fix the lightwalletd sync to tip regex
  • Loading branch information
teor2345 authored Jul 29, 2022
1 parent 1cad4c5 commit 65b0a8b
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 15 deletions.
59 changes: 58 additions & 1 deletion .github/workflows/deploy-gcp-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,67 @@ jobs:
'(estimated progress.*network_upgrade.*=.*Nu5)|(test result:.*finished in)' \
"
# follow the logs of the test we just launched, up to block 1,740,000 (or the test finishing)
#
# We chose this height because it was about 5 hours into the NU5 sync, at the end of July 2022.
# This is a temporary workaround until we improve sync speeds.
logs-1740k:
name: Log ${{ inputs.test_id }} test (1740k)
needs: [ logs-canopy ]
# If the previous job fails, we still want to show the logs.
if: ${{ !cancelled() }}
runs-on: ubuntu-latest
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/[email protected]
with:
persist-credentials: false
fetch-depth: '2'

- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7

- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/[email protected]
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: '[email protected]'
token_format: 'access_token'

# Show recent logs, following until block 1,740,000 (or the test finishes)
- name: Show logs for ${{ inputs.test_id }} test (1740k)
run: |
gcloud compute ssh \
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command \
"\
docker logs \
--tail all \
--follow \
${{ inputs.test_id }} | \
tee --output-error=exit /dev/stderr | \
grep --max-count=1 --extended-regexp --color=always \
'(estimated progress.*current_height.*=.*174[0-9][0-9][0-9][0-9].*remaining_sync_blocks)|(test result:.*finished in)' \
"
# follow the logs of the test we just launched, up to the last checkpoint (or the test finishing)
logs-checkpoint:
name: Log ${{ inputs.test_id }} test (checkpoint)
needs: [ logs-canopy ]
needs: [ logs-1740k ]
# If the previous job fails, we still want to show the logs.
if: ${{ !cancelled() }}
runs-on: ubuntu-latest
Expand Down
12 changes: 4 additions & 8 deletions zebrad/tests/acceptance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ use common::{
},
sync::{
create_cached_database_height, sync_until, MempoolBehavior, LARGE_CHECKPOINT_TEST_HEIGHT,
LARGE_CHECKPOINT_TIMEOUT, MEDIUM_CHECKPOINT_TEST_HEIGHT, STOP_AT_HEIGHT_REGEX,
STOP_ON_LOAD_TIMEOUT, SYNC_FINISHED_REGEX, TINY_CHECKPOINT_TEST_HEIGHT,
TINY_CHECKPOINT_TIMEOUT,
LARGE_CHECKPOINT_TIMEOUT, LIGHTWALLETD_SYNC_FINISHED_REGEX, MEDIUM_CHECKPOINT_TEST_HEIGHT,
STOP_AT_HEIGHT_REGEX, STOP_ON_LOAD_TIMEOUT, SYNC_FINISHED_REGEX,
TINY_CHECKPOINT_TEST_HEIGHT, TINY_CHECKPOINT_TIMEOUT,
},
};

Expand Down Expand Up @@ -1440,14 +1440,10 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()>
// But if the logs just stop, we can't tell the difference between a hang and fully synced.
// So we assume `lightwalletd` will sync and log large groups of blocks,
// and check for logs with heights near the mainnet tip height.
//
// TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000
let lightwalletd_thread = std::thread::spawn(move || -> Result<_> {
tracing::info!(?test_type, "waiting for lightwalletd to sync to the tip");

lightwalletd.expect_stdout_line_matches(
"([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block: 1[7-9][0-9]{5})",
)?;
lightwalletd.expect_stdout_line_matches(LIGHTWALLETD_SYNC_FINISHED_REGEX)?;

Ok(lightwalletd)
});
Expand Down
14 changes: 9 additions & 5 deletions zebrad/tests/common/lightwalletd/wallet_grpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use tempfile::TempDir;

use zebra_test::{args, net::random_known_port, prelude::*};

use crate::common::{config::testdir, lightwalletd::LightWalletdTestDirExt};
use crate::common::{
config::testdir, lightwalletd::LightWalletdTestDirExt, sync::LIGHTWALLETD_SYNC_FINISHED_REGEX,
};

use super::LightwalletdTestType;

Expand All @@ -20,14 +22,14 @@ pub type LightwalletdRpcClient =
/// using the `lightwalletd_state_path`, with its gRPC server functionality enabled.
///
/// Expects cached state based on the `test_type`.
/// Waits for `lightwalletd` to log "waiting for block" if `wait_for_blocks` is true.
/// Waits for `lightwalletd` to sync to near the tip, if `wait_for_sync` is true.
///
/// Returns the lightwalletd instance and the port number that it is listening for RPC connections.
pub fn spawn_lightwalletd_with_rpc_server(
zebrad_rpc_address: SocketAddr,
lightwalletd_state_path: Option<PathBuf>,
test_type: LightwalletdTestType,
wait_for_blocks: bool,
wait_for_sync: bool,
) -> Result<(TestChild<TempDir>, u16)> {
let lightwalletd_dir = testdir()?.with_lightwalletd_config(zebrad_rpc_address)?;

Expand All @@ -45,9 +47,11 @@ pub fn spawn_lightwalletd_with_rpc_server(
.with_failure_regex_iter(lightwalletd_failure_messages, lightwalletd_ignore_messages);

lightwalletd.expect_stdout_line_matches("Starting gRPC server")?;
if wait_for_blocks {
lightwalletd.expect_stdout_line_matches("[Ww]aiting for block")?;

if wait_for_sync {
lightwalletd.expect_stdout_line_matches(LIGHTWALLETD_SYNC_FINISHED_REGEX)?;
}

Ok((lightwalletd, lightwalletd_rpc_port))
}

Expand Down
10 changes: 9 additions & 1 deletion zebrad/tests/common/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub const LARGE_CHECKPOINT_TEST_HEIGHT: Height =

pub const STOP_AT_HEIGHT_REGEX: &str = "stopping at configured height";

/// The text that should be logged when the initial sync finishes at the estimated chain tip.
/// The text that should be logged when Zebra's initial sync finishes at the estimated chain tip.
///
/// This message is only logged if:
/// - we have reached the estimated chain tip,
Expand All @@ -45,6 +45,14 @@ pub const STOP_AT_HEIGHT_REGEX: &str = "stopping at configured height";
pub const SYNC_FINISHED_REGEX: &str =
r"finished initial sync to chain tip, using gossiped blocks .*sync_percent.*=.*100\.";

/// The text that should be logged when `lightwalletd`'s initial sync is near the chain tip.
///
/// We can't guarantee a "Waiting for block" log, so we just check for a block near the tip height.
///
/// TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000
pub const LIGHTWALLETD_SYNC_FINISHED_REGEX: &str =
r"([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block: 1[7-9][0-9]{5})";

/// The maximum amount of time Zebra should take to reload after shutting down.
///
/// This should only take a second, but sometimes CI VMs or RocksDB can be slow.
Expand Down

0 comments on commit 65b0a8b

Please sign in to comment.