Skip to content

Commit

Permalink
Merge branch 'refs/heads/develop' into retry-registration-timeout-fix
Browse files Browse the repository at this point in the history
# Conflicts:
#	activation/activation.go
#	config/mainnet.go
#	config/presets/testnet.go
  • Loading branch information
ConvallariaMaj committed Aug 7, 2024
2 parents 2c53d3e + a79bdbb commit 50e3129
Show file tree
Hide file tree
Showing 136 changed files with 9,640 additions and 1,861 deletions.
20 changes: 12 additions & 8 deletions .github/workflows/api-swagger-ui.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
name: Build and Push Swagger-UI to R2 testnet-api-docs.spacemesh.network
name: Build and Push Swagger-UI

env:
go-version: "1.22"

on:
release:
types: [published]
# Allow manually triggering this workflow
workflow_dispatch:
push:
tags:
- '*'

jobs:
check-version:
Expand Down Expand Up @@ -42,31 +45,32 @@ jobs:
- name: upload to testnet
uses: jakejarvis/s3-sync-action@master
with:
args: --acl public-read --follow-symlinks --delete
args: --acl public-read --follow-symlinks
env:
AWS_S3_BUCKET: ${{ secrets.CLOUDFLARE_TESTNET_API_BUCKET }}
AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_SECRET_ACCESS_KEY }}
SOURCE_DIR: api/release/openapi/swagger/src
DEST_DIR: '/${{ github.event.release.tag_name }}'
DEST_DIR: '${{ github.ref_name }}'
AWS_S3_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com

- name: update url json file for testnet
working-directory: api/release/openapi/swagger/src
run: |
mkdir spec && cd spec
curl -o spec_urls.json https://testnet-api-docs.spacemesh.network/spec_urls.json
new_url="{\"url\":\"https://testnet-api-docs.spacemesh.network/${{ github.event.release.tag_name }}/api.swagger.json\",\"name\":\"${{ github.event.release.tag_name }}\"}"
new_url="{\"url\":\"https://testnet-api-docs.spacemesh.network/${{ github.ref_name }}/api.swagger.json\",\"name\":\"${{ github.ref_name }}\"}"
jq ". += [$new_url]" spec_urls.json > tmp.json && mv tmp.json spec_urls.json
- name: upload new testnet json file
uses: jakejarvis/s3-sync-action@master
with:
args: --acl public-read --follow-symlinks --delete
args: --acl public-read --follow-symlinks
env:
AWS_S3_BUCKET: ${{ secrets.CLOUDFLARE_TESTNET_API_BUCKET }}
AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_SECRET_ACCESS_KEY }}
SOURCE_DIR: api/release/openapi/swagger/src/spec_urls.json
SOURCE_DIR: api/release/openapi/swagger/src/spec
DEST_DIR: ''
AWS_S3_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com

50 changes: 2 additions & 48 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ jobs:
- name: lint
run: make lint-github-action

build-tools:
build:
runs-on: ${{ matrix.os }}
needs: filter-changes
if: ${{ needs.filter-changes.outputs.nondocchanges == 'true' }}
Expand Down Expand Up @@ -160,6 +160,7 @@ jobs:
with:
check-latest: true
go-version: ${{ env.go-version }}
cache: ${{ runner.arch != 'arm64' }}
- name: setup env
run: make install
- name: build merge-nodes
Expand All @@ -168,52 +169,6 @@ jobs:
run: make gen-p2p-identity
- name: build bootstrapper
run: make bootstrapper

build:
runs-on: ${{ matrix.os }}
needs: filter-changes
if: ${{ needs.filter-changes.outputs.nondocchanges == 'true' }}
strategy:
fail-fast: true
matrix:
os:
- ubuntu-22.04
- ubuntu-latest-arm-8-cores
- macos-13
- [self-hosted, macOS, ARM64, go-spacemesh]
- windows-2022
steps:
- name: Add OpenCL support - Ubuntu
if: ${{ matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-latest-arm-8-cores' }}
run: sudo apt-get update -q && sudo apt-get install -qy ocl-icd-opencl-dev libpocl2
- name: disable Windows Defender - Windows
if: ${{ matrix.os == 'windows-2022' }}
run: |
Set-MpPreference -DisableRealtimeMonitoring $true
- name: Set new git config - Windows
if: ${{ matrix.os == 'windows-2022' }}
run: |
git config --global pack.window 1
git config --global core.compression 0
git config --global http.postBuffer 524288000
- name: checkout
uses: actions/checkout@v4
with:
ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }}
- uses: extractions/netrc@v2
with:
machine: github.com
username: ${{ secrets.GH_ACTION_TOKEN_USER }}
password: ${{ secrets.GH_ACTION_TOKEN }}
if: vars.GOPRIVATE
- name: set up go
uses: actions/setup-go@v5
with:
check-latest: true
go-version: ${{ env.go-version }}
cache: ${{ runner.arch != 'arm64' }}
- name: setup env
run: make install
- name: build
timeout-minutes: 5
run: make build
Expand Down Expand Up @@ -307,7 +262,6 @@ jobs:
- filter-changes
- quicktests
- lint
- build-tools
- build
- unittests
runs-on: ubuntu-22.04
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/systest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
id: install
uses: azure/setup-kubectl@v4
with:
version: "v1.23.15"
version: "v1.27.16"

- name: Setup gcloud authentication
uses: google-github-actions/auth@v2
Expand Down Expand Up @@ -130,7 +130,6 @@ jobs:
go-version-file: "go.mod"

- name: Run tests
timeout-minutes: 60
env:
test_id: systest-${{ steps.vars.outputs.sha_short }}
storage: premium-rwo=10Gi
Expand Down
21 changes: 20 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,25 @@ See [RELEASE](./RELEASE.md) for workflow instructions.
* [#6035](https://github.com/spacemeshos/go-spacemesh/issues/6035) Fixed an issue where the node retried registering for the PoET round
only for 15-20 minutes instead of continuing until the start of the round

## Release v1.6.6

### Improvements

* [#6198](https://github.com/spacemeshos/go-spacemesh/pull/6198) Configure default TTL for caching poet's /v1/info

* [#6199](https://github.com/spacemeshos/go-spacemesh/pull/6199) Cache poet's /v1/pow_params

## Release v1.6.5

### Improvements

* [#6185](https://github.com/spacemeshos/go-spacemesh/pull/6185) Optimize mempool

* [#6187](https://github.com/spacemeshos/go-spacemesh/pull/6187) The merge tool now ignores files that are not `.key`
files in the `identities` directory when merging two nodes.

* [#6128](https://github.com/spacemeshos/go-spacemesh/pull/6128) Reduce logs spam

## Release v1.6.4

### Improvements
Expand Down Expand Up @@ -690,7 +709,7 @@ and permanent ineligibility for rewards.

* [#5470](https://github.com/spacemeshos/go-spacemesh/pull/5470)
Fixed a bug in event reporting where the node reports a disconnection from the PoST service as a "PoST failed" event.
Disconnections cannot be avoided completely and do not interrupt the PoST proofing process. As long as the PoST
Disconnections cannot be avoided completely and do not interrupt the PoST proving process. As long as the PoST
service reconnects within a reasonable time, the node will continue to operate normally without reporting any errors
via the event API.

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ clear-test-cache:
.PHONY: clear-test-cache

test: get-libs
@$(ULIMIT) CGO_LDFLAGS="$(CGO_TEST_LDFLAGS)" gotestsum -- -race -timeout 8m -p 1 $(UNIT_TESTS)
@$(ULIMIT) CGO_LDFLAGS="$(CGO_TEST_LDFLAGS)" gotestsum -- -race -timeout 8m $(UNIT_TESTS)
.PHONY: test

generate: get-libs
Expand Down Expand Up @@ -145,7 +145,7 @@ lint-github-action: get-libs
.PHONY: lint-github-action

cover: get-libs
@$(ULIMIT) CGO_LDFLAGS="$(CGO_TEST_LDFLAGS)" go test -coverprofile=cover.out -timeout 0 -p 1 -coverpkg=./... $(UNIT_TESTS)
@$(ULIMIT) CGO_LDFLAGS="$(CGO_TEST_LDFLAGS)" go test -coverprofile=cover.out -timeout 30m -coverpkg=./... $(UNIT_TESTS)
.PHONY: cover

list-versions:
Expand Down
47 changes: 29 additions & 18 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,27 @@ var (

// PoetConfig is the configuration to interact with the poet server.
type PoetConfig struct {
RequestTimeout time.Duration `mapstructure:"poet-request-timeout"`
RequestRetryDelay time.Duration `mapstructure:"retry-delay"`
CertifierInfoCacheTTL time.Duration `mapstructure:"certifier-info-cache-ttl"`
MaxRequestRetries int `mapstructure:"retry-max"`
// Start of new PoET round
PhaseShift time.Duration `mapstructure:"phase-shift"`
PhaseShift time.Duration `mapstructure:"phase-shift"`

Check failure on line 43 in activation/activation.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default -s prefix(github.com/spacemeshos/go-spacemesh) (gci)
// A gap between end of old PoET round and start of new one
CycleGap time.Duration `mapstructure:"cycle-gap"`
CycleGap time.Duration `mapstructure:"cycle-gap"`

Check failure on line 45 in activation/activation.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default -s prefix(github.com/spacemeshos/go-spacemesh) (gci)
// Time at the end of the cycle gap when the PoST challenge must be built and sent to the PoET server
GracePeriod time.Duration `mapstructure:"grace-period"`
GracePeriod time.Duration `mapstructure:"grace-period"`

Check failure on line 47 in activation/activation.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default -s prefix(github.com/spacemeshos/go-spacemesh) (gci)
RequestTimeout time.Duration `mapstructure:"poet-request-timeout"`
RequestRetryDelay time.Duration `mapstructure:"retry-delay"`
// Period to find positioning ATX. Must be less than GracePeriod
PositioningATXSelectionTimeout time.Duration `mapstructure:"positioning-atx-selection-timeout"`
CertifierInfoCacheTTL time.Duration `mapstructure:"certifier-info-cache-ttl"`
PowParamsCacheTTL time.Duration `mapstructure:"pow-params-cache-ttl"`
MaxRequestRetries int `mapstructure:"retry-max"`
}

// DefaultPoetConfig returns a PoetConfig with the default request retry
// policy and cache lifetimes set; every other field keeps its zero value.
func DefaultPoetConfig() PoetConfig {
	// Both caches use the same default lifetime.
	const defaultCacheTTL = 5 * time.Minute

	var cfg PoetConfig
	cfg.RequestRetryDelay = 400 * time.Millisecond
	cfg.MaxRequestRetries = 10
	cfg.CertifierInfoCacheTTL = defaultCacheTTL
	cfg.PowParamsCacheTTL = defaultCacheTTL
	return cfg
}

Expand All @@ -72,7 +74,7 @@ type Config struct {
}

// Builder struct is the struct that orchestrates the creation of activation transactions
// it is responsible for initializing post, receiving poet proof and orchestrating nipst. after which it will
// it is responsible for initializing post, receiving poet proof and orchestrating nipost. after which it will
// calculate total weight and providing relevant view as proof.
type Builder struct {
accountLock sync.RWMutex
Expand Down Expand Up @@ -421,7 +423,13 @@ func (b *Builder) run(ctx context.Context, sig *signing.EdSigner) {
for _, poet := range b.poets {
eg.Go(func() error {
_, err := poet.Certify(ctx, sig.NodeID())
if err != nil {
switch {
case errors.Is(err, ErrCertificatesNotSupported):
b.logger.Debug("not certifying (not supported in poet)",
log.ZShortStringer("smesherID", sig.NodeID()),
zap.String("poet", poet.Address()),
)
case err != nil:
b.logger.Warn("failed to certify poet", zap.Error(err), log.ZShortStringer("smesherID", sig.NodeID()))
}
return nil
Expand All @@ -439,6 +447,7 @@ func (b *Builder) run(ctx context.Context, sig *signing.EdSigner) {

b.logger.Warn("failed to publish atx", zap.Error(err))

poetErr := &PoetSvcUnstableError{}
switch {
case errors.Is(err, ErrATXChallengeExpired):
b.logger.Debug("retrying with new challenge after waiting for a layer")
Expand All @@ -455,8 +464,11 @@ func (b *Builder) run(ctx context.Context, sig *signing.EdSigner) {
return
case <-b.layerClock.AwaitLayer(currentLayer.Add(1)):
}
case errors.Is(err, ErrPoetServiceUnstable):
b.logger.Warn("retrying after poet retry interval", zap.Duration("interval", b.poetRetryInterval))
case errors.As(err, &poetErr):
b.logger.Warn("retrying after poet retry interval",
zap.Duration("interval", b.poetRetryInterval),
zap.Error(poetErr.source),
)
select {
case <-ctx.Done():
return
Expand Down Expand Up @@ -589,7 +601,7 @@ func (b *Builder) BuildNIPostChallenge(ctx context.Context, nodeID types.NodeID)
PositioningATX: posAtx,
}
}
logger.Info("persisting the new NiPOST challenge", zap.Object("challenge", challenge))
logger.Debug("persisting the new NiPOST challenge", zap.Object("challenge", challenge))
if err := nipost.AddChallenge(b.localDB, nodeID, challenge); err != nil {
return nil, fmt.Errorf("add nipost challenge: %w", err)
}
Expand Down Expand Up @@ -629,7 +641,7 @@ func (b *Builder) getExistingChallenge(
}

// challenge is fresh
logger.Info("loaded NiPoST challenge from local state",
logger.Debug("loaded NiPoST challenge from local state",
zap.Uint32("current_epoch", currentEpochId.Uint32()),
zap.Uint32("publish_epoch", challenge.PublishEpoch.Uint32()),
)
Expand Down Expand Up @@ -732,7 +744,6 @@ func (b *Builder) PublishActivationTx(ctx context.Context, sig *signing.EdSigner
return fmt.Errorf("wait for publication epoch: %w", ctx.Err())
case <-b.layerClock.AwaitLayer(challenge.PublishEpoch.FirstLayer()):
}
b.logger.Debug("publication epoch has arrived!", log.ZShortStringer("smesherID", sig.NodeID()))

for {
b.logger.Info(
Expand Down Expand Up @@ -962,13 +973,13 @@ func (b *Builder) getPositioningAtx(
return types.EmptyATXID, err
}

b.logger.Info("found candidate positioning atx",
b.logger.Debug("found candidate positioning atx",
log.ZShortStringer("id", id),
log.ZShortStringer("smesherID", nodeID),
)

if previous == nil {
b.logger.Info("selected atx as positioning atx",
b.logger.Info("selected positioning atx",
log.ZShortStringer("id", id),
log.ZShortStringer("smesherID", nodeID))
return id, nil
Expand Down Expand Up @@ -1046,14 +1057,14 @@ func findFullyValidHighTickAtx(

// iterate through epochs to get the first valid, non-malicious ATX with the greatest height
atxdata.IterateHighTicksInEpoch(publish+1, func(id types.ATXID) (contSearch bool) {
logger.Info("found candidate for high-tick atx", log.ZShortStringer("id", id))
logger.Debug("found candidate for high-tick atx", log.ZShortStringer("id", id))
if ctx.Err() != nil {
return false
}
// verify ATX-candidate by getting their dependencies (previous Atx, positioning ATX etc.)
// and verifying PoST for every dependency
if err := validator.VerifyChain(ctx, id, goldenATXID, opts...); err != nil {
logger.Info("rejecting candidate for high-tick atx", zap.Error(err), log.ZShortStringer("id", id))
logger.Debug("rejecting candidate for high-tick atx", zap.Error(err), log.ZShortStringer("id", id))
return true
}
found = &id
Expand Down
7 changes: 0 additions & 7 deletions activation/activation_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ import (
var (
// ErrATXChallengeExpired is returned when atx missed its publication window and needs to be regenerated.
ErrATXChallengeExpired = errors.New("builder: atx expired")
// ErrPoetServiceUnstable is returned when poet quality of service is low.
ErrPoetServiceUnstable = &PoetSvcUnstableError{}
// ErrPoetProofNotReceived is returned when no poet proof was received.
ErrPoetProofNotReceived = errors.New("builder: didn't receive any poet proof")
)
Expand All @@ -28,8 +26,3 @@ func (e *PoetSvcUnstableError) Error() string {
}

// Unwrap returns the error held in the source field, which lets errors.Is
// and errors.As look through this wrapper to the underlying cause.
func (e *PoetSvcUnstableError) Unwrap() error {
	return e.source
}

// Is reports whether target is a *PoetSvcUnstableError. Only the dynamic type
// of target is checked; its contents are not compared with the receiver.
func (e *PoetSvcUnstableError) Is(target error) bool {
	switch target.(type) {
	case *PoetSvcUnstableError:
		return true
	default:
		return false
	}
}
2 changes: 1 addition & 1 deletion activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ func TestBuilder_RetryPublishActivationTx(t *testing.T) {
tries++
t.Logf("try %d: %s", tries, now)
if tries < expectedTries {
return nil, ErrPoetServiceUnstable
return nil, &PoetSvcUnstableError{}
}
close(builderConfirmation)
return newNIPostWithPoet(t, []byte("66666")), nil
Expand Down
4 changes: 2 additions & 2 deletions activation/builder_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestBuilder_BuildsInitialAtxV2(t *testing.T) {
require.Empty(t, atx.Marriages)
require.Equal(t, posEpoch+1, atx.PublishEpoch)
require.Equal(t, sig.NodeID(), atx.SmesherID)
require.True(t, signing.NewEdVerifier().Verify(signing.ATX, atx.SmesherID, atx.SignedBytes(), atx.Signature))
require.True(t, signing.NewEdVerifier().Verify(signing.ATX, atx.SmesherID, atx.ID().Bytes(), atx.Signature))
}

func TestBuilder_SwitchesToBuildV2(t *testing.T) {
Expand Down Expand Up @@ -106,5 +106,5 @@ func TestBuilder_SwitchesToBuildV2(t *testing.T) {
require.Empty(t, atx2.Marriages)
require.Equal(t, atx1.PublishEpoch+1, atx2.PublishEpoch)
require.Equal(t, sig.NodeID(), atx2.SmesherID)
require.True(t, signing.NewEdVerifier().Verify(signing.ATX, atx2.SmesherID, atx2.SignedBytes(), atx2.Signature))
require.True(t, signing.NewEdVerifier().Verify(signing.ATX, atx2.SmesherID, atx2.ID().Bytes(), atx2.Signature))
}
Loading

0 comments on commit 50e3129

Please sign in to comment.