Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add vm signings monitoring #7

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,20 @@ check_period_days = 10
# If a `missedVotes / totalVotes` is over `miss_percentage` parameter, it'll alert.
miss_percentage = 20

[external_chain_signing] # external_chain_signing configurations are used to check VM signings.

# The number of signings to check for each external chain.
check_n = 10

# This field restricts the monitored period. Some chains connected to mainnet/testnet may have few transactions,
# so fetching the last `check_n` signings can pull in very old records; if those include old invalid signing txs, the alert becomes less trustworthy.
#
# `check_period_days` will truncate old records.
check_period_days = 10

# If a `missedSignings / totalSignings` is over `miss_percentage` parameter, it'll alert.
miss_percentage = 20

```


Expand Down
8 changes: 6 additions & 2 deletions app/app.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package app

import (
"bharvest.io/axelmon/server"
"context"
"encoding/json"
"errors"
Expand All @@ -10,6 +9,8 @@ import (
"sync"
"time"

"bharvest.io/axelmon/server"

"bharvest.io/axelmon/log"
)

Expand Down Expand Up @@ -45,7 +46,7 @@ func Run(ctx context.Context, c *Config) {
var monitoringFuncs []Monfunc

if len(c.General.TargetSvcs) == 0 {
monitoringFuncs = []Monfunc{c.checkMaintainers, c.checkHeartbeats, c.checkEVMVotes}
monitoringFuncs = []Monfunc{c.checkMaintainers, c.checkHeartbeats, c.checkEVMVotes, c.checkVMSignings}
} else {
for _, targetSvc := range c.General.TargetSvcs {
switch targetSvc {
Expand All @@ -61,6 +62,9 @@ func Run(ctx context.Context, c *Config) {
case VMVoteTargetSvc:
monitoringFuncs = append(monitoringFuncs, c.checkVMVotes)
break
case VMSigningTargetSvc:
monitoringFuncs = append(monitoringFuncs, c.checkVMSignings)
break
}
}
}
Expand Down
72 changes: 72 additions & 0 deletions app/signings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package app

import (
"context"
"fmt"
"strings"
"time"

"github.com/axelarnetwork/axelar-core/x/nexus/exported"

"bharvest.io/axelmon/client/api"
"bharvest.io/axelmon/metrics"
"bharvest.io/axelmon/server"
"github.com/prometheus/client_golang/prometheus"
)

// checkVMSignings asks the API client which chains the proxy account is
// reported as supporting (api.C.GetVerifierSupportedChains) and runs the VM
// signing check over them via getSignings.
//
// It returns the lookup error unchanged when the chain list cannot be
// fetched, otherwise whatever getSignings returns.
func (c *Config) checkVMSignings(ctx context.Context) error {
	proxyAcc := c.Wallet.Proxy.PrintAcc()

	supported, lookupErr := api.C.GetVerifierSupportedChains(proxyAcc)
	if lookupErr != nil {
		return lookupErr
	}

	return c.getSignings(ctx, supported)
}

// getSignings evaluates how the proxy account participated in VM signings on
// each of the given chains and publishes the outcome.
//
// For every chain that is not flagged in c.General.ExceptChains it:
//   - queries api.C.GetPollingSignings with c.PollingSigning.CheckN and a
//     look-back window of c.PollingSigning.CheckPeriodDays days (10 when the
//     setting is zero),
//   - adds the missed and successful counts to metrics.VMSigningsCounter,
//   - reports the chain status through c.alert: false when the missed
//     percentage exceeds c.PollingSigning.MissPercentage, true otherwise.
//
// The per-chain summaries are stored in server.GlobalState.VMSignings.Chain.
// The first API error aborts the run and is returned as-is; in that case the
// global state is not updated. ctx is not used by this function.
func (c *Config) getSignings(ctx context.Context, chains []exported.ChainName) error {
	// Default the look-back window once, instead of on every iteration.
	if c.PollingSigning.CheckPeriodDays == 0 {
		c.PollingSigning.CheckPeriodDays = 10
	}
	checkPeriod := time.Duration(c.PollingSigning.CheckPeriodDays) * 24 * time.Hour
	proxyAcc := c.Wallet.Proxy.PrintAcc()

	result := make(map[string]server.VotesInfo, len(chains))
	for _, chain := range chains {
		name := chain.String()

		// Chains flagged in c.General.ExceptChains are not monitored.
		if c.General.ExceptChains[strings.ToLower(name)] {
			continue
		}

		resp, err := api.C.GetPollingSignings(name, c.PollingSigning.CheckN, proxyAcc, checkPeriod)
		if err != nil {
			return err
		}

		missed := float64(resp.MissCnt)
		metrics.VMSigningsCounter.With(prometheus.Labels{"network_name": name, "status": "missed"}).Add(missed)
		metrics.VMSigningsCounter.With(prometheus.Labels{"network_name": name, "status": "success"}).Add(resp.TotalSignings - missed)

		// With no signings inside the window there is nothing to miss; guard
		// the division so the ratio is never NaN.
		healthy := true
		if resp.TotalSignings > 0 {
			missPercentage := missed / resp.TotalSignings * 100
			// Use the signing-specific threshold, not the vote one.
			healthy = missPercentage <= float64(c.PollingSigning.MissPercentage)
		}

		msg := fmt.Sprintf("status(%s)", chain)
		c.alert(msg, []string{}, healthy, false)

		result[name] = server.VotesInfo{
			Missed: fmt.Sprintf("%d / %d", resp.MissCnt, int(resp.TotalSignings)),
			Status: healthy,
		}
	}
	server.GlobalState.VMSignings.Chain = result

	return nil
}
9 changes: 8 additions & 1 deletion app/types.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package app

import (
"bharvest.io/axelmon/wallet"
"context"
"sync"
"time"

"bharvest.io/axelmon/wallet"
)

type Duration time.Duration
Expand Down Expand Up @@ -60,6 +61,11 @@ type Config struct {
MissPercentage int `toml:"miss_percentage"`
CheckPeriodDays int `toml:"check_period_days"`
} `toml:"external_chain_vote"`
PollingSigning struct {
CheckN int `toml:"check_n"`
MissPercentage int `toml:"miss_percentage"`
CheckPeriodDays int `toml:"check_period_days"`
} `toml:"external_chain_signing"`

Ctx context.Context
Cancel context.CancelFunc
Expand All @@ -74,4 +80,5 @@ const (
HeartbeatTargetSvc TargetSvc = "heartbeat"
EVMVoteTargetSvc TargetSvc = "evm"
VMVoteTargetSvc TargetSvc = "vm"
VMSigningTargetSvc TargetSvc = "vmSigning"
)
119 changes: 117 additions & 2 deletions client/api/axelarscan.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package api

import (
"bharvest.io/axelmon/log"
"bytes"
"encoding/json"
"errors"
"fmt"
"github.com/axelarnetwork/axelar-core/x/nexus/exported"
"io"
"net/http"
"time"

"bharvest.io/axelmon/log"
"github.com/axelarnetwork/axelar-core/x/nexus/exported"
)

type PollingType string
Expand Down Expand Up @@ -189,3 +190,117 @@ func (c *Client) GetPollingVotes(chain string, size int, proxyAcc string, pollin

return &result, nil
}

// GetPollingSignings requests `size` VM signing records for chain from the
// axelarscan `validator/searchVMProofs` endpoint and summarises how proxyAcc
// behaved in them.
//
// Every returned record gets an entry in SigningInfos at the same index.
// Records whose created_at timestamp is older than checkPeriod are skipped:
// they keep their entry but are counted in neither TotalSignings nor MissCnt.
// For the remaining records, a record counts as missed unless it contains an
// entry keyed by proxyAcc whose `sign` field is true.
//
// An error is returned when size exceeds 255, when the request fails or
// answers with a non-200 status, or when the response cannot be decoded
// (including records without a usable created_at.ms timestamp).
func (c *Client) GetPollingSignings(chain string, size int, proxyAcc string, checkPeriod time.Duration) (*SigningsReturn, error) {
	// Bound the request so a stalled endpoint cannot block the caller forever.
	const requestTimeout = 30 * time.Second

	// Values stored in SigningInfo.Sign.
	const (
		signNone byte = 0 // proxyAcc has no entry in the record
		signYes  byte = 1 // proxyAcc signed
		signNo   byte = 2 // proxyAcc has an entry but did not sign
	)

	// NOTE(review): SigningsReturn.MissCnt is an int, so this cap is not
	// required by the result type. It is kept so existing callers see the
	// same limit — confirm whether it can be lifted.
	if size > 255 {
		return nil, errors.New("exceed maximum # vm signings")
	}

	reqBytes, err := json.Marshal(SigningsRequest{Chain: chain, Size: size})
	if err != nil {
		return nil, err
	}

	url := fmt.Sprintf("%s/validator/searchVMProofs", c.axelarscan)
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(reqBytes))
	if err != nil {
		return nil, err
	}
	req.Header.Add("Content-Type", "application/json")

	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("Failed to get proper data from axelarscan")
	}

	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Decode straight into the shape we need instead of round-tripping the
	// "data" field through a second Marshal/Unmarshal.
	var payload struct {
		Data []map[string]any `json:"data"`
	}
	if err := json.Unmarshal(bodyBytes, &payload); err != nil {
		return nil, err
	}

	result := SigningsReturn{
		Chain:        chain,
		SigningInfos: make([]SigningInfo, len(payload.Data)),
	}

	cutoff := time.Now().Add(-checkPeriod)

	for i, d := range payload.Data {
		info := &result.SigningInfos[i]

		// Checked assertions: a missing or oddly typed field must not panic.
		if txHash, ok := d["initiated_txhash"].(string); ok {
			info.InitiatedTXHash = txHash
		}
		if sessionID, ok := d["session_id"].(float64); ok {
			info.SessionID = sessionID
		}

		createdAt, err := signingCreatedAt(d)
		if err != nil {
			return nil, err
		}
		if createdAt.Before(cutoff) {
			// it's too old record. skip it.
			log.Debug("skipping... it's too old")
			continue
		}

		info.Sign = signNone
		if signer := d[proxyAcc]; signer != nil {
			signingInfoBytes, err := json.Marshal(signer)
			if err != nil {
				return nil, err
			}

			signingInfo := Signing{}
			if err := json.Unmarshal(signingInfoBytes, &signingInfo); err != nil {
				return nil, err
			}

			if signingInfo.Sign {
				info.Sign = signYes
			} else {
				info.Sign = signNo
			}
		}

		if info.Sign != signYes {
			result.MissCnt++
		}
		result.TotalSignings++
	}

	return &result, nil
}

// signingCreatedAt extracts the `created_at.ms` millisecond timestamp from a
// decoded signing record and converts it to a time.Time.
func signingCreatedAt(record map[string]any) (time.Time, error) {
	createdAt, ok := record["created_at"].(map[string]any)
	if !ok {
		return time.Time{}, errors.New("axelarscan signing record has no created_at object")
	}
	ms, ok := createdAt["ms"].(float64)
	if !ok {
		return time.Time{}, errors.New("axelarscan signing record has no created_at.ms timestamp")
	}
	return time.UnixMilli(int64(ms)), nil
}
32 changes: 32 additions & 0 deletions client/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,38 @@ type (
}
)

// SigningsRequest is the JSON body posted to axelarscan when searching for
// VM signing records.
type SigningsRequest struct {
	Chain string `json:"chain"`
	Size  int    `json:"size"`
}

// Signing is the per-signer object found inside a signing record; it is
// decoded from the value stored under the signer's account key.
type Signing struct {
	CreatedAt int64  `json:"created_at"`
	ID        string `json:"id"`
	Signer    string `json:"signer"`
	Type      string `json:"type"`
	Sign      bool   `json:"sign"`
	Height    int    `json:"height"`
}

// SigningsReturn summarises the signing records fetched for one chain.
type SigningsReturn struct {
	Chain         string
	MissCnt       int
	SigningInfos  []SigningInfo
	TotalSignings float64
}

// SigningInfo describes the outcome of a single signing record.
type SigningInfo struct {
	InitiatedTXHash string
	SessionID       float64

	// Sign encodes the signer's participation:
	//   0 => not signed
	//   1 => yes
	//   2 => no
	Sign byte
}

type Proxy struct {
Height string `json:"height"`
Result struct {
Expand Down
16 changes: 15 additions & 1 deletion config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,18 @@ check_n = 10
check_period_days = 10

# If a `missedVotes / totalVotes` is over `miss_percentage` parameter, it'll alert.
miss_percentage = 20
miss_percentage = 20

[external_chain_signing] # external_chain_signing configurations are used to check VM signings.

# The number of signings to check for each external chain.
check_n = 10

# This field restricts the monitored period. Some chains connected to mainnet/testnet may have few transactions,
# so fetching the last `check_n` signings can pull in very old records; if those include old invalid signing txs, the alert becomes less trustworthy.
#
# `check_period_days` will truncate old records.
check_period_days = 10

# If a `missedSignings / totalSignings` is over `miss_percentage` parameter, it'll alert.
miss_percentage = 20
8 changes: 8 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ import (
)

var (
VMSigningsCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "vm_signings_total",
Help: "Number of VM signings",
},
[]string{"network_name", "status"},
)

EVMVotesCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "evm_votes_total",
Expand Down
4 changes: 4 additions & 0 deletions server/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ type Response struct {
Chain map[string]VotesInfo `json:"chain"`
} `json:"externalChainVotes"`

VMSignings struct {
Chain map[string]VotesInfo `json:"chain"`
} `json:"externalChainSignings"`

Alerts struct {
SentTgAlarms map[string]time.Time `json:"sent_tg_alarms"`
SentSlkAlarms map[string]time.Time `json:"sent_slk_alarms"`
Expand Down