From ea242d16a3e9b70ae5647dd776df04ac322ebed3 Mon Sep 17 00:00:00 2001 From: Jean-Pierre CHENG Date: Mon, 23 Dec 2024 22:01:45 +0700 Subject: [PATCH] add vm signings monitoring --- README.md | 14 +++++ app/app.go | 8 ++- app/signings.go | 72 +++++++++++++++++++++++ app/types.go | 9 ++- client/api/axelarscan.go | 119 ++++++++++++++++++++++++++++++++++++++- client/api/types.go | 32 +++++++++++ config.toml.example | 16 +++++- metrics/metrics.go | 8 +++ server/types.go | 4 ++ 9 files changed, 276 insertions(+), 6 deletions(-) create mode 100644 app/signings.go diff --git a/README.md b/README.md index aa08f53..94ee8a0 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,20 @@ check_period_days = 10 # If a `missedVotes / totalVotes` is over `miss_percentage` parameter, it'll alert. miss_percentage = 20 +[external_chain_signing] # external_chain_signing configurations are used to check VM signings. + +# The number of signing events to check for each external chain. +check_n = 10 + +# This field restricts the monitored period. Some chains connected on mainnet/testnet may have few txs. +# If the signings fetched with `check_n` include very old records, and invalid signing txs existed in the past, the monitoring alert may be less trustworthy. +# +# `check_period_days` will truncate old records. +check_period_days = 10 + +# If `missedSignings / totalSignings` is over the `miss_percentage` parameter, it'll alert. 
+miss_percentage = 20 + ``` diff --git a/app/app.go b/app/app.go index 7b80aff..f2a3d4f 100644 --- a/app/app.go +++ b/app/app.go @@ -1,7 +1,6 @@ package app import ( - "bharvest.io/axelmon/server" "context" "encoding/json" "errors" @@ -10,6 +9,8 @@ import ( "sync" "time" + "bharvest.io/axelmon/server" + "bharvest.io/axelmon/log" ) @@ -45,7 +46,7 @@ func Run(ctx context.Context, c *Config) { var monitoringFuncs []Monfunc if len(c.General.TargetSvcs) == 0 { - monitoringFuncs = []Monfunc{c.checkMaintainers, c.checkHeartbeats, c.checkEVMVotes} + monitoringFuncs = []Monfunc{c.checkMaintainers, c.checkHeartbeats, c.checkEVMVotes, c.checkVMSignings} } else { for _, targetSvc := range c.General.TargetSvcs { switch targetSvc { @@ -61,6 +62,9 @@ func Run(ctx context.Context, c *Config) { case VMVoteTargetSvc: monitoringFuncs = append(monitoringFuncs, c.checkVMVotes) break + case VMSigningTargetSvc: + monitoringFuncs = append(monitoringFuncs, c.checkVMSignings) + break } } } diff --git a/app/signings.go b/app/signings.go new file mode 100644 index 0000000..db4191f --- /dev/null +++ b/app/signings.go @@ -0,0 +1,72 @@ +package app + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/axelarnetwork/axelar-core/x/nexus/exported" + + "bharvest.io/axelmon/client/api" + "bharvest.io/axelmon/metrics" + "bharvest.io/axelmon/server" + "github.com/prometheus/client_golang/prometheus" +) + +// checkVMSignings passes the chains returned by api.C.GetVerifierSupportedChains +// for the proxy account to getSignings. +func (c *Config) checkVMSignings(ctx context.Context) error { + chains, err := api.C.GetVerifierSupportedChains(c.Wallet.Proxy.PrintAcc()) + if err != nil { + return err + } + return c.getSignings(ctx, chains) +} + +// getSignings records VM signing metrics, alerts and the global status entry for +// every given chain that is not listed in c.General.ExceptChains. +func (c *Config) getSignings(ctx context.Context, chains []exported.ChainName) error { + + result := make(map[string]server.VotesInfo) + for _, chain := range chains { + // If chain is included in except chains + // then don't monitor that chain's VM signings. 
+ if c.General.ExceptChains[strings.ToLower(chain.String())] { + continue + } + + votesInfo := server.VotesInfo{} + + if c.PollingSigning.CheckPeriodDays == 0 { + c.PollingSigning.CheckPeriodDays = 10 + } + resp, err := api.C.GetPollingSignings(chain.String(), c.PollingSigning.CheckN, c.Wallet.Proxy.PrintAcc(), + time.Duration(c.PollingSigning.CheckPeriodDays)*time.Hour*24) + if err != nil { + return err + } + + votesInfo.Missed = fmt.Sprintf("%d / %d", resp.MissCnt, int(resp.TotalSignings)) + metrics.VMSigningsCounter.With(prometheus.Labels{"network_name": chain.String(), "status": "missed"}).Add(float64(resp.MissCnt)) + // Signings that were not missed count as successes (TotalSignings is a whole-number count). + metrics.VMSigningsCounter.With(prometheus.Labels{"network_name": chain.String(), "status": "success"}).Add(resp.TotalSignings - float64(resp.MissCnt)) + + if (float64(resp.MissCnt)/resp.TotalSignings)*100 > float64(c.PollingSigning.MissPercentage) { + votesInfo.Status = false + + msg := fmt.Sprintf("status(%s)", chain) + c.alert(msg, []string{}, false, false) + } else { + votesInfo.Status = true + + msg := fmt.Sprintf("status(%s)", chain) + c.alert(msg, []string{}, true, false) + } + + result[chain.String()] = votesInfo + } + server.GlobalState.VMSignings.Chain = result + + return nil +} diff --git a/app/types.go b/app/types.go index 69a6470..27c82a2 100644 --- a/app/types.go +++ b/app/types.go @@ -1,10 +1,11 @@ package app import ( - "bharvest.io/axelmon/wallet" "context" "sync" "time" + + "bharvest.io/axelmon/wallet" ) type Duration time.Duration @@ -60,6 +61,11 @@ type Config struct { MissPercentage int `toml:"miss_percentage"` CheckPeriodDays int `toml:"check_period_days"` } `toml:"external_chain_vote"` + PollingSigning struct { + 
CheckN int `toml:"check_n"` + MissPercentage int `toml:"miss_percentage"` + CheckPeriodDays int `toml:"check_period_days"` + } `toml:"external_chain_signing"` Ctx context.Context Cancel context.CancelFunc @@ -74,4 +80,5 @@ const ( HeartbeatTargetSvc TargetSvc = "heartbeat" EVMVoteTargetSvc TargetSvc = "evm" VMVoteTargetSvc TargetSvc = "vm" + VMSigningTargetSvc TargetSvc = "vmSigning" ) diff --git a/client/api/axelarscan.go b/client/api/axelarscan.go index 58aee85..27ee7eb 100644 --- a/client/api/axelarscan.go +++ b/client/api/axelarscan.go @@ -1,15 +1,16 @@ package api import ( - "bharvest.io/axelmon/log" "bytes" "encoding/json" "errors" "fmt" - "github.com/axelarnetwork/axelar-core/x/nexus/exported" "io" "net/http" "time" + + "bharvest.io/axelmon/log" + "github.com/axelarnetwork/axelar-core/x/nexus/exported" ) type PollingType string @@ -189,3 +190,117 @@ func (c *Client) GetPollingVotes(chain string, size int, proxyAcc string, pollin return &result, nil } + +// GetPollingSignings queries axelarscan's searchVMProofs endpoint for chain and counts +// the sessions within checkPeriod that lack a sign=true entry for proxyAcc. +func (c *Client) GetPollingSignings(chain string, size int, proxyAcc string, checkPeriod time.Duration) (*SigningsReturn, error) { + // Cap size at 255: the limit was written for a byte-sized miss counter, although SigningsReturn.MissCnt is an int. 
+ if size > 255 { + return nil, errors.New("exceed maximum # vm signings") + } + + reqBytes, err := json.Marshal(SigningsRequest{ + chain, + size, + }) + if err != nil { + return nil, err + } + reqBody := bytes.NewBuffer(reqBytes) + + url := fmt.Sprintf("%s/validator/searchVMProofs", c.axelarscan) + req, err := http.NewRequest("POST", url, reqBody) + if err != nil { + return nil, err + } + req.Header.Add("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, errors.New("Failed to get proper data from axelarscan") + } + + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + var res map[string]any + err = json.Unmarshal(bodyBytes, &res) + if err != nil { + return nil, err + } + + dataBytes, err := json.Marshal(res["data"]) + if err != nil { + return nil, err + } + + var data []map[string]any + err = json.Unmarshal(dataBytes, &data) + if err != nil { + return nil, err + } + + result := SigningsReturn{ + Chain: chain, + SigningInfos: make([]SigningInfo, len(data)), + } + + cutoff := time.Now().Add(-1 * checkPeriod) + + for i, d := range data { + if txHash, ok := d["initiated_txhash"].(string); ok { + result.SigningInfos[i].InitiatedTXHash = txHash + } + if sessionID, ok := d["session_id"].(float64); ok { + result.SigningInfos[i].SessionID = sessionID + } + + createdAt, _ := d["created_at"].(map[string]any) + ms, _ := createdAt["ms"].(float64) + if time.Unix(int64(ms/1000), 0).Before(cutoff) { + // Too old, or no usable created_at.ms timestamp: skip the record. + log.Debug("skipping... 
it's too old") + continue + } + + signer := d[proxyAcc] + if signer != nil { + signingInfoBytes, err := json.Marshal(signer) + if err != nil { + return nil, err + } + + signingInfo := Signing{} + err = json.Unmarshal(signingInfoBytes, &signingInfo) + if err != nil { + return nil, err + } + + if signingInfo.Sign { + // sign => yes + result.SigningInfos[i].Sign = 1 + } else { + // sign => no + result.SigningInfos[i].Sign = 2 + } + } else { + // sign => not signed + result.SigningInfos[i].Sign = 0 + } + + if result.SigningInfos[i].Sign != 1 { + result.MissCnt++ + } + result.TotalSignings++ + } + + return &result, nil +} diff --git a/client/api/types.go b/client/api/types.go index 6a50bb9..970bea3 100644 --- a/client/api/types.go +++ b/client/api/types.go @@ -41,6 +41,38 @@ type ( } ) +type ( + SigningsRequest struct { + Chain string `json:"chain"` + Size int `json:"size"` + } + + Signing struct { + CreatedAt int64 `json:"created_at"` + ID string `json:"id"` + Signer string `json:"signer"` + Type string `json:"type"` + Sign bool `json:"sign"` + Height int `json:"height"` + } + + SigningsReturn struct { + Chain string + MissCnt int + SigningInfos []SigningInfo + TotalSignings float64 + } + SigningInfo struct { + InitiatedTXHash string + SessionID float64 + + // 0 => not signed + // 1 => yes + // 2 => no + Sign byte + } +) + type Proxy struct { Height string `json:"height"` Result struct { diff --git a/config.toml.example b/config.toml.example index 86a74f9..963671c 100644 --- a/config.toml.example +++ b/config.toml.example @@ -78,4 +78,18 @@ check_n = 10 check_period_days = 10 # If a `missedVotes / totalVotes` is over `miss_percentage` parameter, it'll alert. -miss_percentage = 20 \ No newline at end of file +miss_percentage = 20 + +[external_chain_signing] # external_chain_signing configurations are used to check VM signings. + +# The number of signing events to check for each external chain. +check_n = 10 + +# This field restricts the monitored period.
Some chains connected on mainnet/testnet may have few txs. +# If the signings fetched with `check_n` include very old records, and invalid signing txs existed in the past, the monitoring alert may be less trustworthy. +# +# `check_period_days` will truncate old records. +check_period_days = 10 + +# If `missedSignings / totalSignings` is over the `miss_percentage` parameter, it'll alert. +miss_percentage = 20 diff --git a/metrics/metrics.go b/metrics/metrics.go index b471fc5..b407a9a 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -6,6 +6,14 @@ import ( ) var ( + VMSigningsCounter = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "vm_signings_total", + Help: "Number of VM signings", + }, + []string{"network_name", "status"}, + ) + EVMVotesCounter = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "evm_votes_total", diff --git a/server/types.go b/server/types.go index 7b7e224..0256033 100644 --- a/server/types.go +++ b/server/types.go @@ -25,6 +25,10 @@ type Response struct { Chain map[string]VotesInfo `json:"chain"` } `json:"externalChainVotes"` + VMSignings struct { + Chain map[string]VotesInfo `json:"chain"` + } `json:"externalChainSignings"` + Alerts struct { SentTgAlarms map[string]time.Time `json:"sent_tg_alarms"` SentSlkAlarms map[string]time.Time `json:"sent_slk_alarms"`