11package alert
22
33import (
4+ "context"
45 "watchAlert/alert/consumer"
56 "watchAlert/alert/eval"
67 "watchAlert/alert/probing"
78 "watchAlert/internal/ctx"
9+ "watchAlert/internal/global"
10+ "watchAlert/pkg/client"
11+ "watchAlert/pkg/tools"
12+
13+ "github.com/zeromicro/go-zero/core/logc"
814)
915
1016var (
@@ -13,18 +19,199 @@ var (
1319
1420 ProductProbing probing.ProductProbing
1521 ConsumeProbing probing.ConsumeProbing
22+
23+ // Leader 选举器
24+ LeaderElector * tools.LeaderElector
25+
26+ // 消息订阅取消函数
27+ subscriberCancels []context.CancelFunc
28+
29+ // 选举开关
30+ leaderElectionEnabled bool
1631)
1732
1833func Initialize (ctx * ctx.Context ) {
1934 // 初始化告警规则评估任务
2035 AlertRule = eval .NewAlertRuleEval (ctx )
21- AlertRule .RestartAllEvals ()
22-
2336 ConsumerWork = consumer .NewConsumerWork (ctx )
24- ConsumerWork .RestartAllConsumers ()
2537
2638 // 初始化拨测任务
2739 ConsumeProbing = probing .NewProbingConsumerTask (ctx )
2840 ProductProbing = probing .NewProbingTask (ctx )
41+
42+ // 检查 Leader 选举是否启用
43+ leaderElectionEnabled = global .Config .Server .EnableElection
44+
45+ if leaderElectionEnabled {
46+ // 启用 Leader 选举模式
47+ logc .Infof (ctx .Ctx , "Leader 选举已启用,开始选举流程..." )
48+ LeaderElector = tools .NewLeaderElector (
49+ ctx .Ctx ,
50+ client .Redis ,
51+ loadRules ,
52+ unloadRules ,
53+ )
54+ // 启动 Leader 选举
55+ LeaderElector .Start ()
56+ } else {
57+ loadRules ()
58+ }
59+ }
60+
61+ // loadRules 加载所有规则(成为 Leader 时调用)
62+ func loadRules () {
63+ logc .Infof (ctx .Ctx , "本节点为 Leader 节点,开始加载规则..." )
64+
65+ // 重启所有告警规则评估器
66+ AlertRule .RestartAllEvals ()
67+
68+ // 重启所有故障中心消费者
69+ ConsumerWork .RestartAllConsumers ()
70+
71+ // 重启所有拨测任务
2972 ProductProbing .RePushRule (& ConsumeProbing )
73+
74+ // 启动 Redis 消息订阅,监听规则变更
75+ startMessageSubscribers ()
76+ }
77+
78+ // startMessageSubscribers 启动消息订阅器
79+ func startMessageSubscribers () {
80+ subscriberCancels = make ([]context.CancelFunc , 0 )
81+
82+ // 订阅告警规则重载消息
83+ subCtx1 , cancel1 := context .WithCancel (ctx .Ctx )
84+ subscriberCancels = append (subscriberCancels , cancel1 )
85+ go tools .SubscribeReloadMessages (subCtx1 , client .Redis , tools .ChannelRuleReload , handleRuleReload )
86+
87+ // 订阅故障中心重载消息
88+ subCtx2 , cancel2 := context .WithCancel (ctx .Ctx )
89+ subscriberCancels = append (subscriberCancels , cancel2 )
90+ go tools .SubscribeReloadMessages (subCtx2 , client .Redis , tools .ChannelFaultCenterReload , handleFaultCenterReload )
91+
92+ // 订阅拨测规则重载消息
93+ subCtx3 , cancel3 := context .WithCancel (ctx .Ctx )
94+ subscriberCancels = append (subscriberCancels , cancel3 )
95+ go tools .SubscribeReloadMessages (subCtx3 , client .Redis , tools .ChannelProbingReload , handleProbingReload )
96+ }
97+
98+ // stopMessageSubscribers 停止消息订阅器
99+ func stopMessageSubscribers () {
100+ for _ , cancel := range subscriberCancels {
101+ cancel ()
102+ }
103+ subscriberCancels = nil
104+ logc .Infof (ctx .Ctx , "消息订阅器已停止" )
105+ }
106+
107+ // handleRuleReload 处理告警规则重载消息
108+ func handleRuleReload (msg tools.ReloadMessage ) {
109+
110+ // 从数据库获取规则
111+ rule := ctx .DB .Rule ().GetRuleObject (msg .ID )
112+ if rule .RuleId == "" {
113+ logc .Errorf (ctx .Ctx , "规则不存在: %s" , msg .ID )
114+ return
115+ }
116+
117+ switch msg .Action {
118+ case tools .ActionCreate , tools .ActionEnable :
119+ if rule .Enabled != nil && * rule .Enabled {
120+ AlertRule .Submit (rule )
121+ logc .Infof (ctx .Ctx , "[Leader] 已启动规则评估: %s" , msg .Name )
122+ }
123+
124+ case tools .ActionUpdate :
125+ AlertRule .Stop (msg .ID )
126+ if rule .Enabled != nil && * rule .Enabled {
127+ AlertRule .Submit (rule )
128+ logc .Infof (ctx .Ctx , "[Leader] 已重启规则评估: %s" , msg .Name )
129+ }
130+
131+ case tools .ActionDelete , tools .ActionDisable :
132+ AlertRule .Stop (msg .ID )
133+ logc .Infof (ctx .Ctx , "[Leader] 已停止规则评估: %s" , msg .Name )
134+ }
135+ }
136+
137+ // handleFaultCenterReload 处理故障中心重载消息
138+ func handleFaultCenterReload (msg tools.ReloadMessage ) {
139+ fc , err := ctx .DB .FaultCenter ().Get (msg .TenantID , msg .ID , "" )
140+ if err != nil {
141+ logc .Errorf (ctx .Ctx , "故障中心不存在: %s, err: %v" , msg .ID , err )
142+ return
143+ }
144+
145+ switch msg .Action {
146+ case tools .ActionCreate , tools .ActionEnable :
147+ ConsumerWork .Submit (fc )
148+ logc .Infof (ctx .Ctx , "[Leader] 已启动故障中心消费: %s" , msg .Name )
149+
150+ case tools .ActionUpdate :
151+ ConsumerWork .Stop (msg .ID )
152+ ConsumerWork .Submit (fc )
153+ logc .Infof (ctx .Ctx , "[Leader] 已重启故障中心消费: %s" , msg .Name )
154+
155+ case tools .ActionDelete , tools .ActionDisable :
156+ ConsumerWork .Stop (msg .ID )
157+ logc .Infof (ctx .Ctx , "[Leader] 已停止故障中心消费: %s" , msg .Name )
158+ }
159+ }
160+
161+ // handleProbingReload 处理拨测规则重载消息
162+ func handleProbingReload (msg tools.ReloadMessage ) {
163+ rule , err := ctx .DB .Probing ().Search (msg .TenantID , msg .ID )
164+ if err != nil {
165+ logc .Errorf (ctx .Ctx , "拨测规则不存在: %s, err: %v" , msg .ID , err )
166+ return
167+ }
168+ switch msg .Action {
169+ case tools .ActionCreate , tools .ActionEnable :
170+ if rule .Enabled != nil && * rule .Enabled {
171+ ProductProbing .Add (rule )
172+ ConsumeProbing .Add (rule )
173+ logc .Infof (ctx .Ctx , "[Leader] 已启动拨测任务: %s" , msg .Name )
174+ }
175+
176+ case tools .ActionUpdate :
177+ ProductProbing .Stop (msg .ID )
178+ ConsumeProbing .Stop (msg .ID )
179+ if rule .Enabled != nil && * rule .Enabled {
180+ ProductProbing .Add (rule )
181+ ConsumeProbing .Add (rule )
182+ logc .Infof (ctx .Ctx , "[Leader] 已重启拨测任务: %s" , msg .Name )
183+ }
184+
185+ case tools .ActionDelete , tools .ActionDisable :
186+ ProductProbing .Stop (msg .ID )
187+ ConsumeProbing .Stop (msg .ID )
188+ logc .Infof (ctx .Ctx , "[Leader] 已停止拨测任务: %s" , msg .Name )
189+ }
190+ }
191+
192+ // unloadRules 卸载所有规则(失去 Leader 时调用)
193+ func unloadRules () {
194+ logc .Infof (ctx .Ctx , "本节点失去 Leader 身份,停止所有任务..." )
195+
196+ // 停止消息订阅
197+ stopMessageSubscribers ()
198+
199+ // 停止所有告警规则评估器
200+ AlertRule .StopAllEvals ()
201+
202+ // 停止所有故障中心消费者
203+ ConsumerWork .StopAllConsumers ()
204+
205+ // 停止所有拨测任务
206+ ProductProbing .StopAllTasks ()
207+ ConsumeProbing .StopAllTasks ()
208+ }
209+
210+ // IsLeader 判断节点角色
211+ func IsLeader () bool {
212+ if ! leaderElectionEnabled {
213+ return true
214+ }
215+
216+ return LeaderElector != nil && LeaderElector .IsLeader ()
30217}
0 commit comments