@@ -20,6 +20,7 @@ import (
2020 "time"
2121
2222 "github.com/oklog/run"
23+ "github.com/prometheus/client_golang/prometheus"
2324 "github.com/prometheus/common/model"
2425
2526 "github.com/prometheus/alertmanager/config"
@@ -33,25 +34,37 @@ import (
3334// currently active alerts and a set of inhibition rules. It implements the
3435// Muter interface.
// NOTE(review): this span is a diff hunk, not plain source. Lines marked "-"
// are the removed pre-change fields and lines marked "+" are their
// replacements. The alerts/rules/marker/logger pairs read the same on both
// sides (presumably only re-aligned); the substantive change is the new
// metrics field.
//
// Field usage visible in this diff:
//   - rules is appended to by NewInhibitor and iterated by run and Mutes.
//   - marker receives SetInhibited calls from Mutes.
//   - logger is used for the duplicate-name warning and for cache-set errors.
//   - metrics is read by run and Mutes to record cache/index sizes and mute
//     durations.
//   - alerts, mtx and cancel are not exercised in the visible hunks.
3536type Inhibitor struct {
36- alerts provider.Alerts
37- rules []* InhibitRule
38- marker types.AlertMarker
39- logger * slog.Logger
37+ alerts provider.Alerts
38+ rules []* InhibitRule
39+ marker types.AlertMarker
40+ logger * slog.Logger
// metrics holds the inhibitor-wide collectors (added by this change).
41+ metrics * InhibitorMetrics
4042
4143 mtx sync.RWMutex
4244 cancel func ()
4345}
4446
4547// NewInhibitor returns a new Inhibitor.
//
// NOTE(review): this span is a diff hunk; "-" lines are the old
// implementation and "+" lines the new one.
//
// The new signature appends a metrics parameter, so existing four-argument
// callers must be updated. metrics is stored on the Inhibitor and also handed
// to NewRuleMetrics for every rule. No nil check is performed here, while run
// and Mutes dereference ih.metrics, so callers presumably must pass a non-nil
// value (confirm).
//
// Each entry of rs is converted with NewInhibitRule and appended to ih.rules
// in input order. A repeated non-empty rule name only produces a warning log;
// the rule is still built and appended.
46- func NewInhibitor (ap provider.Alerts , rs []config.InhibitRule , mk types.AlertMarker , logger * slog.Logger ) * Inhibitor {
48+ func NewInhibitor (ap provider.Alerts , rs []config.InhibitRule , mk types.AlertMarker , logger * slog.Logger , metrics * InhibitorMetrics ) * Inhibitor {
4749 ih := & Inhibitor {
48- alerts : ap ,
49- marker : mk ,
50- logger : logger ,
50+ alerts : ap ,
51+ marker : mk ,
52+ logger : logger ,
53+ metrics : metrics ,
5154 }
52- for _ , cr := range rs {
53- r := NewInhibitRule (cr )
55+
// ruleNames is the set of non-empty rule names seen so far.
56+ ruleNames := make (map [string ]struct {})
57+ for i , cr := range rs {
// Empty names are never inserted into ruleNames (see below), so unnamed
// rules never trigger this warning.
58+ if _ , ok := ruleNames [cr .Name ]; ok {
59+ ih .logger .Warn ("duplicate inhibition rule name" , "index" , i , "name" , cr .Name )
60+ }
61+
// NOTE(review): rules with the same name each get NewRuleMetrics with
// the same name; whether they share series depends on NewRuleMetrics,
// which is not visible here. Confirm.
62+ r := NewInhibitRule (cr , NewRuleMetrics (cr .Name , metrics ))
5463 ih .rules = append (ih .rules , r )
64+
65+ if cr .Name != "" {
66+ ruleNames [cr .Name ] = struct {}{}
67+ }
5568 }
5669 return ih
5770}
@@ -70,16 +83,30 @@ func (ih *Inhibitor) run(ctx context.Context) {
7083 continue
7184 }
7285 // Update the inhibition rules' cache.
86+ cachedSum := 0
87+ indexedSum := 0
7388 for _ , r := range ih .rules {
7489 if r .SourceMatchers .Matches (a .Labels ) {
7590 if err := r .scache .Set (a ); err != nil {
7691 ih .logger .Error ("error on set alert" , "err" , err )
7792 continue
7893 }
79-
8094 r .updateIndex (a )
95+
96+ cached := r .scache .Len ()
97+ indexed := r .sindex .Len ()
98+
99+ if r .Name != "" {
100+ r .metrics .sourceAlertsCacheSize .With (prometheus.Labels {"rule" : r .Name }).Set (float64 (cached ))
101+ r .metrics .sourceAlertsIndexSize .With (prometheus.Labels {"rule" : r .Name }).Set (float64 (indexed ))
102+ }
103+
104+ cachedSum += cached
105+ indexedSum += indexed
81106 }
82107 }
108+ ih .metrics .sourceAlertsCacheSize .Set (float64 (cachedSum ))
109+ ih .metrics .sourceAlertsIndexSize .Set (float64 (indexedSum ))
83110 }
84111 }
85112}
@@ -128,21 +155,29 @@ func (ih *Inhibitor) Stop() {
128155// Mutes returns true iff the given label set is muted. It implements the Muter
129156// interface.
//
// NOTE(review): this span is a diff hunk; "+" lines are the instrumentation
// added by this change, and unmarked lines are unchanged context.
//
// Rules are tried in order and the first rule that inhibits lset wins: the
// marker is updated with the inhibiting alert's fingerprint and later rules
// are not evaluated. If no rule inhibits, the marker is updated with no
// inhibiting fingerprint and false is returned.
//
// Timing recorded (all in seconds):
//   - ih.metrics.mutesDuration{muted}: the whole call, measured from start.
//   - r.metrics.matchesDuration{rule,matched}: measured from ruleStart up to
//     just after the target-matcher check.
//   - r.metrics.mutesDuration{rule,muted}: measured from ruleStart up to the
//     rule's outcome; recorded only for rules whose target side matched.
//
// NOTE(review): the per-rule observations here use r.Name without a guard,
// whereas the cache/index size gauges elsewhere in this diff are only written
// when r.Name != "". All unnamed rules therefore observe under rule="".
// Confirm this is intended.
130157func (ih * Inhibitor ) Mutes (lset model.LabelSet ) bool {
158+ start := time .Now ()
131159 fp := lset .Fingerprint ()
132160
133161 for _ , r := range ih .rules {
162+ ruleStart := time .Now ()
134163 if ! r .TargetMatchers .Matches (lset ) {
135164 // If target side of rule doesn't match, we don't need to look any further.
165+ r .metrics .matchesDuration .With (prometheus.Labels {"rule" : r .Name , "matched" : "false" }).Observe (time .Since (ruleStart ).Seconds ())
136166 continue
137167 }
168+ r .metrics .matchesDuration .With (prometheus.Labels {"rule" : r .Name , "matched" : "true" }).Observe (time .Since (ruleStart ).Seconds ())
138169 // If we are here, the target side matches. If the source side matches, too, we
139170 // need to exclude inhibiting alerts for which the same is true.
140171 if inhibitedByFP , eq := r .hasEqual (lset , r .SourceMatchers .Matches (lset )); eq {
141172 ih .marker .SetInhibited (fp , inhibitedByFP .String ())
// Record the overall and the per-rule duration before the early return.
173+ ih .metrics .mutesDuration .With (prometheus.Labels {"muted" : "true" }).Observe (time .Since (start ).Seconds ())
174+ r .metrics .mutesDuration .With (prometheus.Labels {"rule" : r .Name , "muted" : "true" }).Observe (time .Since (ruleStart ).Seconds ())
142175 return true
143176 }
// Target side matched but no equal source alert inhibited lset.
177+ r .metrics .mutesDuration .With (prometheus.Labels {"rule" : r .Name , "muted" : "false" }).Observe (time .Since (ruleStart ).Seconds ())
144178 }
// No rule inhibited lset: SetInhibited is called without any inhibitor IDs.
145179 ih .marker .SetInhibited (fp )
180+ ih .metrics .mutesDuration .With (prometheus.Labels {"muted" : "false" }).Observe (time .Since (start ).Seconds ())
146181
147182 return false
148183}
@@ -173,14 +208,17 @@ type InhibitRule struct {
173208 // The index items might overwrite each other if multiple source alerts have exactly equal labels.
174209 // Overwrites only happen if the new source alert has bigger EndsAt value.
175210 sindex * index
211+
212+ metrics * RuleMetrics
176213}
177214
178215// NewInhibitRule returns a new InhibitRule based on a configuration definition.
179- func NewInhibitRule (cr config.InhibitRule ) * InhibitRule {
216+ func NewInhibitRule (cr config.InhibitRule , metrics * RuleMetrics ) * InhibitRule {
180217 var (
181218 sourcem labels.Matchers
182219 targetm labels.Matchers
183220 )
221+
184222 // cr.SourceMatch will be deprecated. This for loop appends equality matchers.
185223 for ln , lv := range cr .SourceMatch {
186224 matcher , err := labels .NewMatcher (labels .MatchEqual , ln , lv )
@@ -235,6 +273,7 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule {
235273 Equal : equal ,
236274 scache : store .NewAlerts (),
237275 sindex : newIndex (),
276+ metrics : metrics ,
238277 }
239278
240279 rule .scache .SetGCCallback (rule .gcCallback )
@@ -310,6 +349,10 @@ func (r *InhibitRule) gcCallback(alerts []types.Alert) {
310349 fp := r .fingerprintEquals (a .Labels )
311350 r .sindex .Delete (fp )
312351 }
352+ if r .Name != "" {
353+ r .metrics .sourceAlertsCacheSize .With (prometheus.Labels {"rule" : r .Name }).Set (float64 (r .scache .Len ()))
354+ r .metrics .sourceAlertsIndexSize .With (prometheus.Labels {"rule" : r .Name }).Set (float64 (r .sindex .Len ()))
355+ }
313356}
314357
315358// hasEqual checks whether the source cache contains alerts matching the equal
0 commit comments