Skip to content

Commit 16fb87a

Browse files
authored Dec 5, 2024
maintainer, coordinator: close table trigger dispatcher before maintainer exit (pingcap#639)
* close the table trigger dispatcher when the maintainer exits
1 parent 31cbc13 commit 16fb87a

File tree

8 files changed

+262
-101
lines changed

8 files changed

+262
-101
lines changed
 

‎coordinator/operator/operator_move.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func (m *MoveMaintainerOperator) OnNodeRemove(n node.ID) {
106106
return
107107
}
108108

109-
log.Info("changefeed changefeedIsRemoved from dest node",
109+
log.Info("changefeed changefeed is removed from dest node",
110110
zap.String("dest", m.dest.String()),
111111
zap.String("origin", m.origin.String()),
112112
zap.String("changefeed", m.changefeed.ID.String()))

‎downstreamadapter/dispatchermanager/event_dispatcher_manager.go

+37-26
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,13 @@ type EventDispatcherManager struct {
100100
cancel context.CancelFunc
101101
wg sync.WaitGroup
102102

103-
tableEventDispatcherCount prometheus.Gauge
104-
metricCreateDispatcherDuration prometheus.Observer
105-
metricCheckpointTs prometheus.Gauge
106-
metricCheckpointTsLag prometheus.Gauge
107-
metricResolvedTs prometheus.Gauge
108-
metricResolvedTsLag prometheus.Gauge
103+
metricTableTriggerEventDispatcherCount prometheus.Gauge
104+
metricEventDispatcherCount prometheus.Gauge
105+
metricCreateDispatcherDuration prometheus.Observer
106+
metricCheckpointTs prometheus.Gauge
107+
metricCheckpointTsLag prometheus.Gauge
108+
metricResolvedTs prometheus.Gauge
109+
metricResolvedTsLag prometheus.Gauge
109110
}
110111

111112
// return actual startTs of the table trigger event dispatcher
@@ -118,23 +119,24 @@ func NewEventDispatcherManager(
118119
maintainerID node.ID) (*EventDispatcherManager, uint64, error) {
119120
ctx, cancel := context.WithCancel(context.Background())
120121
manager := &EventDispatcherManager{
121-
dispatcherMap: newDispatcherMap(),
122-
changefeedID: changefeedID,
123-
maintainerID: maintainerID,
124-
statusesChan: make(chan TableSpanStatusWithSeq, 8192),
125-
blockStatusesChan: make(chan *heartbeatpb.TableSpanBlockStatus, 1024*1024),
126-
errCh: make(chan error, 1),
127-
cancel: cancel,
128-
config: cfConfig,
129-
filterConfig: toFilterConfigPB(cfConfig.Filter),
130-
schemaIDToDispatchers: dispatcher.NewSchemaIDToDispatchers(),
131-
latestWatermark: NewWatermark(startTs),
132-
tableEventDispatcherCount: metrics.TableEventDispatcherGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
133-
metricCreateDispatcherDuration: metrics.CreateDispatcherDuration.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
134-
metricCheckpointTs: metrics.EventDispatcherManagerCheckpointTsGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
135-
metricCheckpointTsLag: metrics.EventDispatcherManagerCheckpointTsLagGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
136-
metricResolvedTs: metrics.EventDispatcherManagerResolvedTsGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
137-
metricResolvedTsLag: metrics.EventDispatcherManagerResolvedTsLagGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
122+
dispatcherMap: newDispatcherMap(),
123+
changefeedID: changefeedID,
124+
maintainerID: maintainerID,
125+
statusesChan: make(chan TableSpanStatusWithSeq, 8192),
126+
blockStatusesChan: make(chan *heartbeatpb.TableSpanBlockStatus, 1024*1024),
127+
errCh: make(chan error, 1),
128+
cancel: cancel,
129+
config: cfConfig,
130+
filterConfig: toFilterConfigPB(cfConfig.Filter),
131+
schemaIDToDispatchers: dispatcher.NewSchemaIDToDispatchers(),
132+
latestWatermark: NewWatermark(startTs),
133+
metricTableTriggerEventDispatcherCount: metrics.TableTriggerEventDispatcherGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
134+
metricEventDispatcherCount: metrics.EventDispatcherGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
135+
metricCreateDispatcherDuration: metrics.CreateDispatcherDuration.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
136+
metricCheckpointTs: metrics.EventDispatcherManagerCheckpointTsGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
137+
metricCheckpointTsLag: metrics.EventDispatcherManagerCheckpointTsLagGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
138+
metricResolvedTs: metrics.EventDispatcherManagerResolvedTsGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
139+
metricResolvedTsLag: metrics.EventDispatcherManagerResolvedTsLagGauge.WithLabelValues(changefeedID.Namespace(), changefeedID.Name()),
138140
}
139141

140142
// Set Sync Point Config
@@ -256,7 +258,8 @@ func (e *EventDispatcherManager) close(remove bool) {
256258
e.cancel()
257259
e.wg.Wait()
258260

259-
metrics.TableEventDispatcherGauge.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
261+
metrics.TableTriggerEventDispatcherGauge.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
262+
metrics.EventDispatcherGauge.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
260263
metrics.CreateDispatcherDuration.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
261264
metrics.EventDispatcherManagerCheckpointTsGauge.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
262265
metrics.EventDispatcherManagerResolvedTsGauge.DeleteLabelValues(e.changefeedID.Namespace(), e.changefeedID.Name())
@@ -385,7 +388,11 @@ func (e *EventDispatcherManager) newDispatchers(infos []dispatcherCreateInfo) er
385388
Seq: seq,
386389
}
387390

388-
e.tableEventDispatcherCount.Inc()
391+
if d.IsTableTriggerEventDispatcher() {
392+
e.metricTableTriggerEventDispatcherCount.Inc()
393+
} else {
394+
e.metricEventDispatcherCount.Inc()
395+
}
389396

390397
log.Info("new dispatcher created",
391398
zap.String("ID", id.String()),
@@ -637,7 +644,11 @@ func (e *EventDispatcherManager) cleanDispatcher(id common.DispatcherID, schemaI
637644
if e.tableTriggerEventDispatcher != nil && e.tableTriggerEventDispatcher.GetId() == id {
638645
e.tableTriggerEventDispatcher = nil
639646
}
640-
e.tableEventDispatcherCount.Dec()
647+
if id == e.tableTriggerEventDispatcher.GetId() {
648+
e.metricTableTriggerEventDispatcherCount.Dec()
649+
} else {
650+
e.metricEventDispatcherCount.Dec()
651+
}
641652
log.Info("table event dispatcher completely stopped, and delete it from event dispatcher manager", zap.Any("dispatcher id", id))
642653
}
643654

‎maintainer/maintainer.go

+8-8
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ type Maintainer struct {
8585
// the table trigger event dispatcher (its dispatcher id is generated by the maintainer);
8686
// table trigger event dispatcher runs on the same node with maintainer,
8787
// so when a maintainer is created, that means the dispatcher is gone and must be recreated.
88-
tableTriggerEventDispatcherID common.DispatcherID
88+
ddlSpan *replica.SpanReplication
8989

9090
pdEndpoints []string
9191
nodeManager *watcher.NodeManager
@@ -157,8 +157,7 @@ func NewMaintainer(cfID common.ChangeFeedID,
157157
cascadeRemoving: false,
158158
config: cfg,
159159

160-
tableTriggerEventDispatcherID: tableTriggerEventDispatcherID,
161-
160+
ddlSpan: ddlSpan,
162161
watermark: &heartbeatpb.Watermark{
163162
CheckpointTs: checkpointTs,
164163
ResolvedTs: checkpointTs,
@@ -337,7 +336,7 @@ func (m *Maintainer) onMessage(msg *messaging.TargetMessage) {
337336
case messaging.TypeMaintainerPostBootstrapResponse:
338337
m.onMaintainerPostBootstrapResponse(msg)
339338
case messaging.TypeMaintainerCloseResponse:
340-
m.onNodeClosed(msg.From, msg.Message[0].(*heartbeatpb.MaintainerCloseResponse))
339+
m.onMaintainerCloseResponse(msg.From, msg.Message[0].(*heartbeatpb.MaintainerCloseResponse))
341340
case messaging.TypeRemoveMaintainerRequest:
342341
req := msg.Message[0].(*heartbeatpb.RemoveMaintainerRequest)
343342
m.onRemoveMaintainer(req.Cascade, req.Removed)
@@ -571,7 +570,7 @@ func (m *Maintainer) sendPostBootstrapRequest() {
571570
}
572571
}
573572

574-
func (m *Maintainer) onNodeClosed(from node.ID, response *heartbeatpb.MaintainerCloseResponse) {
573+
func (m *Maintainer) onMaintainerCloseResponse(from node.ID, response *heartbeatpb.MaintainerCloseResponse) {
575574
if response.Success {
576575
m.nodesClosed[from] = struct{}{}
577576
}
@@ -601,7 +600,8 @@ func (m *Maintainer) tryCloseChangefeed() bool {
601600
m.statusChanged.Store(true)
602601
}
603602
if !m.cascadeRemoving {
604-
return true
603+
m.controller.RemoveTasksByTableIDs(m.ddlSpan.Span.TableID)
604+
return !m.ddlSpan.IsWorking()
605605
}
606606
return m.sendMaintainerCloseRequestToAllNode()
607607
}
@@ -687,8 +687,8 @@ func (m *Maintainer) getNewBootstrapFn() bootstrap.NewBootstrapMessageFn {
687687
if id == m.selfNode.ID {
688688
log.Info("create table event trigger dispatcher", zap.String("changefeed", m.id.String()),
689689
zap.String("server", id.String()),
690-
zap.String("dispatcher id", m.tableTriggerEventDispatcherID.String()))
691-
msg.TableTriggerEventDispatcherId = m.tableTriggerEventDispatcherID.ToPB()
690+
zap.String("dispatcher id", m.ddlSpan.ID.String()))
691+
msg.TableTriggerEventDispatcherId = m.ddlSpan.ID.ToPB()
692692
}
693693
log.Info("send maintainer bootstrap message",
694694
zap.String("changefeed", m.id.String()),

‎maintainer/maintainer_test.go

+36-9
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"net/http"
2020
"net/http/pprof"
2121
"strconv"
22+
"sync"
2223
"testing"
2324
"time"
2425

@@ -85,7 +86,7 @@ func (m *mockDispatcherManager) handleMessage(msg *messaging.TargetMessage) {
8586
case messaging.TypeMaintainerBootstrapRequest:
8687
m.onBootstrapRequest(msg)
8788
case messaging.TypeMaintainerPostBootstrapRequest:
88-
89+
m.onPostBootstrapRequest(msg)
8990
case messaging.TypeScheduleDispatcherRequest:
9091
m.onDispatchRequest(msg)
9192
case messaging.TypeMaintainerCloseRequest:
@@ -133,6 +134,14 @@ func (m *mockDispatcherManager) onBootstrapRequest(msg *messaging.TargetMessage)
133134
}
134135
m.changefeedID = req.ChangefeedID
135136
m.checkpointTs = req.StartTs
137+
if req.TableTriggerEventDispatcherId != nil {
138+
m.dispatchersMap[*req.TableTriggerEventDispatcherId] = &heartbeatpb.TableSpanStatus{
139+
ID: req.TableTriggerEventDispatcherId,
140+
ComponentStatus: heartbeatpb.ComponentState_Working,
141+
CheckpointTs: req.StartTs,
142+
}
143+
m.dispatchers = append(m.dispatchers, m.dispatchersMap[*req.TableTriggerEventDispatcherId])
144+
}
136145
err := m.mc.SendCommand(messaging.NewSingleTargetMessage(
137146
m.maintainerID,
138147
messaging.MaintainerManagerTopic,
@@ -237,8 +246,7 @@ func (m *mockDispatcherManager) sendHeartbeat() {
237246
}
238247

239248
func TestMaintainerSchedule(t *testing.T) {
240-
ctx := context.Background()
241-
ctx, cancel := context.WithCancel(ctx)
249+
ctx, cancel := context.WithCancel(context.Background())
242250
mux := http.NewServeMux()
243251
registry := prometheus.NewRegistry()
244252
metrics.InitMetrics(registry)
@@ -302,7 +310,13 @@ func TestMaintainerSchedule(t *testing.T) {
302310
return nil
303311
})
304312
dispatcherManager := MockDispatcherManager(mc, n.ID)
305-
go dispatcherManager.Run(ctx)
313+
314+
wg := &sync.WaitGroup{}
315+
wg.Add(1)
316+
go func() {
317+
defer wg.Done()
318+
require.ErrorIs(t, dispatcherManager.Run(ctx), context.Canceled)
319+
}()
306320

307321
taskScheduler := threadpool.NewThreadPoolDefault()
308322
tsoClient := &mockTsoClient{}
@@ -327,10 +341,23 @@ func TestMaintainerSchedule(t *testing.T) {
327341
}, time.Now().Add(time.Millisecond*500))
328342
time.Sleep(time.Second * time.Duration(sleepTime))
329343

344+
require.Eventually(t, func() bool {
345+
return maintainer.ddlSpan.IsWorking() && maintainer.postBootstrapMsg == nil
346+
}, time.Second*2, time.Millisecond*100)
347+
348+
require.Eventually(t, func() bool {
349+
return maintainer.controller.replicationDB.GetReplicatingSize() == tableSize
350+
}, time.Second*2, time.Millisecond*100)
351+
352+
require.Eventually(t, func() bool {
353+
return maintainer.controller.GetTaskSizeByNodeID(n.ID) == tableSize
354+
}, time.Second*2, time.Millisecond*100)
355+
356+
maintainer.onRemoveMaintainer(false, false)
357+
require.Eventually(t, func() bool {
358+
return maintainer.tryCloseChangefeed()
359+
}, time.Second*200, time.Millisecond*100)
360+
330361
cancel()
331-
// stream.Close()
332-
require.Equal(t, tableSize,
333-
maintainer.controller.replicationDB.GetReplicatingSize())
334-
require.Equal(t, tableSize,
335-
maintainer.controller.GetTaskSizeByNodeID(n.ID))
362+
wg.Wait()
336363
}

‎maintainer/operator/operator_remove.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ func NewRemoveDispatcherOperator(db *replica.ReplicationDB, replicaSet *replica.
4444
func (m *RemoveDispatcherOperator) Check(from node.ID, status *heartbeatpb.TableSpanStatus) {
4545
if !m.finished.Load() && from == m.replicaSet.GetNodeID() &&
4646
status.ComponentStatus != heartbeatpb.ComponentState_Working {
47+
m.replicaSet.UpdateStatus(status)
4748
log.Info("dispatcher report non-working status",
4849
zap.String("replicaSet", m.replicaSet.ID.String()))
4950
m.finished.Store(true)
@@ -70,7 +71,8 @@ func (m *RemoveDispatcherOperator) IsFinished() bool {
7071
}
7172

7273
// OnTaskRemoved panics with "unreachable". This diff hunk replaces the
// previous body (m.finished.Store(true), shown on the removed line below)
// with the panic and keeps the old statement only as a comment.
func (m *RemoveDispatcherOperator) OnTaskRemoved() {
73-
m.finished.Store(true)
74+
panic("unreachable")
75+
// m.finished.Store(true)
7476
}
7577

7678
func (m *RemoveDispatcherOperator) Start() {

‎maintainer/replica/replication_span.go

+5
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,11 @@ func (r *SpanReplication) IsDropped() bool {
158158
// return false
159159
}
160160

161+
// IsWorking reports whether the ComponentStatus of the status currently
// loaded from r.status equals heartbeatpb.ComponentState_Working.
// NOTE(review): the loaded status is dereferenced without a nil check —
// confirm r.status is always populated before this is called.
func (r *SpanReplication) IsWorking() bool {
162+
status := r.status.Load()
163+
return status.ComponentStatus == heartbeatpb.ComponentState_Working
164+
}
165+
161166
// UpdateBlockState stores a pointer to the by-value copy of newState in
// r.blockState, replacing whatever was stored there before.
func (r *SpanReplication) UpdateBlockState(newState heartbeatpb.State) {
162167
r.blockState.Store(&newState)
163168
}

‎metrics/grafana/ticdc_new_arch.json

+159-50
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@
130130
"editable": true,
131131
"gnetId": null,
132132
"graphTooltip": 1,
133-
"id": 32,
134-
"iteration": 1732345252721,
133+
"id": null,
134+
"iteration": 1733385885504,
135135
"links": [],
136136
"panels": [
137137
{
@@ -1524,7 +1524,7 @@
15241524
"h": 6,
15251525
"w": 12,
15261526
"x": 0,
1527-
"y": 22
1527+
"y": 3
15281528
},
15291529
"hiddenSeries": false,
15301530
"id": 10069,
@@ -1643,7 +1643,7 @@
16431643
"h": 6,
16441644
"w": 12,
16451645
"x": 12,
1646-
"y": 22
1646+
"y": 3
16471647
},
16481648
"hiddenSeries": false,
16491649
"id": 10071,
@@ -1724,6 +1724,121 @@
17241724
"align": false,
17251725
"alignLevel": null
17261726
}
1727+
},
1728+
{
1729+
"aliasColors": {},
1730+
"bars": false,
1731+
"dashLength": 10,
1732+
"dashes": false,
1733+
"datasource": "${DS_C1}",
1734+
"decimals": 1,
1735+
"description": "",
1736+
"editable": true,
1737+
"error": false,
1738+
"fieldConfig": {
1739+
"defaults": {
1740+
"unit": "none"
1741+
},
1742+
"overrides": []
1743+
},
1744+
"fill": 0,
1745+
"fillGradient": 0,
1746+
"grid": {},
1747+
"gridPos": {
1748+
"h": 6,
1749+
"w": 12,
1750+
"x": 0,
1751+
"y": 9
1752+
},
1753+
"hiddenSeries": false,
1754+
"id": 20037,
1755+
"legend": {
1756+
"alignAsTable": false,
1757+
"avg": false,
1758+
"current": false,
1759+
"max": false,
1760+
"min": false,
1761+
"rightSide": false,
1762+
"show": true,
1763+
"sideWidth": null,
1764+
"sort": null,
1765+
"sortDesc": null,
1766+
"total": false,
1767+
"values": false
1768+
},
1769+
"lines": true,
1770+
"linewidth": 1,
1771+
"links": [],
1772+
"nullPointMode": "null",
1773+
"options": {
1774+
"alertThreshold": true
1775+
},
1776+
"paceLength": 10,
1777+
"percentage": false,
1778+
"pluginVersion": "7.5.17",
1779+
"pointradius": 1,
1780+
"points": true,
1781+
"renderer": "flot",
1782+
"seriesOverrides": [],
1783+
"spaceLength": 10,
1784+
"stack": false,
1785+
"steppedLine": false,
1786+
"targets": [
1787+
{
1788+
"exemplar": true,
1789+
"expr": "sum(ticdc_dispatchermanager_table_trigger_event_dispatcher_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$ticdc_instance\", namespace=~\"$namespace\", changefeed=~\"$changefeed\"}) by (instance, changefeed)",
1790+
"format": "time_series",
1791+
"hide": false,
1792+
"interval": "",
1793+
"intervalFactor": 2,
1794+
"legendFormat": "{{namespace}}-{{changefeed}}-{{instance}}",
1795+
"refId": "A",
1796+
"step": 10
1797+
}
1798+
],
1799+
"thresholds": [],
1800+
"timeFrom": null,
1801+
"timeRegions": [],
1802+
"timeShift": null,
1803+
"title": "Table Trigger Dispatcher Count",
1804+
"tooltip": {
1805+
"msResolution": false,
1806+
"shared": true,
1807+
"sort": 0,
1808+
"value_type": "individual"
1809+
},
1810+
"type": "graph",
1811+
"xaxis": {
1812+
"buckets": null,
1813+
"mode": "time",
1814+
"name": null,
1815+
"show": true,
1816+
"values": []
1817+
},
1818+
"yaxes": [
1819+
{
1820+
"$$hashKey": "object:1202",
1821+
"format": "none",
1822+
"label": null,
1823+
"logBase": 1,
1824+
"max": null,
1825+
"min": "0",
1826+
"show": true
1827+
},
1828+
{
1829+
"$$hashKey": "object:1203",
1830+
"format": "none",
1831+
"label": null,
1832+
"logBase": 1,
1833+
"max": null,
1834+
"min": null,
1835+
"show": false
1836+
}
1837+
],
1838+
"yaxis": {
1839+
"align": false,
1840+
"alignLevel": null
1841+
}
17271842
}
17281843
],
17291844
"title": "Maintainer",
@@ -1801,7 +1916,7 @@
18011916
"targets": [
18021917
{
18031918
"exemplar": true,
1804-
"expr": "sum(tigate_dispatchermanager_table_event_dispatcher_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$ticdc_instance\", namespace=~\"$namespace\", changefeed=~\"$changefeed\"}) by (instance, changefeed)",
1919+
"expr": "sum(ticdc_dispatchermanager_table_event_dispatcher_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$ticdc_instance\", namespace=~\"$namespace\", changefeed=~\"$changefeed\"}) by (instance, changefeed)",
18051920
"format": "time_series",
18061921
"hide": false,
18071922
"interval": "",
@@ -1916,7 +2031,7 @@
19162031
"targets": [
19172032
{
19182033
"exemplar": true,
1919-
"expr": "sum(tigate_dispatchermanagermanager_event_dispatcher_manager_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$ticdc_instance\", namespace=~\"$namespace\", changefeed=~\"$changefeed\"}) by (instance)",
2034+
"expr": "sum(ticdc_dispatchermanagermanager_event_dispatcher_manager_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$ticdc_instance\", namespace=~\"$namespace\", changefeed=~\"$changefeed\"}) by (instance)",
19202035
"format": "time_series",
19212036
"hide": false,
19222037
"interval": "",
@@ -25981,6 +26096,13 @@
2598126096
"renderer": "flot",
2598226097
"repeat": null,
2598326098
"repeatDirection": "h",
26099+
"scopedVars": {
26100+
"runtime_instance": {
26101+
"selected": false,
26102+
"text": "10.2.6.240:8300",
26103+
"value": "10.2.6.240:8300"
26104+
}
26105+
},
2598426106
"seriesOverrides": [
2598526107
{
2598626108
"alias": "alloc-from-os",
@@ -26155,6 +26277,13 @@
2615526277
"renderer": "flot",
2615626278
"repeat": null,
2615726279
"repeatDirection": "h",
26280+
"scopedVars": {
26281+
"runtime_instance": {
26282+
"selected": false,
26283+
"text": "10.2.6.240:8300",
26284+
"value": "10.2.6.240:8300"
26285+
}
26286+
},
2615826287
"seriesOverrides": [],
2615926288
"spaceLength": 10,
2616026289
"stack": false,
@@ -26250,6 +26379,13 @@
2625026379
"pointradius": 5,
2625126380
"points": false,
2625226381
"renderer": "flot",
26382+
"scopedVars": {
26383+
"runtime_instance": {
26384+
"selected": false,
26385+
"text": "10.2.6.240:8300",
26386+
"value": "10.2.6.240:8300"
26387+
}
26388+
},
2625326389
"seriesOverrides": [
2625426390
{}
2625526391
],
@@ -26360,6 +26496,13 @@
2636026496
"pointradius": 5,
2636126497
"points": false,
2636226498
"renderer": "flot",
26499+
"scopedVars": {
26500+
"runtime_instance": {
26501+
"selected": false,
26502+
"text": "10.2.6.240:8300",
26503+
"value": "10.2.6.240:8300"
26504+
}
26505+
},
2636326506
"seriesOverrides": [
2636426507
{
2636526508
"alias": "sweep",
@@ -26461,7 +26604,7 @@
2646126604
"h": 1,
2646226605
"w": 24,
2646326606
"x": 0,
26464-
"y": 25
26607+
"y": 26
2646526608
},
2646626609
"id": 10000,
2646726610
"panels": [
@@ -27769,12 +27912,7 @@
2776927912
"list": [
2777027913
{
2777127914
"allValue": null,
27772-
"current": {
27773-
"isNone": true,
27774-
"selected": false,
27775-
"text": "None",
27776-
"value": ""
27777-
},
27915+
"current": {},
2777827916
"datasource": "${DS_C1}",
2777927917
"definition": "",
2778027918
"description": null,
@@ -27801,12 +27939,7 @@
2780127939
},
2780227940
{
2780327941
"allValue": null,
27804-
"current": {
27805-
"isNone": true,
27806-
"selected": false,
27807-
"text": "None",
27808-
"value": ""
27809-
},
27942+
"current": {},
2781027943
"datasource": "${DS_C1}",
2781127944
"definition": "",
2781227945
"description": null,
@@ -27833,11 +27966,7 @@
2783327966
},
2783427967
{
2783527968
"allValue": ".*",
27836-
"current": {
27837-
"selected": false,
27838-
"text": "All",
27839-
"value": "$__all"
27840-
},
27969+
"current": {},
2784127970
"datasource": "${DS_C1}",
2784227971
"definition": "label_values(ticdc_processor_processor_tick_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, namespace)",
2784327972
"description": null,
@@ -27864,11 +27993,7 @@
2786427993
},
2786527994
{
2786627995
"allValue": ".*",
27867-
"current": {
27868-
"selected": false,
27869-
"text": "All",
27870-
"value": "$__all"
27871-
},
27996+
"current": {},
2787227997
"datasource": "${DS_C1}",
2787327998
"definition": "label_values(ticdc_processor_processor_tick_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, changefeed)",
2787427999
"description": null,
@@ -27895,11 +28020,7 @@
2789528020
},
2789628021
{
2789728022
"allValue": ".*",
27898-
"current": {
27899-
"selected": false,
27900-
"text": "All",
27901-
"value": "$__all"
27902-
},
28023+
"current": {},
2790328024
"datasource": "${DS_C1}",
2790428025
"definition": "label_values(process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"ticdc\"}, instance)",
2790528026
"description": null,
@@ -27926,11 +28047,7 @@
2792628047
},
2792728048
{
2792828049
"allValue": ".*",
27929-
"current": {
27930-
"selected": false,
27931-
"text": "All",
27932-
"value": "$__all"
27933-
},
28050+
"current": {},
2793428051
"datasource": "${DS_C1}",
2793528052
"definition": "label_values(tikv_engine_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, instance)",
2793628053
"description": null,
@@ -28013,11 +28130,7 @@
2801328130
},
2801428131
{
2801528132
"allValue": "",
28016-
"current": {
28017-
"selected": false,
28018-
"text": "All",
28019-
"value": "$__all"
28020-
},
28133+
"current": {},
2802128134
"datasource": "${DS_C1}",
2802228135
"definition": "label_values(process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"ticdc\"}, instance)",
2802328136
"description": null,
@@ -28044,11 +28157,7 @@
2804428157
},
2804528158
{
2804628159
"allValue": null,
28047-
"current": {
28048-
"selected": false,
28049-
"text": "All",
28050-
"value": "$__all"
28051-
},
28160+
"current": {},
2805228161
"datasource": "${DS_C1}",
2805328162
"definition": "label_values(ticdc_actor_number_of_workers{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"ticdc\"}, name)",
2805428163
"description": null,
@@ -28107,5 +28216,5 @@
2810728216
"timezone": "browser",
2810828217
"title": "${DS_C1}-TiCDC-New-Arch",
2810928218
"uid": "YiGL8hBZ0aab",
28110-
"version": 2
28219+
"version": 3
2811128220
}

‎pkg/metrics/dispatcher.go

+13-6
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,25 @@ package metrics
1616
import "github.com/prometheus/client_golang/prometheus"
1717

1818
var (
19-
// EventDispatcherManagerGauge is the metrics collector related to dispatcher manager.
2019
EventDispatcherManagerGauge = prometheus.NewGaugeVec(
2120
prometheus.GaugeOpts{
22-
Namespace: "tigate",
21+
Namespace: "ticdc",
2322
Subsystem: "dispatchermanagermanager",
2423
Name: "event_dispatcher_manager_count",
2524
Help: "The number of event dispatcher managers",
2625
}, []string{"namespace", "changefeed"})
2726

28-
// TableEventDispatcherGauge is the metrics collector related to dispatcher manager.
29-
TableEventDispatcherGauge = prometheus.NewGaugeVec(
27+
TableTriggerEventDispatcherGauge = prometheus.NewGaugeVec(
28+
prometheus.GaugeOpts{
29+
Namespace: "ticdc",
30+
Subsystem: "dispatchermanager",
31+
Name: "table_trigger_event_dispatcher_count",
32+
Help: "The number of table event dispatchers",
33+
}, []string{"namespace", "changefeed"})
34+
35+
EventDispatcherGauge = prometheus.NewGaugeVec(
3036
prometheus.GaugeOpts{
31-
Namespace: "tigate",
37+
Namespace: "ticdc",
3238
Subsystem: "dispatchermanager",
3339
Name: "table_event_dispatcher_count",
3440
Help: "The number of table event dispatchers",
@@ -124,7 +130,8 @@ var (
124130

125131
func InitDispatcherMetrics(registry *prometheus.Registry) {
126132
registry.MustRegister(EventDispatcherManagerGauge)
127-
registry.MustRegister(TableEventDispatcherGauge)
133+
registry.MustRegister(TableTriggerEventDispatcherGauge)
134+
registry.MustRegister(EventDispatcherGauge)
128135
registry.MustRegister(CreateDispatcherDuration)
129136
registry.MustRegister(EventDispatcherManagerResolvedTsGauge)
130137
registry.MustRegister(EventDispatcherManagerResolvedTsLagGauge)

0 commit comments

Comments
 (0)
Please sign in to comment.