From 57ca3db6567df26f01082cd49040da5b1c4ebd7a Mon Sep 17 00:00:00 2001 From: Sergey Nikitin Date: Tue, 2 Sep 2025 15:09:36 +0200 Subject: [PATCH 1/3] feat: Add metrics per specific step in the CI job In order to get more detailed overview of the CI runs, we want to add a new metric github_workflow_job_step_run_duration_seconds which is give the run stats for each job in each workflow. This way we will be able to see the full picture of the runs and see what takes the most of the time --- pkg/actionsmetrics/event_reader.go | 28 ++++++++++++++++++++++++++++ pkg/actionsmetrics/metrics.go | 14 ++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/pkg/actionsmetrics/event_reader.go b/pkg/actionsmetrics/event_reader.go index 3e14b8921a..75ccef3322 100644 --- a/pkg/actionsmetrics/event_reader.go +++ b/pkg/actionsmetrics/event_reader.go @@ -74,8 +74,10 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in labels["job_name"] = *e.WorkflowJob.Name keysAndValues = append(keysAndValues, "job_name", *e.WorkflowJob.Name) + var repoName string if e.Repo != nil { if n := e.Repo.Name; n != nil { + repoName = *n labels["repository"] = *n keysAndValues = append(keysAndValues, "repository", *n) } @@ -210,6 +212,32 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in githubWorkflowJobRunDurationSeconds.With(extraLabel("job_conclusion", *e.WorkflowJob.Conclusion, labels)).Observe(*runTimeSeconds) } } + + trackedRepositories := map[string]bool{ + "client": true, + "client-dev": true, + } + + if _, ok := trackedRepositories[repoName]; ok { + for _, step := range e.WorkflowJob.Steps { + if step.StartedAt == nil || step.CompletedAt == nil || step.Conclusion == nil || step.Name == nil || step.Status == nil || step.Number == nil { + continue // Skip steps with incomplete data + } + + stepLabels := extraLabel("step_name", *step.Name, labels) + stepLabels["step_number"] = fmt.Sprint(step.Number) + stepLabels["step_conclusion"] = *step.Conclusion + stepLabels["step_status"] = *step.Status + + stepDuration := step.CompletedAt.Sub(step.StartedAt.Time) + + githubWorkflowJobStepDurationSeconds.With(stepLabels).Observe(stepDuration.Seconds()) + + log.Info("processed step", "step_name", *step.Name, "step_conclusion", *step.Conclusion, "step_duration_seconds", stepDuration.Seconds()) + } + } else { + log.Info("skipping the step reporting for the repo", "repo_name", repoName) + } } } diff --git a/pkg/actionsmetrics/metrics.go b/pkg/actionsmetrics/metrics.go index bb13e879d2..c31cf4c3ae 100644 --- a/pkg/actionsmetrics/metrics.go +++ b/pkg/actionsmetrics/metrics.go @@ -42,10 +42,12 @@ func init() { githubWorkflowJobQueueDurationSeconds = initGithubWorkflowJobQueueDurationSeconds(queueBuckets) githubWorkflowJobRunDurationSeconds = initGithubWorkflowJobRunDurationSeconds(runBuckets) + githubWorkflowJobStepDurationSeconds = initGithubWorkflowJobStepDurationSeconds(runBuckets) metrics.Registry.MustRegister( githubWorkflowJobQueueDurationSeconds, githubWorkflowJobRunDurationSeconds, + githubWorkflowJobStepDurationSeconds, githubWorkflowJobConclusionsTotal, githubWorkflowJobsQueuedTotal, githubWorkflowJobsStartedTotal, @@ -145,10 +147,22 @@ func initGithubWorkflowJobRunDurationSeconds(buckets []float64) *prometheus.Hist ) } +func initGithubWorkflowJobStepDurationSeconds(buckets []float64) *prometheus.HistogramVec { + return prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "github_workflow_job_step_run_duration_seconds", + Help: "Run times for the steps in workflow jobs in seconds", + Buckets: buckets, + }, + metricLabels("step_name", "step_number", "step_conclusion", "step_status"), + ) +} + var ( commonLabels = []string{"runs_on", "job_name", "organization", "repository", "owner", "workflow_name", "is_main_branch"} githubWorkflowJobQueueDurationSeconds *prometheus.HistogramVec githubWorkflowJobRunDurationSeconds *prometheus.HistogramVec + githubWorkflowJobStepDurationSeconds *prometheus.HistogramVec githubWorkflowJobConclusionsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "github_workflow_job_conclusions_total", From 307a4d90ab7da84275d3e42a434b61bb72276dea Mon Sep 17 00:00:00 2001 From: Sergey Nikitin Date: Wed, 3 Sep 2025 11:32:19 +0200 Subject: [PATCH 2/3] only track succeeded jobs --- pkg/actionsmetrics/event_reader.go | 36 ++++++++++++++---------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pkg/actionsmetrics/event_reader.go b/pkg/actionsmetrics/event_reader.go index 75ccef3322..6373048a17 100644 --- a/pkg/actionsmetrics/event_reader.go +++ b/pkg/actionsmetrics/event_reader.go @@ -213,30 +213,28 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in } } - trackedRepositories := map[string]bool{ - "client": true, - "client-dev": true, - } - - if _, ok := trackedRepositories[repoName]; ok { - for _, step := range e.WorkflowJob.Steps { - if step.StartedAt == nil || step.CompletedAt == nil || step.Conclusion == nil || step.Name == nil || step.Status == nil || step.Number == nil { - continue // Skip steps with incomplete data - } + if *e.WorkflowJob.Conclusion == "success" { + trackedRepositories := map[string]bool{ + "client": true, + "client-dev": true, + } - stepLabels := extraLabel("step_name", *step.Name, labels) - stepLabels["step_number"] = fmt.Sprint(step.Number) - stepLabels["step_conclusion"] = *step.Conclusion - stepLabels["step_status"] = *step.Status + if _, ok := trackedRepositories[repoName]; ok { + for _, step := range e.WorkflowJob.Steps { + stepLabels := extraLabel("step_name", *step.Name, labels) + stepLabels["step_number"] = fmt.Sprint(step.Number) + stepLabels["step_conclusion"] = *step.Conclusion + stepLabels["step_status"] = *step.Status - stepDuration := step.CompletedAt.Sub(step.StartedAt.Time) + stepDuration := step.CompletedAt.Sub(step.StartedAt.Time) - githubWorkflowJobStepDurationSeconds.With(stepLabels).Observe(stepDuration.Seconds()) + githubWorkflowJobStepDurationSeconds.With(stepLabels).Observe(stepDuration.Seconds()) - log.Info("processed step", "step_name", *step.Name, "step_conclusion", *step.Conclusion, "step_duration_seconds", stepDuration.Seconds()) + log.Info("processed step", "step_name", *step.Name, "step_conclusion", *step.Conclusion, "step_duration_seconds", stepDuration.Seconds()) + } + } else { + log.Info("skipping the step reporting for the repo", "repo_name", repoName) } - } else { - log.Info("skipping the step reporting for the repo", "repo_name", repoName) } } } From 9117a35e959f3044c0598b65e5ec856eff7efd58 Mon Sep 17 00:00:00 2001 From: Sergey Nikitin Date: Wed, 3 Sep 2025 17:24:56 +0200 Subject: [PATCH 3/3] only check client repo --- pkg/actionsmetrics/event_reader.go | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/pkg/actionsmetrics/event_reader.go b/pkg/actionsmetrics/event_reader.go index 6373048a17..f6f5b5902a 100644 --- a/pkg/actionsmetrics/event_reader.go +++ b/pkg/actionsmetrics/event_reader.go @@ -74,7 +74,7 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in labels["job_name"] = *e.WorkflowJob.Name keysAndValues = append(keysAndValues, "job_name", *e.WorkflowJob.Name) - var repoName string + var repoName = "" if e.Repo != nil { if n := e.Repo.Name; n != nil { repoName = *n @@ -213,27 +213,18 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in } } - if *e.WorkflowJob.Conclusion == "success" { - trackedRepositories := map[string]bool{ - "client": true, - "client-dev": true, - } - - if _, ok := trackedRepositories[repoName]; ok { - for _, step := range e.WorkflowJob.Steps { - stepLabels := extraLabel("step_name", *step.Name, labels) - stepLabels["step_number"] = fmt.Sprint(step.Number) - stepLabels["step_conclusion"] = *step.Conclusion - stepLabels["step_status"] = *step.Status + if *e.WorkflowJob.Conclusion == "success" && repoName == "client" { + for _, step := range e.WorkflowJob.Steps { + stepLabels := extraLabel("step_name", *step.Name, labels) + stepLabels["step_number"] = fmt.Sprint(step.Number) + stepLabels["step_conclusion"] = *step.Conclusion + stepLabels["step_status"] = *step.Status - stepDuration := step.CompletedAt.Sub(step.StartedAt.Time) + stepDuration := step.CompletedAt.Sub(step.StartedAt.Time) - githubWorkflowJobStepDurationSeconds.With(stepLabels).Observe(stepDuration.Seconds()) + githubWorkflowJobStepDurationSeconds.With(stepLabels).Observe(stepDuration.Seconds()) - log.Info("processed step", "step_name", *step.Name, "step_conclusion", *step.Conclusion, "step_duration_seconds", stepDuration.Seconds()) - } - } else { - log.Info("skipping the step reporting for the repo", "repo_name", repoName) + log.Info("processed step in the repository", "repo", repoName, "step_name", *step.Name, "step_conclusion", *step.Conclusion) } } }