Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TRT-1912: Treat flake as failures in component readiness #2172

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions config/views.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_new_tests: 95
include_multi_release_analysis: true
metrics:
Expand Down Expand Up @@ -122,6 +123,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -182,6 +184,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -241,6 +244,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_all_tests: 90
metrics:
enabled: false
Expand Down Expand Up @@ -302,6 +306,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_new_tests: 95
metrics:
enabled: true
Expand Down Expand Up @@ -365,6 +370,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -427,6 +433,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -488,6 +495,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_all_tests: 90
metrics:
enabled: false
Expand Down Expand Up @@ -548,6 +556,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: true
regression_tracking:
Expand Down Expand Up @@ -605,6 +614,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: true
regression_tracking:
Expand Down
58 changes: 32 additions & 26 deletions pkg/api/componentreadiness/component_report.go
Original file line number Diff line number Diff line change
Expand Up @@ -627,8 +627,7 @@ func (c *componentReportGenerator) normalizeProwJobName(prowName string) string
}

func (c *componentReportGenerator) fetchJobRunTestStatusResults(ctx context.Context,
query *bigquery.Query) (map[string][]crtype.
JobRunTestStatusRow, []error) {
query *bigquery.Query) (map[string][]crtype.JobRunTestStatusRow, []error) {
errs := []error{}
status := map[string][]crtype.JobRunTestStatusRow{}
log.Infof("Fetching job run test details with:\n%s\nParameters:\n%+v\n", query.Q, query.Parameters)
Expand Down Expand Up @@ -911,9 +910,7 @@ type triagedIncidentsGenerator struct {
SampleRelease crtype.RequestReleaseOptions
}

func (t *triagedIncidentsGenerator) generateTriagedIssuesFor(ctx context.Context) (resolvedissues.
TriagedIncidentsForRelease,
[]error) {
func (t *triagedIncidentsGenerator) generateTriagedIssuesFor(ctx context.Context) (resolvedissues.TriagedIncidentsForRelease, []error) {
before := time.Now()
incidents, errs := t.queryTriagedIssues(ctx)

Expand Down Expand Up @@ -1102,7 +1099,7 @@ func (c *componentReportGenerator) matchBaseRegression(testID crtype.ReportTestI
baseRegression = regressionallowances.IntentionalRegressionFor(baseRelease, testID.ColumnIdentification, testID.TestID)

// This could go away if we remove the option for ignoring fallback
if baseRegression != nil && baseRegression.PreviousPassPercentage() > getTestStatusSuccessRate(baseStats) {
if baseRegression != nil && baseRegression.PreviousPassPercentage(c.FlakeAsFailure) > c.getTestStatusPassRate(baseStats) {
Copy link
Contributor

@neisw neisw Dec 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we update places like these as well so that flakes can be counted as failures when c.FlakeAsFailure is enabled?

func (c *componentReportGenerator) getTestStatusPassRate(testStatus crtype.TestStatus) float64 {

func getFailureCount(status crtype.JobRunTestStatusRow) int {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, nevermind so you are separating out the success, failure and flake and then checking the flag in getPassRate

// override with the basis regression previous values
// testStats will reflect the expected threshold, not the computed values from the release with the allowed regression
baseRegressionPreviousRelease, err := previousRelease(c.BaseRelease.Release)
Expand Down Expand Up @@ -1466,15 +1463,18 @@ func getFailureCount(status crtype.JobRunTestStatusRow) int {
return failure
}

func getTestStatusSuccessRate(testStatus crtype.TestStatus) float64 {
return getSuccessRate(testStatus.SuccessCount, testStatus.TotalCount-testStatus.SuccessCount-testStatus.FlakeCount, testStatus.FlakeCount)
func (c *componentReportGenerator) getTestStatusPassRate(testStatus crtype.TestStatus) float64 {
return c.getPassRate(testStatus.SuccessCount, testStatus.TotalCount-testStatus.SuccessCount-testStatus.FlakeCount, testStatus.FlakeCount)
}

func getSuccessRate(success, failure, flake int) float64 {
func (c *componentReportGenerator) getPassRate(success, failure, flake int) float64 {
total := success + failure + flake
if total == 0 {
return 0.0
}
if c.FlakeAsFailure {
return float64(success) / float64(total)
}
return float64(success+flake) / float64(total)
}

Expand All @@ -1494,7 +1494,7 @@ func getRegressionStatus(basisPassPercentage, samplePassPercentage float64, isTr

func (c *componentReportGenerator) getEffectivePityFactor(basisPassPercentage float64, approvedRegression *regressionallowances.IntentionalRegression) int {
if approvedRegression != nil && approvedRegression.RegressedFailures > 0 {
regressedPassPercentage := approvedRegression.RegressedPassPercentage()
regressedPassPercentage := approvedRegression.RegressedPassPercentage(c.FlakeAsFailure)
if regressedPassPercentage < basisPassPercentage {
// product owner chose a required pass percentage, so we allow pity to cover that approved pass percent
// plus the existing pity factor to limit, "well, it's just *barely* lower" arguments.
Expand Down Expand Up @@ -1569,7 +1569,7 @@ func (c *componentReportGenerator) assessComponentStatus(
Start: baseStart,
End: baseEnd,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(baseSuccess, baseFailure, baseFlake),
SuccessRate: c.getPassRate(baseSuccess, baseFailure, baseFlake),
SuccessCount: baseSuccess,
FailureCount: baseFailure,
FlakeCount: baseFlake,
Expand All @@ -1592,7 +1592,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
Start: baseStart,
End: baseEnd,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(baseSuccess, baseFailure, baseFlake),
SuccessRate: c.getPassRate(baseSuccess, baseFailure, baseFlake),
SuccessCount: baseSuccess,
FailureCount: baseFailure,
FlakeCount: baseFlake,
Expand All @@ -1606,7 +1606,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
Start: &c.SampleRelease.Start,
End: &c.SampleRelease.End,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(sampleSuccess, sampleFailure, sampleFlake),
SuccessRate: c.getPassRate(sampleSuccess, sampleFailure, sampleFlake),
SuccessCount: sampleSuccess,
FailureCount: sampleFailure,
FlakeCount: sampleFlake,
Expand All @@ -1625,16 +1625,22 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}
} else {
// see if we had a significant regression prior to adjusting
basisPassPercentage := float64(baseSuccess+baseFlake) / float64(baseTotal)
initialPassPercentage := float64(sampleSuccess+sampleFlake) / float64(initialSampleTotal)
basePass := baseSuccess + baseFlake
samplePass := sampleSuccess + sampleFlake
if c.FlakeAsFailure {
basePass = baseSuccess
samplePass = sampleSuccess
}
basisPassPercentage := float64(basePass) / float64(baseTotal)
initialPassPercentage := float64(samplePass) / float64(initialSampleTotal)
effectivePityFactor := c.getEffectivePityFactor(basisPassPercentage, approvedRegression)

wasSignificant := false
// only consider wasSignificant if the sampleTotal has been changed and our sample
// pass percentage is below the basis
if initialSampleTotal > sampleTotal && initialPassPercentage < basisPassPercentage {
if basisPassPercentage-initialPassPercentage > float64(c.PityFactor)/100 {
wasSignificant, _ = c.fischerExactTest(requiredConfidence, initialSampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake)
wasSignificant, _ = c.fischerExactTest(requiredConfidence, initialSampleTotal-samplePass, samplePass, baseTotal-basePass, basePass)
}
// if it was significant without the adjustment use
// ExtremeTriagedRegression or SignificantTriagedRegression
Expand Down Expand Up @@ -1666,10 +1672,10 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}

// now that we know sampleTotal is non zero
samplePassPercentage := float64(sampleSuccess+sampleFlake) / float64(sampleTotal)
samplePassPercentage := float64(samplePass) / float64(sampleTotal)

// did we remove enough failures that we are below the MinimumFailure threshold?
if c.MinimumFailure != 0 && (sampleTotal-sampleSuccess-sampleFlake) < c.MinimumFailure {
if c.MinimumFailure != 0 && (sampleTotal-samplePass) < c.MinimumFailure {
testStats.ReportStatus = status
testStats.FisherExact = thrift.Float64Ptr(0.0)
return testStats
Expand All @@ -1679,9 +1685,9 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,

if improved {
// flip base and sample when improved
significant, fisherExact = c.fischerExactTest(requiredConfidence, baseTotal, baseSuccess, baseFlake, sampleTotal, sampleSuccess, sampleFlake)
significant, fisherExact = c.fischerExactTest(requiredConfidence, baseTotal-basePass, basePass, sampleTotal-samplePass, samplePass)
} else if basisPassPercentage-samplePassPercentage > float64(effectivePityFactor)/100 {
significant, fisherExact = c.fischerExactTest(requiredConfidence, sampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake)
significant, fisherExact = c.fischerExactTest(requiredConfidence, sampleTotal-samplePass, samplePass, baseTotal-basePass, basePass)
}
if significant {
if improved {
Expand Down Expand Up @@ -1716,7 +1722,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}

func (c *componentReportGenerator) buildPassRateTestStats(sampleSuccess, sampleFailure, sampleFlake int, requiredSuccessRate float64) crtype.ReportTestStats {
successRate := getSuccessRate(sampleSuccess, sampleFailure, sampleFlake)
successRate := c.getPassRate(sampleSuccess, sampleFailure, sampleFlake)

// Assume 2x our allowed failure rate = an extreme regression.
// i.e. if we require 90%, extreme is anything below 80%
Expand Down Expand Up @@ -1756,11 +1762,11 @@ func (c *componentReportGenerator) buildPassRateTestStats(sampleSuccess, sampleF
}
}

func (c *componentReportGenerator) fischerExactTest(confidenceRequired, sampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake int) (bool, float64) {
_, _, r, _ := fischer.FisherExactTest(sampleTotal-sampleSuccess-sampleFlake,
sampleSuccess+sampleFlake,
baseTotal-baseSuccess-baseFlake,
baseSuccess+baseFlake)
func (c *componentReportGenerator) fischerExactTest(confidenceRequired, sampleFailure, sampleSuccess, baseFailure, baseSuccess int) (bool, float64) {
_, _, r, _ := fischer.FisherExactTest(sampleFailure,
sampleSuccess,
baseFailure,
baseSuccess)
return r < 1-float64(confidenceRequired)/100, r
}

Expand Down
Loading