Skip to content

Commit 7f1c287

Browse files
committed
removed goroutineProfile semaphore from goroutine leak profile collection.
1 parent e5688d0 commit 7f1c287

File tree

2 files changed

+239
-13
lines changed

2 files changed

+239
-13
lines changed

src/runtime/mprof.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,7 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
12611261

12621262
//go:linkname pprof_goroutineLeakProfileWithLabels
12631263
func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1264-
return goroutineLeakProfileWithLabelsConcurrent(p, labels)
1264+
return goroutineLeakProfileWithLabels(p, labels)
12651265
}
12661266

12671267
// labels may be nil. If labels is non-nil, it must have the same length as p.
@@ -1323,38 +1323,37 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab
13231323
return work.goroutineLeak.count, false
13241324
}
13251325

1326-
// Use the same semaphore as goroutineProfileWithLabelsConcurrent,
1327-
// because ultimately we still use goroutine profiles.
1328-
semacquire(&goroutineProfile.sema)
1329-
13301326
// Unlike in goroutineProfileWithLabelsConcurrent, we don't need to
13311327
// save the current goroutine stack, because it is obviously not leaked.
1332-
1328+
// We also do not need to acquire any semaphores on goroutineProfile, because
1329+
// we don't use it for storage.
13331330
pcbuf := makeProfStack() // see saveg() for explanation
13341331

13351332
// Prepare a profile large enough to store all leaked goroutines.
13361333
n = work.goroutineLeak.count
13371334

13381335
if n > len(p) {
1339-
// There's not enough space in p to store the whole profile, so (per the
1340-
// contract of runtime.GoroutineProfile) we're not allowed to write to p
1341-
// at all and must return n, false.
1342-
semrelease(&goroutineProfile.sema)
1336+
// There's not enough space in p to store the whole profile, so
1337+
// we're not allowed to write to p at all and must return n, false.
13431338
return n, false
13441339
}
13451340

13461341
// Visit each leaked goroutine and try to record its stack.
1342+
var offset int
13471343
forEachGRace(func(gp1 *g) {
13481344
if readgstatus(gp1) == _Gleaked {
1349-
doRecordGoroutineProfile(gp1, pcbuf)
1345+
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &p[offset], pcbuf) })
1346+
if labels != nil {
1347+
labels[offset] = gp1.labels
1348+
}
1349+
offset++
13501350
}
13511351
})
13521352

13531353
if raceenabled {
13541354
raceacquire(unsafe.Pointer(&labelSync))
13551355
}
13561356

1357-
semrelease(&goroutineProfile.sema)
13581357
return n, true
13591358
}
13601359

src/runtime/pprof/pprof_test.go

Lines changed: 228 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"context"
1212
"fmt"
1313
"internal/abi"
14+
"internal/goexperiment"
1415
"internal/profile"
1516
"internal/syscall/unix"
1617
"internal/testenv"
@@ -774,7 +775,11 @@ func TestMorestack(t *testing.T) {
774775
for {
775776
go func() {
776777
growstack1()
777-
c <- true
778+
// NOTE(vsaioc): This goroutine may leak without this select.
779+
select {
780+
case c <- true:
781+
case <-time.After(duration):
782+
}
778783
}()
779784
select {
780785
case <-t:
@@ -1565,6 +1570,228 @@ func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[strin
15651570
return true
15661571
}
15671572

1573+
func goroutineLeakExample() {
1574+
<-make(chan struct{})
1575+
panic("unreachable")
1576+
}
1577+
1578+
func TestGoroutineLeakProfileConcurrency(t *testing.T) {
1579+
if !goexperiment.GoroutineLeakProfile {
1580+
// Do not run this test if the experimental flag is not enabled.
1581+
t.Skip("goroutine leak profile is not enabled")
1582+
}
1583+
const leakCount = 3
1584+
1585+
testenv.MustHaveParallelism(t)
1586+
regexLeakCount := regexp.MustCompile("goroutineleak profile: total ")
1587+
whiteSpace := regexp.MustCompile("\\s+")
1588+
1589+
// Regular goroutine profile. Used to check that there is no interference between
1590+
// the two profile types.
1591+
goroutineProf := Lookup("goroutine")
1592+
goroutineLeakProf := Lookup("goroutineleak")
1593+
1594+
// Check that a profile with debug information contains the leaked goroutine function.
1595+
includesLeak := func(t *testing.T, name, s string) {
1596+
if !strings.Contains(s, "runtime/pprof.goroutineLeakExample") {
1597+
t.Errorf("%s profile does not contain expected leaked goroutine (runtime/pprof.goroutineLeakExample): %s", name, s)
1598+
}
1599+
}
1600+
1601+
checkFrame := func(i int, j int, locations []*profile.Location, expectedFunctionName string) {
1602+
if len(locations) <= i {
1603+
t.Errorf("leaked goroutine stack locations out of range at %d of %d", i+1, len(locations))
1604+
return
1605+
}
1606+
location := locations[i]
1607+
if len(location.Line) <= j {
1608+
t.Errorf("leaked goroutine stack location lines out of range at %d of %d", j+1, len(location.Line))
1609+
return
1610+
}
1611+
if location.Line[j].Function.Name != expectedFunctionName {
1612+
t.Errorf("leaked goroutine stack expected %s as the location[%d].Line[%d] but found %s (%s:%d)", expectedFunctionName, i, j, location.Line[j].Function.Name, location.Line[j].Function.Filename, location.Line[j].Line)
1613+
}
1614+
}
1615+
1616+
// We use this helper to count the total number of leaked goroutines in the profile.
1617+
//
1618+
// NOTE(vsaioc): This value should match the number of leaks produced in this test,
1619+
// but other tests could also leak goroutines, in which case we would have a mismatch
1620+
// when bulk-running tests.
1621+
//
1622+
// The two mismatching outcomes are therefore:
1623+
// - More leaks than expected, which is a correctness issue with other tests.
1624+
// In this case, this test effectively checks other tests wrt
1625+
// goroutine leaks running tests in bulk (e.g., by running all.bash).
1626+
//
1627+
// - Fewer leaks than expected; this is an unfortunate symptom of scheduling
1628+
// non-determinism, which may occur once in a blue moon. We make
1629+
// a best-effort attempt to allow the expected leaks to occur, by yielding
1630+
// the main thread, but it is never a guarantee.
1631+
countLeaks := func(t *testing.T, number int, s string) {
1632+
// Strip the profile header
1633+
parts := regexLeakCount.Split(s, -1)
1634+
if len(parts) < 2 {
1635+
t.Fatalf("goroutineleak profile does not contain 'goroutineleak profile: total ': %s\nparts: %v", s, parts)
1636+
return
1637+
}
1638+
1639+
parts = whiteSpace.Split(parts[1], -1)
1640+
1641+
count, err := strconv.ParseInt(parts[0], 10, 64)
1642+
if err != nil {
1643+
t.Fatalf("goroutineleak profile count is not a number: %s\nerror: %v", s, err)
1644+
}
1645+
1646+
// Check that the total number of leaked goroutines is exactly the expected number.
1647+
if count != int64(number) {
1648+
t.Errorf("goroutineleak profile does not contain exactly %d leaked goroutines: %d", number, count)
1649+
}
1650+
}
1651+
1652+
checkLeakStack := func(t *testing.T) func(pc uintptr, locations []*profile.Location, _ map[string][]string) {
1653+
return func(pc uintptr, locations []*profile.Location, _ map[string][]string) {
1654+
if pc != leakCount {
1655+
t.Errorf("expected %d leaked goroutines with specific stack configurations, but found %d", leakCount, pc)
1656+
return
1657+
}
1658+
switch len(locations) {
1659+
case 4:
1660+
// We expect a receive operation. This is the typical stack.
1661+
checkFrame(0, 0, locations, "runtime.gopark")
1662+
checkFrame(1, 0, locations, "runtime.chanrecv")
1663+
checkFrame(2, 0, locations, "runtime.chanrecv1")
1664+
switch len(locations[3].Line) {
1665+
case 2:
1666+
// Running `go func() { goroutineLeakExample() }()` will produce a stack with 2 lines.
1667+
// The anonymous function will have the call to goroutineLeakExample inlined.
1668+
checkFrame(3, 1, locations, "runtime/pprof.TestGoroutineLeakProfileConcurrency.func5")
1669+
fallthrough
1670+
case 1:
1671+
// Running `go goroutineLeakExample()` will produce a stack with 1 line.
1672+
checkFrame(3, 0, locations, "runtime/pprof.goroutineLeakExample")
1673+
default:
1674+
t.Errorf("leaked goroutine stack location expected 1 or 2 lines in the 4th location but found %d", len(locations[3].Line))
1675+
return
1676+
}
1677+
default:
1678+
message := fmt.Sprintf("leaked goroutine stack expected 4 or 5 locations but found %d", len(locations))
1679+
for _, location := range locations {
1680+
for _, line := range location.Line {
1681+
message += fmt.Sprintf("\n%s:%d", line.Function.Name, line.Line)
1682+
}
1683+
}
1684+
t.Errorf("%s", message)
1685+
}
1686+
}
1687+
}
1688+
// Leak some goroutines that will feature in the goroutine leak profile
1689+
for i := 0; i < leakCount; i++ {
1690+
go goroutineLeakExample()
1691+
go func() {
1692+
// Leak another goroutine that will feature a slightly different stack.
1693+
// This includes the frame runtime/pprof.TestGoroutineLeakProfileConcurrency.func1.
1694+
goroutineLeakExample()
1695+
panic("unreachable")
1696+
}()
1697+
// Yield several times to allow the goroutines to leak.
1698+
runtime.Gosched()
1699+
runtime.Gosched()
1700+
}
1701+
1702+
// Give all goroutines a chance to leak.
1703+
time.Sleep(time.Second)
1704+
1705+
t.Run("profile contains leak", func(t *testing.T) {
1706+
var w strings.Builder
1707+
goroutineLeakProf.WriteTo(&w, 0)
1708+
parseProfile(t, []byte(w.String()), checkLeakStack(t))
1709+
})
1710+
1711+
t.Run("leak persists between sequential profiling runs", func(t *testing.T) {
1712+
for i := 0; i < 2; i++ {
1713+
var w strings.Builder
1714+
goroutineLeakProf.WriteTo(&w, 0)
1715+
parseProfile(t, []byte(w.String()), checkLeakStack(t))
1716+
}
1717+
})
1718+
1719+
// Concurrent calls to the goroutine leak profiler should not trigger data races
1720+
// or corruption.
1721+
t.Run("overlapping profile requests", func(t *testing.T) {
1722+
ctx := context.Background()
1723+
ctx, cancel := context.WithTimeout(ctx, time.Second)
1724+
defer cancel()
1725+
1726+
var wg sync.WaitGroup
1727+
for i := 0; i < 2; i++ {
1728+
wg.Add(1)
1729+
Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
1730+
go func() {
1731+
defer wg.Done()
1732+
for ctx.Err() == nil {
1733+
var w strings.Builder
1734+
goroutineLeakProf.WriteTo(&w, 1)
1735+
// NOTE(vsaioc): We cannot always guarantee that the leak will actually be recorded in
1736+
// the profile when making concurrent goroutine leak requests, because the GC runs
1737+
// concurrently with the profiler and may reset the leaked goroutine status before
1738+
// a concurrent profiler has the chance to record it.
1739+
//
1740+
// However, the goroutine leak count will persist, so the profile total is
1741+
// still expected to match the full leak count exactly.
1742+
countLeaks(t, 2*leakCount, w.String())
1743+
}
1744+
}()
1745+
})
1746+
}
1747+
wg.Wait()
1748+
})
1749+
1750+
// Concurrent calls to the goroutine leak profiler should not trigger data races
1751+
// or corruption, or interfere with regular goroutine profiles.
1752+
t.Run("overlapping goroutine and goroutine leak profile requests", func(t *testing.T) {
1753+
ctx := context.Background()
1754+
ctx, cancel := context.WithTimeout(ctx, time.Second)
1755+
defer cancel()
1756+
1757+
var wg sync.WaitGroup
1758+
for i := 0; i < 2; i++ {
1759+
wg.Add(2)
1760+
Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
1761+
go func() {
1762+
defer wg.Done()
1763+
for ctx.Err() == nil {
1764+
var w strings.Builder
1765+
goroutineLeakProf.WriteTo(&w, 1)
1766+
// NOTE(vsaioc): We cannot always guarantee that the leak will
1767+
// actually be recorded in the profile during concurrent
1768+
// goroutine leak profile requests. The GC runs concurrently with
1769+
// the profiler and may reset the leaked goroutine status before
1770+
// the profiler has the chance to record the leaked stacks.
1771+
//
1772+
// However, the leaked goroutine count is not reset.
1773+
// Other tests are not expected to leak goroutines,
1774+
// so the leak count is expected to be consistent.
1775+
countLeaks(t, 2*leakCount, w.String())
1776+
}
1777+
}()
1778+
go func() {
1779+
defer wg.Done()
1780+
for ctx.Err() == nil {
1781+
var w strings.Builder
1782+
goroutineProf.WriteTo(&w, 1)
1783+
// The regular goroutine profile should see the leaked
1784+
// goroutines. We simply check that the goroutine leak
1785+
// profile does not corrupt the goroutine profile state.
1786+
includesLeak(t, "goroutine", w.String())
1787+
}
1788+
}()
1789+
})
1790+
}
1791+
wg.Wait()
1792+
})
1793+
}
1794+
15681795
func TestGoroutineProfileConcurrency(t *testing.T) {
15691796
testenv.MustHaveParallelism(t)
15701797

0 commit comments

Comments
 (0)