Skip to content

Commit 1d87094

Browse files
committed
fix(start): cleanup orphaned tmux sessions on startup
Add CleanupOrphanedSessions() to kill zombie Gas Town sessions before starting new agents. This prevents session/process accumulation when gt is restarted without a proper shutdown.

The cleanup:
- Scans for gt-* and hq-* sessions (covers polecats, witnesses, mayor, deacon)
- Kills sessions where Claude is not running (zombies)
- Uses KillSessionWithProcesses to properly terminate orphaned Claude processes

Also adds helper functions needed by handoff.go:
- KillPaneProcesses
- findClaudeProcessesDescendantsOf
- isDescendantOf
- getParentPID

This addresses the issue where, over time, accumulated sessions overwhelm system memory or file descriptor limits.
1 parent f5821cc commit 1d87094

2 files changed

Lines changed: 148 additions & 0 deletions

File tree

internal/cmd/start.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ func runStart(cmd *cobra.Command, args []string) error {
166166

167167
t := tmux.NewTmux()
168168

169+
// Clean up any orphaned sessions from previous runs before starting
170+
// This prevents session accumulation when gt is restarted without proper shutdown
171+
if cleaned, err := t.CleanupOrphanedSessions(); err != nil {
172+
fmt.Printf(" %s Could not cleanup orphaned sessions: %v\n", style.Dim.Render("○"), err)
173+
} else if cleaned > 0 {
174+
fmt.Printf(" %s Cleaned up %d orphaned session(s)\n", style.Dim.Render("○"), cleaned)
175+
}
176+
169177
fmt.Printf("Starting Gas Town from %s\n\n", style.Dim.Render(townRoot))
170178
fmt.Println("Starting all agents in parallel...")
171179
fmt.Println()

internal/tmux/tmux.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,104 @@ func getAllDescendants(pid string) []string {
215215
return result
216216
}
217217

218+
// isDescendantOf checks if a process is a descendant of an ancestor PID.
219+
// It does this by walking up the process tree using parent PIDs.
220+
func isDescendantOf(pid, ancestorPID string) bool {
221+
currentPID := pid
222+
maxIterations := 50 // Prevent infinite loops
223+
224+
for i := 0; i < maxIterations; i++ {
225+
if currentPID == ancestorPID {
226+
return true
227+
}
228+
229+
// Get the parent PID
230+
ppid, err := getParentPID(currentPID)
231+
if err != nil {
232+
return false
233+
}
234+
235+
// If we reached PID 1 (init), we've gone too far
236+
if ppid == "1" || ppid == "0" {
237+
return false
238+
}
239+
240+
currentPID = ppid
241+
}
242+
243+
return false
244+
}
245+
246+
// getParentPID returns the parent process ID of pid as reported by
// `ps -o ppid= -p <pid>`, with surrounding whitespace removed.
//
// If ps cannot be run or exits non-zero, the empty string and the underlying
// error are returned.
func getParentPID(pid string) (string, error) {
	psCmd := exec.Command("ps", "-o", "ppid=", "-p", pid)

	raw, err := psCmd.Output()
	if err != nil {
		return "", err
	}

	ppid := strings.TrimSpace(string(raw))
	return ppid, nil
}
254+
255+
// findClaudeProcessesDescendantsOf finds all Claude processes that are descendants of a PID.
256+
func findClaudeProcessesDescendantsOf(ancestorPID string) []string {
257+
var result []string
258+
259+
// Find all processes with "claude" or "node" in their command line
260+
pgrepCmd := exec.Command("pgrep", "-f", "claude")
261+
claudeOut, err := pgrepCmd.Output()
262+
if err != nil {
263+
// No claude processes found, try "node" as fallback
264+
pgrepCmd = exec.Command("pgrep", "-f", "node")
265+
claudeOut, err = pgrepCmd.Output()
266+
if err != nil {
267+
return result
268+
}
269+
}
270+
271+
// For each Claude process, check if it's a descendant of the ancestor PID
272+
claudePIDs := strings.Fields(strings.TrimSpace(string(claudeOut)))
273+
for _, pid := range claudePIDs {
274+
if isDescendantOf(pid, ancestorPID) {
275+
result = append(result, pid)
276+
}
277+
}
278+
279+
return result
280+
}
281+
282+
// KillPaneProcesses kills all Claude processes in a pane without killing the pane itself.
283+
// This is useful before respawn-pane to ensure the old process is actually terminated.
284+
// The pane parameter should be a pane ID (e.g., "%0") or session:window.pane format.
285+
func (t *Tmux) KillPaneProcesses(pane string) error {
286+
// Get the pane PID
287+
pid, err := t.run("list-panes", "-t", pane, "-F", "#{pane_pid}")
288+
if err != nil {
289+
return err
290+
}
291+
292+
panePID := strings.TrimSpace(pid)
293+
if panePID == "" {
294+
return nil // No pane PID, nothing to kill
295+
}
296+
297+
// Find all Claude/node processes that are descendants of the pane PID
298+
claudePIDs := findClaudeProcessesDescendantsOf(panePID)
299+
300+
// Send SIGTERM to all Claude processes
301+
for _, pid := range claudePIDs {
302+
_ = exec.Command("kill", "-TERM", pid).Run()
303+
}
304+
305+
// Wait for graceful shutdown
306+
time.Sleep(100 * time.Millisecond)
307+
308+
// Send SIGKILL to any remaining Claude processes
309+
for _, pid := range claudePIDs {
310+
_ = exec.Command("kill", "-KILL", pid).Run()
311+
}
312+
313+
return nil
314+
}
315+
218316
// KillServer terminates the entire tmux server and all sessions.
219317
func (t *Tmux) KillServer() error {
220318
_, err := t.run("kill-server")
@@ -1268,3 +1366,45 @@ func (t *Tmux) SetPaneDiedHook(session, agentID string) error {
12681366
_, err := t.run("set-hook", "-t", session, "pane-died", hookCmd)
12691367
return err
12701368
}
1369+
1370+
// CleanupOrphanedSessions kills any Gas Town sessions that have zombie agents.
1371+
// This prevents session accumulation when gt is restarted without proper shutdown.
1372+
//
1373+
// A session is cleaned up if:
1374+
// - Its name starts with "gt-" or "hq-" (Gas Town naming convention)
1375+
// - The agent (Claude) is not running in it (zombie session)
1376+
//
1377+
// Returns the number of sessions cleaned up and any errors encountered.
1378+
func (t *Tmux) CleanupOrphanedSessions() (int, error) {
1379+
sessions, err := t.ListSessions()
1380+
if err != nil {
1381+
return 0, err
1382+
}
1383+
1384+
cleaned := 0
1385+
for _, session := range sessions {
1386+
if session == "" {
1387+
continue
1388+
}
1389+
1390+
// Only clean up Gas Town sessions (both gt-* and hq-* prefixes)
1391+
if !strings.HasPrefix(session, "gt-") && !strings.HasPrefix(session, "hq-") {
1392+
continue
1393+
}
1394+
1395+
// Check if Claude is running - if so, leave it alone
1396+
if t.IsClaudeRunning(session) {
1397+
continue
1398+
}
1399+
1400+
// Zombie session: tmux alive but Claude dead
1401+
// Kill it with process cleanup to prevent orphan processes
1402+
if err := t.KillSessionWithProcesses(session); err != nil {
1403+
// Log but continue - don't fail the whole cleanup
1404+
continue
1405+
}
1406+
cleaned++
1407+
}
1408+
1409+
return cleaned, nil
1410+
}

0 commit comments

Comments
 (0)