Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cmd/entire/cli/checkpoint/remote/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/entireio/cli/cmd/entire/cli/execx"
"github.com/entireio/cli/cmd/entire/cli/settings"
)

// cancelWaitDelay caps how long Wait blocks reading from inherited pipes after
// the context is cancelled and the process group has been killed. Five seconds
// is generous enough to drain legitimate buffered output but short enough to
// keep a stuck push from holding the hook open indefinitely.
const cancelWaitDelay = 5 * time.Second

// CheckpointTokenEnvVar is the environment variable for providing an access token
// used to authenticate git push/fetch operations for checkpoint branches.
// The token is injected as an HTTP Basic Authorization header per RFC 7617:
Expand Down Expand Up @@ -209,6 +217,11 @@ func newCommand(ctx context.Context, args ...string) *exec.Cmd {
mkCmd := func(finalArgs []string) *exec.Cmd {
c := exec.CommandContext(ctx, "git", finalArgs...)
c.Stdin = nil // Disconnect stdin to prevent hanging in hook context
// git push/fetch over HTTPS spawns helpers (git-remote-https, credential
// helpers) that inherit our stdio pipes. Without process-group kill +
// WaitDelay, ctx cancellation only SIGKILLs git itself; the helpers keep
// the pipe open and CombinedOutput blocks indefinitely.
execx.KillOnCancel(c, cancelWaitDelay)
return c
}

Expand Down
29 changes: 29 additions & 0 deletions cmd/entire/cli/checkpoint/remote/git_killoncancel_unix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//go:build unix

package remote

import (
"context"
"testing"
)

// newCommand must wire up KillOnCancel so that git push/fetch subprocesses
// don't strand grandchildren (git-remote-https, credential helpers) holding
// the inherited stdio pipes when their context fires. Without this,
// tryPushSessionsCommon's 2-minute timeout was effectively unbounded — the
// real-world symptom that motivated this guard.
func TestNewCommand_AppliesKillOnCancel(t *testing.T) {
t.Parallel()

cmd := newCommand(context.Background(), "status")

if cmd.SysProcAttr == nil || !cmd.SysProcAttr.Setpgid {
t.Fatalf("newCommand should set Setpgid for process-group kill; got %+v", cmd.SysProcAttr)
}
if cmd.Cancel == nil {
t.Fatal("newCommand should set Cancel to kill the process group")
}
if cmd.WaitDelay <= 0 {
t.Fatalf("newCommand should set WaitDelay; got %s", cmd.WaitDelay)
}
}
31 changes: 31 additions & 0 deletions cmd/entire/cli/execx/killoncancel_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//go:build unix

package execx

import (
"os/exec"
"syscall"
"time"
)

// KillOnCancel configures cmd so that when its context is cancelled the entire
// process group is terminated, and any inherited stdio pipes are forcibly
// closed after waitDelay. This is necessary for subprocesses that fork helpers
// (e.g. `git push` over HTTPS spawns `git-remote-https` and credential
// helpers): SIGKILL on the direct child leaves grandchildren holding the
// inherited pipes, and CombinedOutput stays blocked on Read until they exit on
// their own. Setpgid lets Cancel kill the whole group; WaitDelay is a backstop
// in case a process slips out of the group somehow.
func KillOnCancel(cmd *exec.Cmd, waitDelay time.Duration) {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.SysProcAttr.Setpgid = true
cmd.Cancel = func() error {
if cmd.Process == nil {
return nil
}
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
cmd.WaitDelay = waitDelay
}
86 changes: 86 additions & 0 deletions cmd/entire/cli/execx/killoncancel_unix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//go:build unix

package execx

import (
"context"
"os/exec"
"testing"
"time"
)

// grandchildHoldingStdoutCmd returns a shell snippet that backgrounds a sleep
// (the grandchild) before waiting indefinitely in the parent. The grandchild
// inherits the parent's stdout, so any pipe-reading caller blocks on it long
// after the parent has been killed. The sleep is short enough that the
// regression-pinning test costs ~3s rather than 5s on every CI run while
// still leaving a clear gap above the 2s assertion threshold.
const grandchildHoldingStdoutCmd = "sleep 3 & echo started; wait"

// Regression: exec.CommandContext alone does not unblock CombinedOutput when
// the killed child has grandchildren still holding the inherited stdout pipe.
// This test pins that behavior so reviewers can see why KillOnCancel exists.
// If a future Go release fixes this in the stdlib, this assertion fails and
// KillOnCancel can be reconsidered.
func TestExecCommandContext_HangsOnGrandchildHoldingPipe(t *testing.T) {
t.Parallel()
if _, err := exec.LookPath("sh"); err != nil {
t.Skip("sh not available")
}

ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()

cmd := exec.CommandContext(ctx, "/bin/sh", "-c", grandchildHoldingStdoutCmd)

Comment on lines +27 to +35
start := time.Now()
// We expect CombinedOutput to return with an error (killed by ctx) — the
// point of the test is the duration, so the error is intentionally ignored.
_, _ = cmd.CombinedOutput() //nolint:errcheck // duration is what we assert on
elapsed := time.Since(start)

if elapsed < 2*time.Second {
t.Fatalf("expected CombinedOutput to hang on grandchild stdout; got elapsed=%s (Go may have fixed this — consider removing KillOnCancel)", elapsed)
}
}

// KillOnCancel must terminate the whole process group and release the pipes
// shortly after the context fires, even when grandchildren hold stdout.
func TestKillOnCancel_TerminatesGrandchildHoldingPipe(t *testing.T) {
t.Parallel()
if _, err := exec.LookPath("sh"); err != nil {
t.Skip("sh not available")
}

ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()

cmd := exec.CommandContext(ctx, "/bin/sh", "-c", grandchildHoldingStdoutCmd)
KillOnCancel(cmd, 500*time.Millisecond)

start := time.Now()
_, err := cmd.CombinedOutput()
elapsed := time.Since(start)

if err == nil {
t.Fatal("expected error from cancelled command")
}
if elapsed > 2*time.Second {
t.Fatalf("KillOnCancel should unblock CombinedOutput well before the grandchild exits; got elapsed=%s", elapsed)
}
}

func TestKillOnCancel_SetsSetpgid(t *testing.T) {
t.Parallel()
cmd := exec.CommandContext(context.Background(), "/bin/true")
KillOnCancel(cmd, time.Second)
if cmd.SysProcAttr == nil || !cmd.SysProcAttr.Setpgid {
t.Fatalf("KillOnCancel did not set Setpgid: %+v", cmd.SysProcAttr)
}
if cmd.WaitDelay != time.Second {
t.Fatalf("KillOnCancel WaitDelay = %s; want 1s", cmd.WaitDelay)
}
if cmd.Cancel == nil {
t.Fatal("KillOnCancel did not set Cancel")
}
}
47 changes: 47 additions & 0 deletions cmd/entire/cli/execx/killoncancel_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//go:build windows

package execx

import (
"fmt"
"os/exec"
"strconv"
"syscall"
"time"
)

// KillOnCancel configures cmd so that when its context is cancelled the
// process and any descendants are terminated and any inherited stdio pipes
// are forcibly closed after waitDelay.
//
// Windows specifics: CREATE_NEW_PROCESS_GROUP isolates the child from the
// parent's console signals, but neither it nor Process.Kill terminates
// descendants — git over HTTPS spawns git-remote-https plus credential
// helpers that survive a direct kill and keep the inherited stdio open. We
// shell out to taskkill.exe /F /T (force + tree) so the whole descendant
// chain dies. If taskkill is unavailable, we fall back to the direct kill
// and rely on waitDelay to bound the user-visible hang.
func KillOnCancel(cmd *exec.Cmd, waitDelay time.Duration) {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.SysProcAttr.CreationFlags |= syscall.CREATE_NEW_PROCESS_GROUP
cmd.Cancel = func() error {
if cmd.Process == nil {
return nil
}
kill := exec.Command("taskkill.exe", "/F", "/T", "/PID", strconv.Itoa(cmd.Process.Pid))
taskkillErr := kill.Run()
if taskkillErr == nil {
return nil
}
// taskkill is missing or refused — fall back to direct kill so at
// least the visible child terminates. WaitDelay still closes the
// pipes within bounded time, so the caller does not hang.
if killErr := cmd.Process.Kill(); killErr != nil {
return fmt.Errorf("taskkill: %w; fallback kill: %w", taskkillErr, killErr)
}
return taskkillErr
}
cmd.WaitDelay = waitDelay
}
Loading
Loading