diff --git a/docs/configuration.md b/docs/configuration.md index 04cacbc..72d91c7 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -116,17 +116,49 @@ Provider-specific fields: #### trayTypes Defines one or more tray "profiles" that the Tray Manager can maintain. -| Key | Type | Required | Description | -|---------------------|--------------------|----------|--------------------------------------------------------------------------------| -| name | string | yes | Unique name for the tray type. Also used as the runner scale set name/label. | -| provider | string | yes | Name of a provider defined in `providers`. | -| runnerGroupId | int | yes | GitHub Runner Group ID to register runners into. | -| githubOrg | string | yes | The GitHub org key, matching one of the entries under `github`. | -| shutdown | bool | no | Whether instances should self-terminate when the job completes. | -| maxTrays | int | no | Maximum number of concurrent trays of this type. | -| maxParallelCreation | int | no | Maximum number of trays to create in parallel. Defaults to 10. | -| extraMetadata | map[string]string | no | Extra key-value metadata passed to the provider (e.g., GCE instance metadata). | -| config | provider-dependent | yes | Provider-specific configuration for how to create a tray (see below). | +| Key | Type | Required | Description | +|---------------------|--------------------|----------|------------------------------------------------------------------------------------------------------------| +| name | string | yes | Unique name for the tray type. Also used as the runner scale set name/label. | +| provider | string | yes | Name of a provider defined in `providers`. | +| runnerGroupId | int | yes | GitHub Runner Group ID to register runners into. | +| githubOrg | string | yes | The GitHub org key, matching one of the entries under `github`. | +| shutdown | bool | no | Whether instances should self-terminate when the job completes. 
| +| maxTrays | int | no | Maximum number of concurrent trays of this type. | +| maxParallelCreation | int | no | Maximum number of trays to create in parallel. Defaults to 10. | +| runnerVersion | string | no | Pin the GitHub Actions runner version the agent downloads. Empty -> latest from GH releases. | +| bootstrap | object | no | Provider-injected agent bootstrap (see below). Enabled by default; set `bootstrap.enabled: false` to opt out. | +| extraMetadata | map[string]string | no | Extra key-value metadata passed to the provider (e.g., GCE instance metadata). | +| config | provider-dependent | yes | Provider-specific configuration for how to create a tray (see below). | + +#### bootstrap + +When enabled, the provider injects a script into the spawned tray that +downloads the cattery agent binary from `/agent/download` and +starts it. The agent in turn downloads the GitHub Actions runner if it is not +already present on disk. + +This means a fresh VM image only needs the OS plus whatever heavy tooling the +user wants (Docker, language runtimes, security agents). Cattery handles +installing itself and the runner. + +| Key | Type | Required | Description | +|--------------|--------|----------|------------------------------------------------------------------------------------------------------------------------| +| enabled | bool | no | Master switch. Defaults to `true`. Set `false` for legacy pre-baked images that already start the agent themselves. | +| os | string | no | Selects the built-in script template. Default: `linux`. | +| agentFolder | string | no | Where to download the cattery binary on the tray. Default: `/opt/cattery`. | +| runnerFolder | string | no | Where to install the GH Actions runner. Default: `/opt/cattery/actions-runner`. Passed to the agent as `--runner-folder`. | +| user | string | no | OS user to run the agent as. Default: empty (script runs as whatever user the provider's delivery mechanism uses). 
| +| script | string | no | Override the built-in template. Treated as a Go `text/template` with `{{.ServerURL}}`, `{{.AgentID}}`, `{{.AgentFolder}}`, `{{.RunnerFolder}}`, `{{.User}}` available. | + +Provider delivery: + +- **gce**: script is set as the `startup-script` instance metadata key. +- **docker**: script is piped to `/bin/sh -s` as the container's entrypoint stdin. + +**Migration note**: If you previously relied on a pre-baked image with its own +systemd unit (e.g. `cattery.service` + `install-agent.sh`) starting the agent, +add `bootstrap: { enabled: false }` to those tray types after upgrading. +Otherwise the injected startup script will spawn a second agent. Provider-specific config under trayType.config: diff --git a/examples/example-config.yaml b/examples/example-config.yaml index f18917a..fba7f72 100644 --- a/examples/example-config.yaml +++ b/examples/example-config.yaml @@ -32,15 +32,17 @@ providers: credentialsFile: path/to/credentials.json trayTypes: - - name: cattery-tiny - provider: docker-local - shutdown: false - runnerGroupId: 3 - githubOrg: My-Github-Org - config: - image: cattery-runner-tiny:latest + # Bootstrap is enabled by default for every tray type. The provider injects + # a script that downloads the cattery agent from /agent/download and runs it. + # The agent itself downloads the GH Actions runner if not present on disk + # (latest release by default, or pinned via runnerVersion). 
- - name: cattery-gce + - name: cattery-gce-default + provider: gce + githubOrg: My-Github-Org + runnerGroupId: 3 + shutdown: true + maxTrays: 3 config: instanceTemplate: global/instanceTemplates/ machineType: e2-standard-2 @@ -48,12 +50,50 @@ trayTypes: zones: - us-west1-a - us-west1-b - extraMetadata: - # can be: version (0.0.2), server (to download binary from server) or a commit hash - cattery-agent-version: 0.0.4 - # cattery-agent-version: 13d197aa1e73db09514772e55794b3a0f9b7952b + + - name: cattery-gce-custom + provider: gce githubOrg: My-Github-Org - maxTrays: 3 # max number of VMs of this type + runnerGroupId: 3 + shutdown: true + runnerVersion: "2.332.0" # pin runner version (default: latest) + bootstrap: + user: cattery # run agent as this user (default: root) + agentFolder: /home/cattery + runnerFolder: /home/cattery/actions-runner + # script: | # optional template override + # #!/bin/bash + # curl -sSfL {{.ServerURL}}/agent/download -o /tmp/cattery + # ... + config: + instanceTemplate: global/instanceTemplates/ + machineType: e2-standard-2 + project: my-gcp-project + zones: + - us-west1-a + + # Opt out: legacy pre-baked image where the agent is already installed and + # started by your own systemd unit (e.g. the cattery.service chain). 
+ - name: cattery-gce-legacy provider: gce - runnerGroupId: 3 # check in github org settings -> Runner groups + githubOrg: My-Github-Org + runnerGroupId: 3 shutdown: true + bootstrap: + enabled: false + extraMetadata: + cattery-agent-version: 0.0.4 # consumed by your image's install-agent.sh + config: + instanceTemplate: global/instanceTemplates/legacy-image + machineType: e2-standard-2 + project: my-gcp-project + zones: + - us-west1-a + + - name: cattery-tiny + provider: docker-local + shutdown: false + runnerGroupId: 3 + githubOrg: My-Github-Org + config: + image: ubuntu:24.04 # plain image -- bootstrap installs the agent diff --git a/src/agent/agent.go b/src/agent/agent.go index f688d3c..d09972f 100644 --- a/src/agent/agent.go +++ b/src/agent/agent.go @@ -3,6 +3,7 @@ package agent import ( "cattery/agent/catteryClient" "cattery/agent/githubListener" + "cattery/agent/runner" "cattery/agent/tools" "cattery/lib/agents" "cattery/lib/messages" @@ -41,6 +42,7 @@ type CatteryAgent struct { agent *agents.Agent agentId string + runnerFolder string listenerExecPath string } @@ -48,6 +50,7 @@ func NewCatteryAgent(runnerFolder string, catteryServerUrl string, agentId strin return &CatteryAgent{ logger: log.WithFields(log.Fields{"name": "agent", "agentId": agentId}), catteryClient: catteryClient.NewCatteryClient(catteryServerUrl, agentId), + runnerFolder: runnerFolder, listenerExecPath: path.Join(runnerFolder, "bin", "Runner.Listener"), agentId: agentId, } @@ -56,12 +59,20 @@ func NewCatteryAgent(runnerFolder string, catteryServerUrl string, agentId strin func (a *CatteryAgent) Start() { a.logger.Info("Starting Cattery Agent") - agent, jitConfig, err := a.catteryClient.RegisterAgent(a.agentId) + resp, err := a.catteryClient.RegisterAgent(a.agentId) if err != nil { a.logger.Errorf("Failed to register agent: %v", err) return } - a.agent = agent + a.agent = &resp.Agent + jitConfig := &resp.JitConfig + + // Ensure the GH Actions runner distribution is present on disk before we + 
// try to launch Runner.Listener. No-op when the runner is pre-baked. + if err := runner.EnsureRunner(a.runnerFolder, resp.RunnerVersion); err != nil { + a.logger.Errorf("Failed to ensure runner: %v", err) + return + } a.logger.Info("Agent registered, starting Listener") diff --git a/src/agent/catteryClient/client.go b/src/agent/catteryClient/client.go index 2acf8f2..f39be4f 100644 --- a/src/agent/catteryClient/client.go +++ b/src/agent/catteryClient/client.go @@ -29,42 +29,43 @@ func NewCatteryClient(baseURL string, agentId string) *CatteryClient { } } -// RegisterAgent request just-in-time runner configuration from the Cattery server -// and returns the configuration as a base64 encoded string +// RegisterAgent requests just-in-time runner configuration from the Cattery server. +// Returns the full RegisterResponse so callers can read the JIT config, agent info, +// and runner version (used for runner bootstrap). // // https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-configuration-for-a-just-in-time-runner-for-an-organization -func (c *CatteryClient) RegisterAgent(id string) (*agents.Agent, *string, error) { +func (c *CatteryClient) RegisterAgent(id string) (*messages.RegisterResponse, error) { client := c.httpClient requestUrl, err := url.JoinPath(c.baseURL, "/agent", "register/", id) if err != nil { - return nil, nil, err + return nil, err } request, err := http.NewRequest("GET", requestUrl, nil) if err != nil { - return nil, nil, fmt.Errorf("failed to create request: %w", err) + return nil, fmt.Errorf("failed to create request: %w", err) } response, err := client.Do(request) if err != nil { - return nil, nil, err + return nil, err } defer response.Body.Close() if response.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(response.Body) - return nil, nil, fmt.Errorf("response status code: %s body: %s", response.Status, string(bodyBytes)) + return nil, fmt.Errorf("response status code: %s body: %s", response.Status, 
string(bodyBytes)) } registerResponse := &messages.RegisterResponse{} err = json.NewDecoder(response.Body).Decode(registerResponse) if err != nil { - return nil, nil, err + return nil, err } - return &registerResponse.Agent, &registerResponse.JitConfig, nil + return registerResponse, nil } // UnregisterAgent sends a POST request to the Cattery server to unregister the agent diff --git a/src/agent/runner/bootstrap.go b/src/agent/runner/bootstrap.go new file mode 100644 index 0000000..9fd6dfd --- /dev/null +++ b/src/agent/runner/bootstrap.go @@ -0,0 +1,216 @@ +// Package runner handles ensuring the GitHub Actions runner distribution is +// present on disk before the agent launches Runner.Listener. +// +// If the runner is already installed (e.g. baked into the VM image), EnsureRunner +// is a no-op. Otherwise it downloads the runner tarball from GitHub releases. +package runner + +import ( + "archive/tar" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "runtime" + "strings" + "time" + + log "github.com/sirupsen/logrus" +) + +// httpClient is package-scoped so tests can swap it out. +var httpClient = &http.Client{Timeout: 5 * time.Minute} + +// latestReleaseURL returns the GitHub API endpoint that yields the latest +// actions/runner release. Overridable for tests. +var latestReleaseURL = "https://api.github.com/repos/actions/runner/releases/latest" + +// runnerDownloadURL builds the tarball URL for a given version + platform. +// Overridable for tests. +var runnerDownloadURL = func(version, osName, arch string) string { + return fmt.Sprintf( + "https://github.com/actions/runner/releases/download/v%s/actions-runner-%s-%s-%s.tar.gz", + version, osName, arch, version, + ) +} + +// EnsureRunner makes sure Runner.Listener exists under runnerFolder/bin. +// If it doesn't, the GH Actions runner tarball is downloaded and extracted. +// When runnerVersion is empty, the latest release tag is fetched from GitHub. 
+func EnsureRunner(runnerFolder, runnerVersion string) error { + listenerPath := filepath.Join(runnerFolder, "bin", "Runner.Listener") + if _, err := os.Stat(listenerPath); err == nil { + log.Infof("Runner.Listener already present at %s, skipping download", listenerPath) + return nil + } + + version := runnerVersion + if version == "" { + latest, err := fetchLatestRunnerVersion() + if err != nil { + return fmt.Errorf("resolve latest runner version: %w", err) + } + version = latest + log.Infof("Resolved latest GH runner version: %s", version) + } + // strip optional leading 'v' + version = strings.TrimPrefix(version, "v") + + osName, arch, err := runnerPlatform() + if err != nil { + return err + } + + url := runnerDownloadURL(version, osName, arch) + log.Infof("Downloading GH Actions runner %s/%s v%s from %s", osName, arch, version, url) + + if err := os.MkdirAll(runnerFolder, 0o755); err != nil { + return fmt.Errorf("create runner folder: %w", err) + } + + resp, err := httpClient.Get(url) + if err != nil { + return fmt.Errorf("download runner: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download runner: HTTP %d from %s", resp.StatusCode, url) + } + + if err := extractTarGz(resp.Body, runnerFolder); err != nil { + return fmt.Errorf("extract runner: %w", err) + } + + if _, err := os.Stat(listenerPath); err != nil { + return fmt.Errorf("Runner.Listener missing after extraction at %s: %w", listenerPath, err) + } + log.Infof("GH Actions runner installed at %s", runnerFolder) + return nil +} + +// fetchLatestRunnerVersion queries the GH API for the latest runner release tag. 
+func fetchLatestRunnerVersion() (string, error) { + req, err := http.NewRequest(http.MethodGet, latestReleaseURL, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "application/vnd.github+json") + resp, err := httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("latest release: HTTP %d", resp.StatusCode) + } + var payload struct { + TagName string `json:"tag_name"` + } + if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { + return "", err + } + if payload.TagName == "" { + return "", fmt.Errorf("latest release: empty tag_name") + } + return payload.TagName, nil +} + +// runnerPlatform maps Go runtime info to the actions/runner naming scheme. +func runnerPlatform() (osName, arch string, err error) { + switch runtime.GOOS { + case "linux": + osName = "linux" + case "darwin": + osName = "osx" + case "windows": + osName = "win" + default: + return "", "", fmt.Errorf("unsupported runtime.GOOS %q for GH runner", runtime.GOOS) + } + switch runtime.GOARCH { + case "amd64": + arch = "x64" + case "arm64": + arch = "arm64" + case "arm": + arch = "arm" + default: + return "", "", fmt.Errorf("unsupported runtime.GOARCH %q for GH runner", runtime.GOARCH) + } + return osName, arch, nil +} + +// extractTarGz unpacks a gzipped tar stream into destDir. Entries that would +// escape destDir via path traversal are rejected. +func extractTarGz(r io.Reader, destDir string) error { + gz, err := gzip.NewReader(r) + if err != nil { + return err + } + defer gz.Close() + + absDest, err := filepath.Abs(destDir) + if err != nil { + return err + } + + tr := tar.NewReader(gz) + for { + header, err := tr.Next() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + + target := filepath.Join(absDest, header.Name) + // Path traversal protection: target must stay inside destDir. 
+ rel, err := filepath.Rel(absDest, target) + if err != nil || strings.HasPrefix(rel, "..") { + return fmt.Errorf("tar entry escapes destination: %q", header.Name) + } + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(target, os.FileMode(header.Mode)|0o700); err != nil { + return err + } + case tar.TypeReg: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + f, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(header.Mode)) + if err != nil { + return err + } + if _, err := io.Copy(f, tr); err != nil { + f.Close() + return err + } + if err := f.Close(); err != nil { + return err + } + case tar.TypeSymlink: + // Resolve the symlink target relative to destDir and reject escapes. + linkTarget := header.Linkname + absLink := linkTarget + if !filepath.IsAbs(absLink) { + absLink = filepath.Join(filepath.Dir(target), linkTarget) + } + rel, err := filepath.Rel(absDest, absLink) + if err != nil || strings.HasPrefix(rel, "..") { + return fmt.Errorf("symlink entry escapes destination: %q -> %q", header.Name, linkTarget) + } + _ = os.Remove(target) // overwrite if exists + if err := os.Symlink(linkTarget, target); err != nil { + return err + } + default: + // Skip other entry types (block/char devices, etc.) + } + } +} diff --git a/src/agent/runner/bootstrap_test.go b/src/agent/runner/bootstrap_test.go new file mode 100644 index 0000000..28aa737 --- /dev/null +++ b/src/agent/runner/bootstrap_test.go @@ -0,0 +1,185 @@ +package runner + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestEnsureRunner_AlreadyPresent_NoDownload(t *testing.T) { + // Pre-create the listener; downloader must not be touched. 
+ folder := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(folder, "bin"), 0o755)) + listener := filepath.Join(folder, "bin", "Runner.Listener") + require.NoError(t, os.WriteFile(listener, []byte("preexisting"), 0o755)) + + withDownloadServer(t, "should-not-be-called", nil, func() { + err := EnsureRunner(folder, "2.300.0") + require.NoError(t, err) + }) + + got, err := os.ReadFile(listener) + require.NoError(t, err) + assert.Equal(t, "preexisting", string(got), "existing listener must not be overwritten") +} + +func TestEnsureRunner_DownloadsPinnedVersion(t *testing.T) { + folder := t.TempDir() + + tarball := makeRunnerTarball(t, "fake-listener-bytes") + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(tarball) + })) + defer srv.Close() + + origURLFn := runnerDownloadURL + t.Cleanup(func() { runnerDownloadURL = origURLFn }) + runnerDownloadURL = func(version, osName, arch string) string { + return srv.URL + "/" + version + } + + require.NoError(t, EnsureRunner(folder, "2.300.0")) + + listener := filepath.Join(folder, "bin", "Runner.Listener") + got, err := os.ReadFile(listener) + require.NoError(t, err) + assert.Equal(t, "fake-listener-bytes", string(got)) +} + +func TestEnsureRunner_ResolvesLatestWhenVersionEmpty(t *testing.T) { + folder := t.TempDir() + + tarball := makeRunnerTarball(t, "latest-listener") + var requestedVersion string + + tarSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(tarball) + })) + defer tarSrv.Close() + apiSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{"tag_name":"v2.999.0"}`)) + })) + defer apiSrv.Close() + + origAPI := latestReleaseURL + origURLFn := runnerDownloadURL + t.Cleanup(func() { + latestReleaseURL = origAPI + runnerDownloadURL = origURLFn + }) + latestReleaseURL = apiSrv.URL + runnerDownloadURL = 
func(version, osName, arch string) string { + requestedVersion = version + return tarSrv.URL + "/" + version + } + + require.NoError(t, EnsureRunner(folder, "")) + assert.Equal(t, "2.999.0", requestedVersion, "must strip leading v and pass to URL builder") +} + +func TestEnsureRunner_DownloadFailureSurfaces(t *testing.T) { + folder := t.TempDir() + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "nope", http.StatusNotFound) + })) + defer srv.Close() + + origURLFn := runnerDownloadURL + t.Cleanup(func() { runnerDownloadURL = origURLFn }) + runnerDownloadURL = func(version, osName, arch string) string { return srv.URL } + + err := EnsureRunner(folder, "2.300.0") + require.Error(t, err) + assert.Contains(t, err.Error(), "404") +} + +func TestExtractTarGz_RejectsPathTraversal(t *testing.T) { + // Build a tarball with a malicious entry that escapes destDir. + var buf bytes.Buffer + gz := gzip.NewWriter(&buf) + tw := tar.NewWriter(gz) + require.NoError(t, tw.WriteHeader(&tar.Header{ + Name: "../escape.txt", + Mode: 0o644, + Size: 5, + Typeflag: tar.TypeReg, + })) + _, _ = tw.Write([]byte("hello")) + require.NoError(t, tw.Close()) + require.NoError(t, gz.Close()) + + dest := t.TempDir() + err := extractTarGz(&buf, dest) + require.Error(t, err) + assert.Contains(t, err.Error(), "escapes destination") +} + +func TestRunnerPlatform(t *testing.T) { + osName, arch, err := runnerPlatform() + require.NoError(t, err) + // Spot-check: on the test host these should resolve to *something*; we + // can't assert exact values portably, but we can assert non-empty. + assert.NotEmpty(t, osName) + assert.NotEmpty(t, arch) + // And on common dev machines (linux/amd64, darwin/arm64) we get sane mappings. 
+ if runtime.GOOS == "linux" { + assert.Equal(t, "linux", osName) + } + if runtime.GOARCH == "amd64" { + assert.Equal(t, "x64", arch) + } +} + +// --- helpers --- + +// makeRunnerTarball creates a tar.gz with a single entry bin/Runner.Listener +// containing the provided body. Mirrors the layout the agent expects. +func makeRunnerTarball(t *testing.T, body string) []byte { + t.Helper() + var buf bytes.Buffer + gz := gzip.NewWriter(&buf) + tw := tar.NewWriter(gz) + + require.NoError(t, tw.WriteHeader(&tar.Header{ + Name: "bin/", + Mode: 0o755, + Typeflag: tar.TypeDir, + })) + require.NoError(t, tw.WriteHeader(&tar.Header{ + Name: "bin/Runner.Listener", + Mode: 0o755, + Size: int64(len(body)), + Typeflag: tar.TypeReg, + })) + _, err := tw.Write([]byte(body)) + require.NoError(t, err) + require.NoError(t, tw.Close()) + require.NoError(t, gz.Close()) + return buf.Bytes() +} + +// withDownloadServer replaces the URL builder for the duration of fn. The +// supplied 'shouldNotCall' string is irrelevant to behaviour; it documents +// intent at the call site (the server is expected not to receive a request). +func withDownloadServer(t *testing.T, _ string, _ http.Handler, fn func()) { + t.Helper() + orig := runnerDownloadURL + t.Cleanup(func() { runnerDownloadURL = orig }) + runnerDownloadURL = func(version, osName, arch string) string { + t.Fatalf("runnerDownloadURL should not have been called for version=%s", version) + return "" + } + fn() +} diff --git a/src/lib/bootstrap/bootstrap.go b/src/lib/bootstrap/bootstrap.go new file mode 100644 index 0000000..a312cd0 --- /dev/null +++ b/src/lib/bootstrap/bootstrap.go @@ -0,0 +1,106 @@ +// Package bootstrap renders the shell script that providers inject into a +// fresh tray to download the cattery agent and start it. +// +// The script content is provider-agnostic. 
Each provider delivers it via its +// native mechanism: GCE -> startup-script metadata, Docker -> container stdin, +// future cloud providers -> user-data / custom-data. +package bootstrap + +import ( + "bytes" + "cattery/lib/config" + "embed" + "fmt" + "text/template" +) + +//go:embed templates/*.tmpl +var templatesFS embed.FS + +// Defaults applied when the user leaves a BootstrapConfig field empty. +const ( + DefaultOS = "linux" + DefaultAgentFolder = "/opt/cattery" + DefaultRunnerFolder = "/opt/cattery/actions-runner" +) + +// Params are the runtime values substituted into the bootstrap template. +type Params struct { + ServerURL string + AgentID string + AgentFolder string + RunnerFolder string + User string + // Background controls how the agent process is launched. + // + // false (default): the script `exec`s the agent in the foreground. Use this + // when the script will *be* the long-running process -- e.g. a Docker + // container entrypoint where the shell is PID 1. + // + // true: the script installs and starts a systemd unit, then exits cleanly. + // Use this when the script is delivered by something that expects it to + // terminate -- e.g. GCE startup-scripts (which keep + // google-startup-scripts.service running until the script returns) or + // generic cloud-init. + Background bool +} + +// Generate renders the bootstrap script for the given config + params. +// +// If cfg.Script is non-empty it is parsed as a text/template. Otherwise the +// built-in template for cfg.OS is used. Empty string fields fall back to +// package-level defaults; cfg.User is left empty by default (script runs as +// whatever user the provider's delivery mechanism uses). 
+func Generate(cfg config.BootstrapConfig, p Params) (string, error) { + if p.AgentFolder == "" { + p.AgentFolder = orDefault(cfg.AgentFolder, DefaultAgentFolder) + } + if p.RunnerFolder == "" { + p.RunnerFolder = orDefault(cfg.RunnerFolder, DefaultRunnerFolder) + } + if p.User == "" { + p.User = cfg.User // empty allowed -> template skips sudo + } + + tmplSrc, err := selectTemplate(cfg) + if err != nil { + return "", err + } + + tmpl, err := template.New("bootstrap").Parse(tmplSrc) + if err != nil { + return "", fmt.Errorf("parse bootstrap template: %w", err) + } + + var buf bytes.Buffer + if err := tmpl.Execute(&buf, p); err != nil { + return "", fmt.Errorf("render bootstrap template: %w", err) + } + return buf.String(), nil +} + +// RunnerFolderOrDefault returns the bootstrap runner folder that providers +// should pass to the agent's --runner-folder flag. +func RunnerFolderOrDefault(cfg config.BootstrapConfig) string { + return orDefault(cfg.RunnerFolder, DefaultRunnerFolder) +} + +func selectTemplate(cfg config.BootstrapConfig) (string, error) { + if cfg.Script != "" { + return cfg.Script, nil + } + osName := orDefault(cfg.OS, DefaultOS) + path := fmt.Sprintf("templates/%s.sh.tmpl", osName) + data, err := templatesFS.ReadFile(path) + if err != nil { + return "", fmt.Errorf("no built-in bootstrap template for os %q", osName) + } + return string(data), nil +} + +func orDefault(v, def string) string { + if v == "" { + return def + } + return v +} diff --git a/src/lib/bootstrap/bootstrap_test.go b/src/lib/bootstrap/bootstrap_test.go new file mode 100644 index 0000000..c9b15bf --- /dev/null +++ b/src/lib/bootstrap/bootstrap_test.go @@ -0,0 +1,122 @@ +package bootstrap + +import ( + "cattery/lib/config" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGenerate_DefaultsApplied(t *testing.T) { + script, err := Generate(config.BootstrapConfig{Enabled: true}, Params{ + ServerURL: 
"https://cattery.example.com", + AgentID: "tray-abc", + }) + require.NoError(t, err) + + assert.Contains(t, script, "https://cattery.example.com") + assert.Contains(t, script, "tray-abc") + assert.Contains(t, script, DefaultAgentFolder) + assert.Contains(t, script, DefaultRunnerFolder) + // Foreground (default) -> exec, no detachment. + assert.Contains(t, script, "exec ") + assert.NotContains(t, script, "setsid") + // No User -> no sudo branch. + assert.NotContains(t, script, "sudo -E -u") +} + +func TestGenerate_BackgroundDetaches(t *testing.T) { + script, err := Generate(config.BootstrapConfig{Enabled: true}, Params{ + ServerURL: "https://srv", + AgentID: "id1", + Background: true, + }) + require.NoError(t, err) + + // Background mode launches detached and exits -- no `exec` of the agent. + assert.Contains(t, script, "setsid") + assert.Contains(t, script, `/dev/null 2>&1; then + chown -R "{{.User}}":"{{.User}}" "${AGENT_FOLDER}" "${RUNNER_FOLDER}" || true +fi +{{- end}} + +echo "[cattery-bootstrap] downloading agent from ${SERVER_URL}/agent/download" +curl -fsSL "${SERVER_URL}/agent/download" -o "${CATTERY_BIN}" +chmod +x "${CATTERY_BIN}" + +{{if .Background -}} +# Background mode: launch the agent as a detached process and exit. Used when +# the script is delivered by something that expects it to terminate -- e.g. +# GCE startup-scripts (which keep google-startup-scripts.service running until +# the script returns) or generic cloud-init. +# +# `setsid` detaches the agent from this script's session/process group, and +# the redirects free the controlling terminal so the parent runner sees EOF +# on the agent's stdio when the script exits. Output goes to a log file so it +# survives the script's exit. 
+LOG_FILE="${AGENT_FOLDER}/agent.log" +echo "[cattery-bootstrap] launching agent in background; logs at ${LOG_FILE}" +{{if .User -}} +setsid sudo -E -u "{{.User}}" "${CATTERY_BIN}" agent \ + --server-url "${SERVER_URL}" \ + --agent-id "${AGENT_ID}" \ + --runner-folder "${RUNNER_FOLDER}" \ + </dev/null >"${LOG_FILE}" 2>&1 & +{{- else -}} +setsid "${CATTERY_BIN}" agent \ + --server-url "${SERVER_URL}" \ + --agent-id "${AGENT_ID}" \ + --runner-folder "${RUNNER_FOLDER}" \ + </dev/null >"${LOG_FILE}" 2>&1 & +{{- end}} +disown || true +echo "[cattery-bootstrap] agent pid=$! detached; bootstrap exiting" +{{- else -}} +# Foreground mode: replace this shell with the agent process. Use this when the +# script itself is meant to be the long-running process -- e.g. a container +# entrypoint where the shell is PID 1 and exiting would kill the container. +echo "[cattery-bootstrap] launching agent in foreground" +{{if .User -}} +exec sudo -E -u "{{.User}}" "${CATTERY_BIN}" agent \ + --server-url "${SERVER_URL}" \ + --agent-id "${AGENT_ID}" \ + --runner-folder "${RUNNER_FOLDER}" +{{- else -}} +exec "${CATTERY_BIN}" agent \ + --server-url "${SERVER_URL}" \ + --agent-id "${AGENT_ID}" \ + --runner-folder "${RUNNER_FOLDER}" +{{- end}} +{{- end}} diff --git a/src/lib/config/config.go b/src/lib/config/config.go index 56a79ab..1595265 100644 --- a/src/lib/config/config.go +++ b/src/lib/config/config.go @@ -105,9 +105,17 @@ func LoadConfig(configPath *string) (*CatteryConfig, error) { } cfg.trayTypesMap = make(map[string]*TrayType) - for _, trayType := range cfg.TrayTypes { + for i, trayType := range cfg.TrayTypes { cfg.trayTypesMap[trayType.Name] = trayType + // Default Bootstrap.Enabled to true unless the user explicitly set it. + // We check viper.IsSet so that omitting the field (or the whole + // `bootstrap:` block) means "enabled", while `enabled: false` opts out. 
+ enabledKey := fmt.Sprintf("traytypes.%d.bootstrap.enabled", i) + if !viper.IsSet(enabledKey) { + trayType.Bootstrap.Enabled = true + } + providerConfig, ok := cfg.providerMap[trayType.Provider] if !ok { @@ -200,17 +208,32 @@ type GitHubOrganization struct { const DefaultMaxParallelCreation = 10 type TrayType struct { - Name string `yaml:"name" validate:"required"` - Provider string `yaml:"provider" validate:"required"` - RunnerGroupId int64 `yaml:"runnerGroupId" validate:"required"` - Shutdown bool `yaml:"shutdown"` - GitHubOrg string `yaml:"githubOrg" validate:"required"` - MaxTrays int `yaml:"maxTrays"` - MaxParallelCreation int `yaml:"maxParallelCreation"` - Config TrayConfig `yaml:"config"` + Name string `yaml:"name" validate:"required"` + Provider string `yaml:"provider" validate:"required"` + RunnerGroupId int64 `yaml:"runnerGroupId" validate:"required"` + Shutdown bool `yaml:"shutdown"` + GitHubOrg string `yaml:"githubOrg" validate:"required"` + MaxTrays int `yaml:"maxTrays"` + MaxParallelCreation int `yaml:"maxParallelCreation"` + RunnerVersion string `yaml:"runnerVersion"` + Bootstrap BootstrapConfig `yaml:"bootstrap"` + Config TrayConfig `yaml:"config"` ExtraMetadata TrayExtraMetadata } +// BootstrapConfig controls whether the provider injects a script that downloads +// and runs the cattery agent on the spawned tray. All fields optional; defaults +// applied at use sites (Enabled defaulted in LoadConfig, strings defaulted in +// bootstrap.Generate). 
+type BootstrapConfig struct { + Enabled bool `yaml:"enabled"` + OS string `yaml:"os"` + AgentFolder string `yaml:"agentFolder"` + RunnerFolder string `yaml:"runnerFolder"` + User string `yaml:"user"` + Script string `yaml:"script"` +} + type TrayExtraMetadata map[string]string type ProviderConfig map[string]string diff --git a/src/lib/messages/register.go b/src/lib/messages/register.go index 2d004e4..8893a65 100644 --- a/src/lib/messages/register.go +++ b/src/lib/messages/register.go @@ -5,8 +5,9 @@ import ( ) type RegisterResponse struct { - Agent agents.Agent `json:"agent"` - JitConfig string `json:"jit_config"` + Agent agents.Agent `json:"agent"` + JitConfig string `json:"jit_config"` + RunnerVersion string `json:"runner_version,omitempty"` } type UnregisterRequest struct { diff --git a/src/lib/trays/providers/dockerProvider.go b/src/lib/trays/providers/dockerProvider.go index 6356139..c9b15af 100644 --- a/src/lib/trays/providers/dockerProvider.go +++ b/src/lib/trays/providers/dockerProvider.go @@ -1,6 +1,7 @@ package providers import ( + "cattery/lib/bootstrap" "cattery/lib/config" "cattery/lib/trays" "fmt" @@ -45,6 +46,15 @@ func (d *DockerProvider) RunTray(tray *trays.Tray) error { image := trayConfig.Image serverUrl := config.Get().Server.AdvertiseUrl + var bootstrapCfg config.BootstrapConfig + if tt := tray.TrayType(); tt != nil { + bootstrapCfg = tt.Bootstrap + } + + if bootstrapCfg.Enabled { + return d.runWithBootstrap(tray, containerName, image, serverUrl, bootstrapCfg) + } + dockerCommand := exec.Command("docker", "run", "-d", "--rm", "--add-host=host.docker.internal:host-gateway", "--name", containerName, @@ -62,6 +72,40 @@ func (d *DockerProvider) RunTray(tray *trays.Tray) error { return nil } +// runWithBootstrap launches a container that bootstraps the cattery agent at +// startup instead of relying on a pre-baked binary in the image. 
The script is +// piped to the container's shell via stdin, which avoids quote-escaping +// headaches with `-c "..."` for multiline scripts. +func (d *DockerProvider) runWithBootstrap(tray *trays.Tray, containerName, image, serverUrl string, cfg config.BootstrapConfig) error { + // Background=false: the script is the container entrypoint; /bin/sh is + // PID 1 and `exec`ing the agent makes it the long-running container + // process. If the script exited, the container would terminate. + script, err := bootstrap.Generate(cfg, bootstrap.Params{ + ServerURL: serverUrl, + AgentID: tray.Id, + Background: false, + }) + if err != nil { + return fmt.Errorf("generate bootstrap script: %w", err) + } + + dockerCommand := exec.Command("docker", "run", "-d", "--rm", "-i", + "--add-host=host.docker.internal:host-gateway", + "--name", containerName, + "--entrypoint", "/bin/sh", + image, + "-s", + ) + dockerCommand.Stdin = strings.NewReader(script) + + d.logger.Info("Running docker bootstrap command: ", dockerCommand.String()) + if err := dockerCommand.Run(); err != nil { + d.logger.Error("Failed to run docker bootstrap command: ", err) + return err + } + return nil +} + func (d *DockerProvider) CleanTray(tray *trays.Tray) error { dockerCommand := exec.Command("docker", "container", "stop", tray.Id) dockerCommandOutput, err := dockerCommand.CombinedOutput() diff --git a/src/lib/trays/providers/gceProvider.go b/src/lib/trays/providers/gceProvider.go index 97601b4..1595e30 100644 --- a/src/lib/trays/providers/gceProvider.go +++ b/src/lib/trays/providers/gceProvider.go @@ -1,6 +1,7 @@ package providers import ( + "cattery/lib/bootstrap" "cattery/lib/config" "cattery/lib/trays" "context" @@ -65,15 +66,37 @@ func (g *GceProvider) RunTray(tray *trays.Tray) error { machineType := trayConfig.MachineType var extraMetadata config.TrayExtraMetadata + var bootstrapCfg config.BootstrapConfig if tt := tray.TrayType(); tt != nil { extraMetadata = tt.ExtraMetadata + bootstrapCfg = tt.Bootstrap + 
} + + baseMetadata := map[string]string{ + "cattery-url": config.Get().Server.AdvertiseUrl, + "cattery-agent-id": tray.Id, + } + + // When bootstrap is enabled, inject a startup-script that downloads the + // agent binary from the server and runs it. The user's machine image only + // needs base OS + their tooling -- the agent installs itself. + if bootstrapCfg.Enabled { + // Background=true: GCE startup-scripts must terminate so that + // google-startup-scripts.service completes; the script detaches the + // agent into the background (setsid) before exiting. + script, err := bootstrap.Generate(bootstrapCfg, bootstrap.Params{ + ServerURL: config.Get().Server.AdvertiseUrl, + AgentID: tray.Id, + Background: true, + }) + if err != nil { + return fmt.Errorf("generate bootstrap script: %w", err) + } + baseMetadata["startup-script"] = script } metadata := createGcpMetadata( - map[string]string{ - "cattery-url": config.Get().Server.AdvertiseUrl, - "cattery-agent-id": tray.Id, - }, + baseMetadata, extraMetadata, ) diff --git a/src/server/handlers/agentHandler.go b/src/server/handlers/agentHandler.go index 7291b43..e179848 100644 --- a/src/server/handlers/agentHandler.go +++ b/src/server/handlers/agentHandler.go @@ -84,8 +84,9 @@ func (h *Handlers) AgentRegister(responseWriter http.ResponseWriter, r *http.Req } registerResponse := messages.RegisterResponse{ - Agent: newAgent, - JitConfig: jitConfig, + Agent: newAgent, + JitConfig: jitConfig, + RunnerVersion: trayType.RunnerVersion, } responseWriter.Header().Set("Content-Type", "application/json")