1
0
Fork 0
mirror of https://code.forgejo.org/forgejo/runner.git synced 2025-10-15 19:42:06 +00:00
forgejo-runner/act/container/host_environment.go
Mathieu Fenniak 63351343ba
fix: improve logging to diagnose mystery job terminations (#1048)
Additional logging to support #1044.

Manual testing only.  Cases tested:

Cancel a job from Forgejo UI; this seems like the most likely missing piece in #1044 as two jobs were simultaneously marked as "Failed".  There are codepaths in Forgejo that can set this state to both cancelled and failed, but the runner didn't provide log output indicating that's why a job was stopping:
```
time="2025-10-02T13:22:53-06:00" level=info msg="UpdateTask returned task result RESULT_CANCELLED for a task that was in local state RESULT_UNSPECIFIED - beginning local task termination" func="[ReportState]" file="[reporter.go:410]"
```

Host-based executor hits step timeout in exec, or, is cancelled.  This occurred but only logged the `err` from `exec`, not the context error indicating whether it was a timeout or a cancellation:
```
[Test Action/job1] this step has been cancelled: ctx: context deadline exceeded, exec: RUN signal: killed
[Test Action/job1] this step has been cancelled: ctx: context canceled, exec: RUN signal: killed
```

Unable to `ReportState` due to Forgejo inaccessible.  If the runner isn't able to update state to Forgejo a job could be considered a zombie; this would trigger one of the codepaths where the job would be marked as failed.  If connectivity was later restored, then the runner could identify it was marked as failed and cancel the job context.  (This combination doesn't seem likely, but, I think it's reasonable to consider these failures as warnings because there may be unexpected errors here that we're not aware of).
```
time="2025-10-02T13:27:19-06:00" level=warning msg="ReportState error: unavailable: 502 Bad Gateway" func="[RunDaemon]" file="[reporter.go:207]"
```

Runner shutdown logging; just changed up to `Info` level:
```
time="2025-10-02T13:31:36-06:00" level=info msg="forcing the jobs to shutdown" func="[Shutdown]" file="[poller.go:93]"
[Test Action/job1]     Failure - Main sleep 120
[Test Action/job1] this step has been cancelled: ctx: context canceled, exec: RUN signal: killed
```

<!--start release-notes-assistant-->
<!--URL:https://code.forgejo.org/forgejo/runner-->
- bug fixes
  - [PR](https://code.forgejo.org/forgejo/runner/pulls/1048): <!--number 1048 --><!--line 0 --><!--description Zml4OiBpbXByb3ZlIGxvZ2dpbmcgdG8gZGlhZ25vc2UgbXlzdGVyeSBqb2IgdGVybWluYXRpb25z-->fix: improve logging to diagnose mystery job terminations<!--description-->
<!--end release-notes-assistant-->

Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/1048
Reviewed-by: earl-warren <earl-warren@noreply.code.forgejo.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-10-02 22:43:50 +00:00

511 lines
12 KiB
Go

package container
import (
"archive/tar"
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"sync/atomic"
"time"
"github.com/go-git/go-billy/v5/helper/polyfill"
"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"golang.org/x/term"
"code.forgejo.org/forgejo/runner/v11/act/common"
"code.forgejo.org/forgejo/runner/v11/act/filecollector"
"code.forgejo.org/forgejo/runner/v11/act/lookpath"
)
type HostEnvironment struct {
Name string
Path string
TmpDir string
ToolCache string
Workdir string
ActPath string
Root string
CleanUp func()
StdOut io.Writer
LXC bool
}
func (e *HostEnvironment) Create(_, _ []string) common.Executor {
return func(ctx context.Context) error {
return nil
}
}
func (e *HostEnvironment) ConnectToNetwork(name string) common.Executor {
return func(ctx context.Context) error {
return nil
}
}
func (e *HostEnvironment) Close() common.Executor {
return func(ctx context.Context) error {
return nil
}
}
func (e *HostEnvironment) Copy(destPath string, files ...*FileEntry) common.Executor {
return func(ctx context.Context) error {
for _, f := range files {
if err := os.MkdirAll(filepath.Dir(filepath.Join(destPath, f.Name)), 0o777); err != nil {
return err
}
if err := os.WriteFile(filepath.Join(destPath, f.Name), []byte(f.Body), fs.FileMode(f.Mode)); err != nil { //nolint:gosec
return err
}
}
return nil
}
}
func (e *HostEnvironment) CopyTarStream(ctx context.Context, destPath string, tarStream io.Reader) error {
if err := os.RemoveAll(destPath); err != nil {
return err
}
tr := tar.NewReader(tarStream)
cp := &filecollector.CopyCollector{
DstDir: destPath,
}
for {
ti, err := tr.Next()
if errors.Is(err, io.EOF) {
return nil
} else if err != nil {
return err
}
if ti.FileInfo().IsDir() {
continue
}
if ctx.Err() != nil {
return fmt.Errorf("CopyTarStream has been cancelled")
}
if err := cp.WriteFile(ti.Name, ti.FileInfo(), ti.Linkname, tr); err != nil {
return err
}
}
}
func (e *HostEnvironment) CopyDir(destPath, srcPath string, useGitIgnore bool) common.Executor {
return func(ctx context.Context) error {
logger := common.Logger(ctx)
srcPrefix := filepath.Dir(srcPath)
if !strings.HasSuffix(srcPrefix, string(filepath.Separator)) {
srcPrefix += string(filepath.Separator)
}
logger.Debugf("Stripping prefix:%s src:%s", srcPrefix, srcPath)
var ignorer gitignore.Matcher
if useGitIgnore {
ps, err := gitignore.ReadPatterns(polyfill.New(osfs.New(srcPath)), nil)
if err != nil {
logger.Debugf("Error loading .gitignore: %v", err)
}
ignorer = gitignore.NewMatcher(ps)
}
fc := &filecollector.FileCollector{
Fs: &filecollector.DefaultFs{},
Ignorer: ignorer,
SrcPath: srcPath,
SrcPrefix: srcPrefix,
Handler: &filecollector.CopyCollector{
DstDir: destPath,
},
}
return filepath.Walk(srcPath, fc.CollectFiles(ctx, []string{}))
}
}
func (e *HostEnvironment) GetContainerArchive(ctx context.Context, srcPath string) (io.ReadCloser, error) {
buf := &bytes.Buffer{}
tw := tar.NewWriter(buf)
defer tw.Close()
srcPath = filepath.Clean(srcPath)
fi, err := os.Lstat(srcPath)
if err != nil {
return nil, err
}
tc := &filecollector.TarCollector{
TarWriter: tw,
}
if fi.IsDir() {
srcPrefix := srcPath
if !strings.HasSuffix(srcPrefix, string(filepath.Separator)) {
srcPrefix += string(filepath.Separator)
}
fc := &filecollector.FileCollector{
Fs: &filecollector.DefaultFs{},
SrcPath: srcPath,
SrcPrefix: srcPrefix,
Handler: tc,
}
err = filepath.Walk(srcPath, fc.CollectFiles(ctx, []string{}))
if err != nil {
return nil, err
}
} else {
var f io.ReadCloser
var linkname string
if fi.Mode()&fs.ModeSymlink != 0 {
linkname, err = os.Readlink(srcPath)
if err != nil {
return nil, err
}
} else {
f, err = os.Open(srcPath)
if err != nil {
return nil, err
}
defer f.Close()
}
err := tc.WriteFile(fi.Name(), fi, linkname, f)
if err != nil {
return nil, err
}
}
return io.NopCloser(buf), nil
}
func (e *HostEnvironment) Pull(_ bool) common.Executor {
return func(ctx context.Context) error {
return nil
}
}
func (e *HostEnvironment) Start(_ bool) common.Executor {
return func(ctx context.Context) error {
return nil
}
}
type ptyWriter struct {
Out io.Writer
AutoStop atomic.Bool
dirtyLine bool
}
func (w *ptyWriter) Write(buf []byte) (int, error) {
if w.AutoStop.Load() && len(buf) > 0 && buf[len(buf)-1] == 4 {
n, err := w.Out.Write(buf[:len(buf)-1])
if err != nil {
return n, err
}
if w.dirtyLine || len(buf) > 1 && buf[len(buf)-2] != '\n' {
_, _ = w.Out.Write([]byte("\n"))
return n, io.EOF
}
return n, io.EOF
}
w.dirtyLine = strings.LastIndex(string(buf), "\n") < len(buf)-1
return w.Out.Write(buf)
}
type localEnv struct {
env map[string]string
}
func (l *localEnv) Getenv(name string) string {
if runtime.GOOS == "windows" {
for k, v := range l.env {
if strings.EqualFold(name, k) {
return v
}
}
return ""
}
return l.env[name]
}
func lookupPathHost(cmd string, env map[string]string, writer io.Writer) (string, error) {
f, err := lookpath.LookPath2(cmd, &localEnv{env: env})
if err != nil {
err := "Cannot find: " + fmt.Sprint(cmd) + " in PATH"
if _, _err := writer.Write([]byte(err + "\n")); _err != nil {
return "", fmt.Errorf("%v: %w", err, _err)
}
return "", errors.New(err)
}
return f, nil
}
func setupPty(cmd *exec.Cmd, cmdline string) (*os.File, *os.File, error) {
ppty, tty, err := openPty()
if err != nil {
return nil, nil, err
}
if term.IsTerminal(int(tty.Fd())) {
_, err := term.MakeRaw(int(tty.Fd()))
if err != nil {
ppty.Close()
tty.Close()
return nil, nil, err
}
}
cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
cmd.SysProcAttr = getSysProcAttr(cmdline, true)
return ppty, tty, nil
}
func writeKeepAlive(ppty io.Writer) {
c := 1
var err error
for c == 1 && err == nil {
c, err = ppty.Write([]byte{4})
<-time.After(time.Second)
}
}
func copyPtyOutput(writer io.Writer, ppty io.Reader, finishLog context.CancelFunc) {
defer func() {
finishLog()
}()
if _, err := io.Copy(writer, ppty); err != nil {
return
}
}
func (e *HostEnvironment) UpdateFromImageEnv(_ *map[string]string) common.Executor {
return func(ctx context.Context) error {
return nil
}
}
func getEnvListFromMap(env map[string]string) []string {
envList := make([]string, 0)
for k, v := range env {
envList = append(envList, fmt.Sprintf("%s=%s", k, v))
}
return envList
}
func (e *HostEnvironment) exec(ctx context.Context, commandparam []string, cmdline string, env map[string]string, user, workdir string) error {
envList := getEnvListFromMap(env)
var wd string
if workdir != "" {
if filepath.IsAbs(workdir) {
wd = workdir
} else {
wd = filepath.Join(e.Path, workdir)
}
} else {
wd = e.Path
}
if _, err := os.Stat(wd); err != nil {
common.Logger(ctx).Debugf("Failed to stat working directory %s %v\n", wd, err.Error())
}
command := make([]string, len(commandparam))
copy(command, commandparam)
if e.GetLXC() {
if user == "root" {
command = append([]string{"/usr/bin/sudo"}, command...)
} else {
common.Logger(ctx).Debugf("lxc-attach --name %v %v", e.Name, command)
command = append([]string{"/usr/bin/sudo", "--preserve-env", "--preserve-env=PATH", "/usr/bin/lxc-attach", "--keep-env", "--name", e.Name, "--"}, command...)
}
}
f, err := lookupPathHost(command[0], env, e.StdOut)
if err != nil {
return err
}
cmd := exec.CommandContext(ctx, f)
cmd.Path = f
cmd.Args = command
cmd.Stdin = nil
cmd.Stdout = e.StdOut
cmd.Env = envList
cmd.Stderr = e.StdOut
cmd.Dir = wd
cmd.SysProcAttr = getSysProcAttr(cmdline, false)
var ppty *os.File
var tty *os.File
defer func() {
if ppty != nil {
ppty.Close()
}
if tty != nil {
tty.Close()
}
}()
if true /* allocate Terminal */ {
var err error
ppty, tty, err = setupPty(cmd, cmdline)
if err != nil {
common.Logger(ctx).Debugf("Failed to setup Pty %v\n", err.Error())
}
}
writer := &ptyWriter{Out: e.StdOut}
logctx, finishLog := context.WithCancel(context.Background())
if ppty != nil {
go copyPtyOutput(writer, ppty, finishLog)
} else {
finishLog()
}
if ppty != nil {
go writeKeepAlive(ppty)
}
err = cmd.Run()
if err != nil {
return fmt.Errorf("RUN %w", err)
}
if tty != nil {
writer.AutoStop.Store(true)
if _, err := tty.Write([]byte("\x04")); err != nil {
common.Logger(ctx).Debug("Failed to write EOT")
}
}
<-logctx.Done()
if ppty != nil {
ppty.Close()
ppty = nil
}
return err
}
func (e *HostEnvironment) Exec(command []string /*cmdline string, */, env map[string]string, user, workdir string) common.Executor {
return e.ExecWithCmdLine(command, "", env, user, workdir)
}
func (e *HostEnvironment) ExecWithCmdLine(command []string, cmdline string, env map[string]string, user, workdir string) common.Executor {
return func(ctx context.Context) error {
if err := e.exec(ctx, command, cmdline, env, user, workdir); err != nil {
select {
case <-ctx.Done():
return fmt.Errorf("this step has been cancelled: ctx: %w, exec: %w", ctx.Err(), err)
default:
return err
}
}
return nil
}
}
func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string) common.Executor {
return parseEnvFile(e, srcPath, env)
}
func (e *HostEnvironment) Remove() common.Executor {
return func(ctx context.Context) error {
if e.CleanUp != nil {
e.CleanUp()
}
common.Logger(ctx).Debugf("HostEnvironment.Remove %s", e.Path)
return os.RemoveAll(e.Path)
}
}
func (e *HostEnvironment) ToContainerPath(path string) string {
if bp, err := filepath.Rel(e.Workdir, path); err != nil {
return filepath.Join(e.Path, bp)
} else if filepath.Clean(e.Workdir) == filepath.Clean(path) {
return e.Path
}
return path
}
func (e *HostEnvironment) GetLXC() bool {
return e.LXC
}
func (e *HostEnvironment) GetName() string {
return e.Name
}
func (e *HostEnvironment) GetRoot() string {
return e.Root
}
func (e *HostEnvironment) GetActPath() string {
actPath := e.ActPath
if runtime.GOOS == "windows" {
actPath = strings.ReplaceAll(actPath, "\\", "/")
}
return actPath
}
func (*HostEnvironment) GetPathVariableName() string {
switch runtime.GOOS {
case "plan9":
return "path"
case "windows":
return "Path" // Actually we need a case insensitive map
}
return "PATH"
}
func (e *HostEnvironment) DefaultPathVariable() string {
v, _ := os.LookupEnv(e.GetPathVariableName())
return v
}
func (*HostEnvironment) JoinPathVariable(paths ...string) string {
return strings.Join(paths, string(filepath.ListSeparator))
}
// Reference for Arch values for runner.arch
// https://docs.github.com/en/actions/learn-github-actions/contexts#runner-context
func goArchToActionArch(arch string) string {
archMapper := map[string]string{
"amd64": "X64",
"x86_64": "X64",
"386": "X86",
"aarch64": "ARM64",
}
if arch, ok := archMapper[arch]; ok {
return arch
}
return arch
}
func goOsToActionOs(os string) string {
osMapper := map[string]string{
"linux": "Linux",
"windows": "Windows",
"darwin": "macOS",
}
if os, ok := osMapper[os]; ok {
return os
}
return os
}
func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]any {
return map[string]any{
"os": goOsToActionOs(runtime.GOOS),
"arch": goArchToActionArch(runtime.GOARCH),
"temp": e.TmpDir,
"tool_cache": e.ToolCache,
}
}
func (e *HostEnvironment) IsHealthy(ctx context.Context) (time.Duration, error) {
return 0, nil
}
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {
org := e.StdOut
e.StdOut = stdout
return org, org
}
func (*HostEnvironment) IsEnvironmentCaseInsensitive() bool {
return runtime.GOOS == "windows"
}