diff --git a/act/container/host_environment.go b/act/container/host_environment.go index 405862a9..c8742008 100644 --- a/act/container/host_environment.go +++ b/act/container/host_environment.go @@ -389,7 +389,7 @@ func (e *HostEnvironment) ExecWithCmdLine(command []string, cmdline string, env if err := e.exec(ctx, command, cmdline, env, user, workdir); err != nil { select { case <-ctx.Done(): - return fmt.Errorf("this step has been cancelled: %w", err) + return fmt.Errorf("this step has been cancelled: ctx: %w, exec: %w", ctx.Err(), err) default: return err } diff --git a/act/runner/run_context.go b/act/runner/run_context.go index f344a046..bb79ad3a 100644 --- a/act/runner/run_context.go +++ b/act/runner/run_context.go @@ -949,7 +949,7 @@ func (rc *RunContext) Executor() (common.Executor, error) { return err } if res { - timeoutctx, cancelTimeOut := evaluateTimeout(ctx, rc.ExprEval, rc.Run.Job().TimeoutMinutes) + timeoutctx, cancelTimeOut := evaluateTimeout(ctx, "job", rc.ExprEval, rc.Run.Job().TimeoutMinutes) defer cancelTimeOut() return executor(timeoutctx) diff --git a/act/runner/step.go b/act/runner/step.go index 322b6f7e..c93291c4 100644 --- a/act/runner/step.go +++ b/act/runner/step.go @@ -177,7 +177,7 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo Mode: 0o666, })(ctx) - timeoutctx, cancelTimeOut := evaluateTimeout(ctx, rc.ExprEval, stepModel.TimeoutMinutes) + timeoutctx, cancelTimeOut := evaluateTimeout(ctx, "step", rc.ExprEval, stepModel.TimeoutMinutes) defer cancelTimeOut() err = executor(timeoutctx) @@ -213,12 +213,12 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo } } -func evaluateTimeout(ctx context.Context, exprEval ExpressionEvaluator, timeoutMinutes string) (context.Context, context.CancelFunc) { +func evaluateTimeout(ctx context.Context, contextType string, exprEval ExpressionEvaluator, timeoutMinutes string) (context.Context, context.CancelFunc) { timeout := exprEval.Interpolate(ctx, timeoutMinutes) if timeout != "" { timeOutMinutes, err := strconv.ParseInt(timeout, 10, 64) if err == nil { - common.Logger(ctx).Debugf("the step will stop in timeout-minutes %s", timeout) + common.Logger(ctx).Debugf("the %s will stop in timeout-minutes %s", contextType, timeout) return context.WithTimeout(ctx, time.Duration(timeOutMinutes)*time.Minute) } common.Logger(ctx).Errorf("timeout-minutes %s cannot be parsed and will be ignored: %w", timeout, err) diff --git a/internal/app/poll/poller.go b/internal/app/poll/poller.go index 7b954cb7..c880bc67 100644 --- a/internal/app/poll/poller.go +++ b/internal/app/poll/poller.go @@ -90,10 +90,10 @@ func (p *poller) Shutdown(ctx context.Context) error { return nil case <-ctx.Done(): - log.Trace("forcing the jobs to shutdown") + log.Info("forcing the jobs to shutdown") p.shutdownJobs() <-p.done - log.Trace("all jobs have been shutdown") + log.Info("all jobs have been shutdown") return ctx.Err() } } diff --git a/internal/pkg/report/reporter.go b/internal/pkg/report/reporter.go index 7e46b8bb..804744cc 100644 --- a/internal/pkg/report/reporter.go +++ b/internal/pkg/report/reporter.go @@ -48,6 +48,7 @@ type Reporter struct { debugOutputEnabled bool stopCommandEndToken string + issuedLocalCancel bool } func NewReporter(ctx context.Context, cancel context.CancelFunc, c client.Client, task *runnerv1.Task, reportInterval time.Duration) *Reporter { @@ -192,11 +193,19 @@ func (r *Reporter) RunDaemon() { return } if r.ctx.Err() != nil { + // This shouldn't happen because DaemonContext is used for `r.ctx` which should outlive any running job. + log.Warnf("Terminating RunDaemon on an active job due to error: %v", r.ctx.Err()) return } - _ = r.ReportLog(false) - _ = r.ReportState() + err := r.ReportLog(false) + if err != nil { + log.Warnf("ReportLog error: %v", err) + } + err = r.ReportState() + if err != nil { + log.Warnf("ReportState error: %v", err) + } time.AfterFunc(r.reportInterval, r.RunDaemon) } @@ -391,8 +400,17 @@ func (r *Reporter) ReportState() error { r.outputs.Store(k, struct{}{}) } - switch resp.Msg.GetState().GetResult() { + localResultState := state.GetResult() + remoteResultState := resp.Msg.GetState().GetResult() + switch remoteResultState { case runnerv1.Result_RESULT_CANCELLED, runnerv1.Result_RESULT_FAILURE: + // issuedLocalCancel is just used to deduplicate this log message if our local state doesn't catch up with our + // remote state as quickly as the report-interval, which would cause this message to repeat in the logs. + if !r.issuedLocalCancel && remoteResultState != localResultState { + log.Infof("UpdateTask returned task result %v for a task that was in local state %v - beginning local task termination", + remoteResultState, localResultState) + r.issuedLocalCancel = true + } r.cancel() }