From 165b44deec82dbf9ff915100a23f8f857e3ff49f Mon Sep 17 00:00:00 2001 From: Mathieu Fenniak Date: Sat, 16 Aug 2025 20:44:40 +0000 Subject: [PATCH] fix: data race in 'runs-on' expressions causes incorrect job labels during execution (#871) A job with a `runs-on` that references matrix variables will not run with the expected labels. eg. ``` jobs: matrix-runs-on: strategy: matrix: os: [ubuntu-latest, ubuntu-20.04] runs-on: ${{ matrix.os }} steps: ... ``` Due to shared mutated state, both jobs that this generates will (w/ a race condition) either run with the `ubuntu-latest` or `ubuntu-20.04`, but rarely (never observed) with the expected outcome of running on both labels. `EvaluateYamlNode` is used to evaluate expressions in the `runs-on` field in the context of the current running job, but mutating an object shared between multiple concurrent jobs (in matrix evaluation). This results in the evaluation results from one job spilling into another and corrupting their `runs-on` labels. ``` ================== WARNING: DATA RACE Write at 0x00c00047e0b0 by goroutine 1739: reflect.typedmemmove() /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/runtime/mbarrier.go:213 +0x0 reflect.Value.Set() /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/reflect/value.go:2062 +0x184 gopkg.in/yaml%2ev3.(*decoder).unmarshal() /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/decode.go:493 +0x7b4 gopkg.in/yaml%2ev3.(*Node).Decode() /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/yaml.go:149 +0x355 code.forgejo.org/forgejo/runner/v9/act/runner.expressionEvaluator.EvaluateYamlNode() /.../forgejo-runner/act/runner/expression.go:372 +0x7a code.forgejo.org/forgejo/runner/v9/act/runner.(*expressionEvaluator).EvaluateYamlNode() :1 +0x6b code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnPlatformNames() /.../forgejo-runner/act/runner/run_context.go:1019 +0x2af code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage() /.../forgejo-runner/act/runner/run_context.go:1002 +0x772 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage() /.../forgejo-runner/act/runner/run_context.go:1032 +0x77 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled() /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1() /.../forgejo-runner/act/runner/run_context.go:964 +0x4b code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1() /.../forgejo-runner/act/runner/runner.go:223 +0x271 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1() /.../forgejo-runner/act/common/executor.go:107 +0x61 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1() /.../forgejo-runner/act/common/executor.go:109 +0x4f Previous read at 0x00c00047e0b0 by goroutine 1742: code.forgejo.org/forgejo/runner/v9/act/model.(*Job).RunsOn() /.../forgejo-runner/act/model/workflow.go:361 +0x3c4 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage() /.../forgejo-runner/act/runner/run_context.go:991 +0x57a code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage() /.../forgejo-runner/act/runner/run_context.go:1032 +0x77 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled() /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1() /.../forgejo-runner/act/runner/run_context.go:964 +0x4b code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1() /.../forgejo-runner/act/runner/runner.go:223 +0x271 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1() /.../forgejo-runner/act/common/executor.go:107 +0x61 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1() /.../forgejo-runner/act/common/executor.go:109 +0x4f ... ================== ``` - bug fixes - [PR](https://code.forgejo.org/forgejo/runner/pulls/871): fix: data race in 'runs-on' expressions causes incorrect job labels during execution Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/871 Reviewed-by: earl-warren Co-authored-by: Mathieu Fenniak Co-committed-by: Mathieu Fenniak --- act/jobparser/model.go | 4 -- act/model/workflow.go | 13 ++++-- act/runner/run_context.go | 9 +++- act/runner/runner_test.go | 48 ++++++++++++++++++++- act/runner/testdata/matrix-runs-on/push.yml | 14 ++++++ 5 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 act/runner/testdata/matrix-runs-on/push.yml diff --git a/act/jobparser/model.go b/act/jobparser/model.go index dd446596..9de9eab7 100644 --- a/act/jobparser/model.go +++ b/act/jobparser/model.go @@ -119,10 +119,6 @@ func (j *Job) EraseNeeds() *Job { return j } -func (j *Job) RunsOn() []string { - return (&model.Job{RawRunsOn: j.RawRunsOn}).RunsOn() -} - type Step struct { ID string `yaml:"id,omitempty"` If yaml.Node `yaml:"if,omitempty"` diff --git a/act/model/workflow.go b/act/model/workflow.go index 0cc337a4..c9acd375 100644 --- a/act/model/workflow.go +++ b/act/model/workflow.go @@ -339,14 +339,21 @@ func (j *Job) Needs() []string { // RunsOn list for Job func (j *Job) RunsOn() []string { - switch j.RawRunsOn.Kind { + return FlattenRunsOnNode(j.RawRunsOn) +} + +// Given an already expression-evaluated `runs-on` yaml node from a job, compute all the labels that will be run for a +// job. Can be a single string label, an array of labels, or an object { group: "...", labels: [...] }; +// FlattenRunsOnNode will flatten all the options down to a []string. +func FlattenRunsOnNode(runsOn yaml.Node) []string { + switch runsOn.Kind { case yaml.MappingNode: var val struct { Group string Labels yaml.Node } - if !decodeNode(j.RawRunsOn, &val) { + if !decodeNode(runsOn, &val) { return nil } @@ -358,7 +365,7 @@ func (j *Job) RunsOn() []string { return labels default: - return nodeAsStringSlice(j.RawRunsOn) + return nodeAsStringSlice(runsOn) } } diff --git a/act/runner/run_context.go b/act/runner/run_context.go index 9a8bd3e5..86500854 100644 --- a/act/runner/run_context.go +++ b/act/runner/run_context.go @@ -993,12 +993,17 @@ func (rc *RunContext) runsOnPlatformNames(ctx context.Context) []string { return []string{} } - if err := rc.ExprEval.EvaluateYamlNode(ctx, &job.RawRunsOn); err != nil { + // Copy rawRunsOn from the job. `EvaluateYamlNode` later will mutate the yaml node in-place applying expression + // evaluation to it from the RunContext -- but the job object is shared in matrix executions between multiple + // running matrix jobs and `rc.EvalExpr` is specific to one matrix job. By copying the object we avoid mutating the + // shared field as it is accessed by multiple goroutines. + rawRunsOn := job.RawRunsOn + if err := rc.ExprEval.EvaluateYamlNode(ctx, &rawRunsOn); err != nil { common.Logger(ctx).Errorf("Error while evaluating runs-on: %v", err) return []string{} } - return job.RunsOn() + return model.FlattenRunsOnNode(rawRunsOn) } func (rc *RunContext) platformImage(ctx context.Context) string { diff --git a/act/runner/runner_test.go b/act/runner/runner_test.go index 10634673..cee533e3 100644 --- a/act/runner/runner_test.go +++ b/act/runner/runner_test.go @@ -9,6 +9,7 @@ import ( "path" "path/filepath" "strings" + "sync" "testing" "github.com/joho/godotenv" @@ -476,7 +477,24 @@ func TestRunner_RunSkipped(t *testing.T) { } type maskJobLoggerFactory struct { - Output bytes.Buffer + Output syncBuffer +} + +type syncBuffer struct { + buffer bytes.Buffer + mutex sync.Mutex +} + +func (sb *syncBuffer) Write(p []byte) (n int, err error) { + sb.mutex.Lock() + defer sb.mutex.Unlock() + return sb.buffer.Write(p) +} + +func (sb *syncBuffer) String() string { + sb.mutex.Lock() + defer sb.mutex.Unlock() + return sb.buffer.String() } func (f *maskJobLoggerFactory) WithJobLogger() *log.Logger { @@ -641,3 +659,31 @@ func TestRunner_RunMatrixWithUserDefinedInclusions(t *testing.T) { tjfi.runTest(t.Context(), t, &Config{Matrix: matrix}) } + +// Regression test against `runs-on` in a matrix run, which references the matrix values, being corrupted by multiple +// concurrent goroutines. +func TestRunner_RunsOnMatrix(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + log.SetLevel(log.DebugLevel) + + tjfi := TestJobFileInfo{ + workdir: workdir, + workflowPath: "matrix-runs-on", + eventName: "push", + errorMessage: "", + platforms: platforms, + } + + logger := &maskJobLoggerFactory{} + tjfi.runTest(WithJobLoggerFactory(common.WithLogger(t.Context(), logger.WithJobLogger()), logger), t, &Config{}) + output := logger.Output.String() + + // job 1 should succeed because `ubuntu-latest` is a valid runs-on target... + assert.Contains(t, output, "msg=\"🏁 Job succeeded\" dryrun=false job=test/matrix-runs-on-1", "expected job 1 to succeed, but did not find success message") + + // job 2 should be skipped because `ubuntu-20.04` is not a valid runs-on target in `platforms`. + assert.Contains(t, output, "msg=\"🚧 Skipping unsupported platform -- Try running with `-P ubuntu-20.04=...`\" dryrun=false job=test/matrix-runs-on-2", "expected job 2 to be skipped, but it was not") +} diff --git a/act/runner/testdata/matrix-runs-on/push.yml b/act/runner/testdata/matrix-runs-on/push.yml new file mode 100644 index 00000000..8863b17f --- /dev/null +++ b/act/runner/testdata/matrix-runs-on/push.yml @@ -0,0 +1,14 @@ +name: test + +on: push + +jobs: + matrix-runs-on: + strategy: + matrix: + os: [ubuntu-latest, ubuntu-20.04] + runs-on: ${{ matrix.os }} + steps: + - run: | + echo TESTOUTPUT: matrix.os == ${{ matrix.os }} + echo TESTOUTPUT: runs-on == ${{ jobs.matrix-runs-on.runs-on }}