1
0
Fork 0
mirror of https://code.forgejo.org/forgejo/runner.git synced 2025-09-05 18:40:59 +00:00

fix: data race in 'runs-on' expressions causes incorrect job labels during execution (#871)

A job with a `runs-on` that references matrix variables will not run with the expected labels.  eg.

```
jobs:
  matrix-runs-on:
    strategy:
      matrix:
        os: [ubuntu-latest, ubuntu-20.04]
    runs-on: ${{ matrix.os }}
    steps:
      ...
```
Due to shared mutated state, both jobs that this generates will (w/ a race condition) either run with the `ubuntu-latest` or `ubuntu-20.04`, but rarely (never observed) with the expected outcome of running on both labels.

`EvaluateYamlNode` is used to evaluate expressions in the `runs-on` field in the context of the current running job, but mutating an object shared between multiple concurrent jobs (in matrix evaluation).  This results in the evaluation results from one job spilling into another and corrupting their `runs-on` labels.

```
==================
WARNING: DATA RACE
Write at 0x00c00047e0b0 by goroutine 1739:
  reflect.typedmemmove()
      /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/runtime/mbarrier.go:213 +0x0
  reflect.Value.Set()
      /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/reflect/value.go:2062 +0x184
  gopkg.in/yaml%2ev3.(*decoder).unmarshal()
      /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/decode.go:493 +0x7b4
  gopkg.in/yaml%2ev3.(*Node).Decode()
      /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/yaml.go:149 +0x355
  code.forgejo.org/forgejo/runner/v9/act/runner.expressionEvaluator.EvaluateYamlNode()
      /.../forgejo-runner/act/runner/expression.go:372 +0x7a
  code.forgejo.org/forgejo/runner/v9/act/runner.(*expressionEvaluator).EvaluateYamlNode()
      <autogenerated>:1 +0x6b
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnPlatformNames()
      /.../forgejo-runner/act/runner/run_context.go:1019 +0x2af
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage()
      /.../forgejo-runner/act/runner/run_context.go:1002 +0x772
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage()
      /.../forgejo-runner/act/runner/run_context.go:1032 +0x77
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled()
      /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1()
      /.../forgejo-runner/act/runner/run_context.go:964 +0x4b
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1()
      /.../forgejo-runner/act/runner/runner.go:223 +0x271
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1()
      /.../forgejo-runner/act/common/executor.go:107 +0x61
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1()
      /.../forgejo-runner/act/common/executor.go:109 +0x4f

Previous read at 0x00c00047e0b0 by goroutine 1742:
  code.forgejo.org/forgejo/runner/v9/act/model.(*Job).RunsOn()
      /.../forgejo-runner/act/model/workflow.go:361 +0x3c4
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage()
      /.../forgejo-runner/act/runner/run_context.go:991 +0x57a
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage()
      /.../forgejo-runner/act/runner/run_context.go:1032 +0x77
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled()
      /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1()
      /.../forgejo-runner/act/runner/run_context.go:964 +0x4b
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1()
      /.../forgejo-runner/act/runner/runner.go:223 +0x271
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1()
      /.../forgejo-runner/act/common/executor.go:107 +0x61
  code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1()
      /.../forgejo-runner/act/common/executor.go:109 +0x4f
...
==================
```

<!--start release-notes-assistant-->
<!--URL:https://code.forgejo.org/forgejo/runner-->
- bug fixes
  - [PR](https://code.forgejo.org/forgejo/runner/pulls/871): <!--number 871 --><!--line 0 --><!--description Zml4OiBkYXRhIHJhY2UgaW4gJ3J1bnMtb24nIGV4cHJlc3Npb25zIGNhdXNlcyBpbmNvcnJlY3Qgam9iIGxhYmVscyBkdXJpbmcgZXhlY3V0aW9u-->fix: data race in 'runs-on' expressions causes incorrect job labels during execution<!--description-->
<!--end release-notes-assistant-->

Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/871
Reviewed-by: earl-warren <earl-warren@noreply.code.forgejo.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
This commit is contained in:
Mathieu Fenniak 2025-08-16 20:44:40 +00:00 committed by earl-warren
parent 776f0641fd
commit 165b44deec
No known key found for this signature in database
GPG key ID: F128CBE6AB3A7201
5 changed files with 78 additions and 10 deletions

View file

@ -119,10 +119,6 @@ func (j *Job) EraseNeeds() *Job {
return j
}
func (j *Job) RunsOn() []string {
return (&model.Job{RawRunsOn: j.RawRunsOn}).RunsOn()
}
type Step struct {
ID string `yaml:"id,omitempty"`
If yaml.Node `yaml:"if,omitempty"`

View file

@ -339,14 +339,21 @@ func (j *Job) Needs() []string {
// RunsOn list for Job
func (j *Job) RunsOn() []string {
switch j.RawRunsOn.Kind {
return FlattenRunsOnNode(j.RawRunsOn)
}
// Given an already expression-evaluated `runs-on` yaml node from a job, compute all the labels that will be run for a
// job. Can be a single string label, an array of labels, or an object { group: "...", labels: [...] };
// FlattenRunsOnNode will flatten all the options down to a []string.
func FlattenRunsOnNode(runsOn yaml.Node) []string {
switch runsOn.Kind {
case yaml.MappingNode:
var val struct {
Group string
Labels yaml.Node
}
if !decodeNode(j.RawRunsOn, &val) {
if !decodeNode(runsOn, &val) {
return nil
}
@ -358,7 +365,7 @@ func (j *Job) RunsOn() []string {
return labels
default:
return nodeAsStringSlice(j.RawRunsOn)
return nodeAsStringSlice(runsOn)
}
}

View file

@ -993,12 +993,17 @@ func (rc *RunContext) runsOnPlatformNames(ctx context.Context) []string {
return []string{}
}
if err := rc.ExprEval.EvaluateYamlNode(ctx, &job.RawRunsOn); err != nil {
// Copy rawRunsOn from the job. `EvaluateYamlNode` later will mutate the yaml node in-place applying expression
// evaluation to it from the RunContext -- but the job object is shared in matrix executions between multiple
// running matrix jobs and `rc.EvalExpr` is specific to one matrix job. By copying the object we avoid mutating the
// shared field as it is accessed by multiple goroutines.
rawRunsOn := job.RawRunsOn
if err := rc.ExprEval.EvaluateYamlNode(ctx, &rawRunsOn); err != nil {
common.Logger(ctx).Errorf("Error while evaluating runs-on: %v", err)
return []string{}
}
return job.RunsOn()
return model.FlattenRunsOnNode(rawRunsOn)
}
func (rc *RunContext) platformImage(ctx context.Context) string {

View file

@ -9,6 +9,7 @@ import (
"path"
"path/filepath"
"strings"
"sync"
"testing"
"github.com/joho/godotenv"
@ -476,7 +477,24 @@ func TestRunner_RunSkipped(t *testing.T) {
}
type maskJobLoggerFactory struct {
Output bytes.Buffer
Output syncBuffer
}
type syncBuffer struct {
buffer bytes.Buffer
mutex sync.Mutex
}
func (sb *syncBuffer) Write(p []byte) (n int, err error) {
sb.mutex.Lock()
defer sb.mutex.Unlock()
return sb.buffer.Write(p)
}
func (sb *syncBuffer) String() string {
sb.mutex.Lock()
defer sb.mutex.Unlock()
return sb.buffer.String()
}
func (f *maskJobLoggerFactory) WithJobLogger() *log.Logger {
@ -641,3 +659,31 @@ func TestRunner_RunMatrixWithUserDefinedInclusions(t *testing.T) {
tjfi.runTest(t.Context(), t, &Config{Matrix: matrix})
}
// Regression test against `runs-on` in a matrix run, which references the matrix values, being corrupted by multiple
// concurrent goroutines.
func TestRunner_RunsOnMatrix(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test")
}
log.SetLevel(log.DebugLevel)
tjfi := TestJobFileInfo{
workdir: workdir,
workflowPath: "matrix-runs-on",
eventName: "push",
errorMessage: "",
platforms: platforms,
}
logger := &maskJobLoggerFactory{}
tjfi.runTest(WithJobLoggerFactory(common.WithLogger(t.Context(), logger.WithJobLogger()), logger), t, &Config{})
output := logger.Output.String()
// job 1 should succeed because `ubuntu-latest` is a valid runs-on target...
assert.Contains(t, output, "msg=\"🏁 Job succeeded\" dryrun=false job=test/matrix-runs-on-1", "expected job 1 to succeed, but did not find success message")
// job 2 should be skipped because `ubuntu-20.04` is not a valid runs-on target in `platforms`.
assert.Contains(t, output, "msg=\"🚧 Skipping unsupported platform -- Try running with `-P ubuntu-20.04=...`\" dryrun=false job=test/matrix-runs-on-2", "expected job 2 to be skipped, but it was not")
}

View file

@ -0,0 +1,14 @@
name: test
on: push
jobs:
matrix-runs-on:
strategy:
matrix:
os: [ubuntu-latest, ubuntu-20.04]
runs-on: ${{ matrix.os }}
steps:
- run: |
echo TESTOUTPUT: matrix.os == ${{ matrix.os }}
echo TESTOUTPUT: runs-on == ${{ jobs.matrix-runs-on.runs-on }}