mirror of
https://code.forgejo.org/forgejo/runner.git
synced 2025-09-05 18:40:59 +00:00
fix: data race in 'runs-on' expressions causes incorrect job labels during execution (#871)
A job with a `runs-on` that references matrix variables will not run with the expected labels. eg. ``` jobs: matrix-runs-on: strategy: matrix: os: [ubuntu-latest, ubuntu-20.04] runs-on: ${{ matrix.os }} steps: ... ``` Due to shared mutated state, both jobs that this generates will (w/ a race condition) either run with the `ubuntu-latest` or `ubuntu-20.04`, but rarely (never observed) with the expected outcome of running on both labels. `EvaluateYamlNode` is used to evaluate expressions in the `runs-on` field in the context of the current running job, but mutating an object shared between multiple concurrent jobs (in matrix evaluation). This results in the evaluation results from one job spilling into another and corrupting their `runs-on` labels. ``` ================== WARNING: DATA RACE Write at 0x00c00047e0b0 by goroutine 1739: reflect.typedmemmove() /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/runtime/mbarrier.go:213 +0x0 reflect.Value.Set() /.../go/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.6.linux-amd64/src/reflect/value.go:2062 +0x184 gopkg.in/yaml%2ev3.(*decoder).unmarshal() /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/decode.go:493 +0x7b4 gopkg.in/yaml%2ev3.(*Node).Decode() /.../go/pkg/mod/gopkg.in/yaml.v3@v3.0.1/yaml.go:149 +0x355 code.forgejo.org/forgejo/runner/v9/act/runner.expressionEvaluator.EvaluateYamlNode() /.../forgejo-runner/act/runner/expression.go:372 +0x7a code.forgejo.org/forgejo/runner/v9/act/runner.(*expressionEvaluator).EvaluateYamlNode() <autogenerated>:1 +0x6b code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnPlatformNames() /.../forgejo-runner/act/runner/run_context.go:1019 +0x2af code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage() /.../forgejo-runner/act/runner/run_context.go:1002 +0x772 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage() /.../forgejo-runner/act/runner/run_context.go:1032 +0x77 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled() /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1() /.../forgejo-runner/act/runner/run_context.go:964 +0x4b code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1() /.../forgejo-runner/act/runner/runner.go:223 +0x271 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1() /.../forgejo-runner/act/common/executor.go:107 +0x61 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1() /.../forgejo-runner/act/common/executor.go:109 +0x4f Previous read at 0x00c00047e0b0 by goroutine 1742: code.forgejo.org/forgejo/runner/v9/act/model.(*Job).RunsOn() /.../forgejo-runner/act/model/workflow.go:361 +0x3c4 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).runsOnImage() /.../forgejo-runner/act/runner/run_context.go:991 +0x57a code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).platformImage() /.../forgejo-runner/act/runner/run_context.go:1032 +0x77 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).isEnabled() /.../forgejo-runner/act/runner/run_context.go:1069 +0x3c7 code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).Executor.func1() /.../forgejo-runner/act/runner/run_context.go:964 +0x4b code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.1() /.../forgejo-runner/act/runner/runner.go:223 +0x271 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.1() /.../forgejo-runner/act/common/executor.go:107 +0x61 code.forgejo.org/forgejo/runner/v9/act/runner.(*runnerImpl).NewPlanExecutor.func1.NewParallelExecutor.2.gowrap1() /.../forgejo-runner/act/common/executor.go:109 +0x4f ... ================== ``` <!--start release-notes-assistant--> <!--URL:https://code.forgejo.org/forgejo/runner--> - bug fixes - [PR](https://code.forgejo.org/forgejo/runner/pulls/871): <!--number 871 --><!--line 0 --><!--description Zml4OiBkYXRhIHJhY2UgaW4gJ3J1bnMtb24nIGV4cHJlc3Npb25zIGNhdXNlcyBpbmNvcnJlY3Qgam9iIGxhYmVscyBkdXJpbmcgZXhlY3V0aW9u-->fix: data race in 'runs-on' expressions causes incorrect job labels during execution<!--description--> <!--end release-notes-assistant--> Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/871 Reviewed-by: earl-warren <earl-warren@noreply.code.forgejo.org> Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net> Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
This commit is contained in:
parent
776f0641fd
commit
165b44deec
5 changed files with 78 additions and 10 deletions
|
@ -119,10 +119,6 @@ func (j *Job) EraseNeeds() *Job {
|
||||||
return j
|
return j
|
||||||
}
|
}
|
||||||
|
|
||||||
func (j *Job) RunsOn() []string {
|
|
||||||
return (&model.Job{RawRunsOn: j.RawRunsOn}).RunsOn()
|
|
||||||
}
|
|
||||||
|
|
||||||
type Step struct {
|
type Step struct {
|
||||||
ID string `yaml:"id,omitempty"`
|
ID string `yaml:"id,omitempty"`
|
||||||
If yaml.Node `yaml:"if,omitempty"`
|
If yaml.Node `yaml:"if,omitempty"`
|
||||||
|
|
|
@ -339,14 +339,21 @@ func (j *Job) Needs() []string {
|
||||||
|
|
||||||
// RunsOn list for Job
|
// RunsOn list for Job
|
||||||
func (j *Job) RunsOn() []string {
|
func (j *Job) RunsOn() []string {
|
||||||
switch j.RawRunsOn.Kind {
|
return FlattenRunsOnNode(j.RawRunsOn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given an already expression-evaluated `runs-on` yaml node from a job, compute all the labels that will be run for a
|
||||||
|
// job. Can be a single string label, an array of labels, or an object { group: "...", labels: [...] };
|
||||||
|
// FlattenRunsOnNode will flatten all the options down to a []string.
|
||||||
|
func FlattenRunsOnNode(runsOn yaml.Node) []string {
|
||||||
|
switch runsOn.Kind {
|
||||||
case yaml.MappingNode:
|
case yaml.MappingNode:
|
||||||
var val struct {
|
var val struct {
|
||||||
Group string
|
Group string
|
||||||
Labels yaml.Node
|
Labels yaml.Node
|
||||||
}
|
}
|
||||||
|
|
||||||
if !decodeNode(j.RawRunsOn, &val) {
|
if !decodeNode(runsOn, &val) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -358,7 +365,7 @@ func (j *Job) RunsOn() []string {
|
||||||
|
|
||||||
return labels
|
return labels
|
||||||
default:
|
default:
|
||||||
return nodeAsStringSlice(j.RawRunsOn)
|
return nodeAsStringSlice(runsOn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -993,12 +993,17 @@ func (rc *RunContext) runsOnPlatformNames(ctx context.Context) []string {
|
||||||
return []string{}
|
return []string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := rc.ExprEval.EvaluateYamlNode(ctx, &job.RawRunsOn); err != nil {
|
// Copy rawRunsOn from the job. `EvaluateYamlNode` later will mutate the yaml node in-place applying expression
|
||||||
|
// evaluation to it from the RunContext -- but the job object is shared in matrix executions between multiple
|
||||||
|
// running matrix jobs and `rc.EvalExpr` is specific to one matrix job. By copying the object we avoid mutating the
|
||||||
|
// shared field as it is accessed by multiple goroutines.
|
||||||
|
rawRunsOn := job.RawRunsOn
|
||||||
|
if err := rc.ExprEval.EvaluateYamlNode(ctx, &rawRunsOn); err != nil {
|
||||||
common.Logger(ctx).Errorf("Error while evaluating runs-on: %v", err)
|
common.Logger(ctx).Errorf("Error while evaluating runs-on: %v", err)
|
||||||
return []string{}
|
return []string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
return job.RunsOn()
|
return model.FlattenRunsOnNode(rawRunsOn)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rc *RunContext) platformImage(ctx context.Context) string {
|
func (rc *RunContext) platformImage(ctx context.Context) string {
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/joho/godotenv"
|
"github.com/joho/godotenv"
|
||||||
|
@ -476,7 +477,24 @@ func TestRunner_RunSkipped(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
type maskJobLoggerFactory struct {
|
type maskJobLoggerFactory struct {
|
||||||
Output bytes.Buffer
|
Output syncBuffer
|
||||||
|
}
|
||||||
|
|
||||||
|
type syncBuffer struct {
|
||||||
|
buffer bytes.Buffer
|
||||||
|
mutex sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sb *syncBuffer) Write(p []byte) (n int, err error) {
|
||||||
|
sb.mutex.Lock()
|
||||||
|
defer sb.mutex.Unlock()
|
||||||
|
return sb.buffer.Write(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sb *syncBuffer) String() string {
|
||||||
|
sb.mutex.Lock()
|
||||||
|
defer sb.mutex.Unlock()
|
||||||
|
return sb.buffer.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *maskJobLoggerFactory) WithJobLogger() *log.Logger {
|
func (f *maskJobLoggerFactory) WithJobLogger() *log.Logger {
|
||||||
|
@ -641,3 +659,31 @@ func TestRunner_RunMatrixWithUserDefinedInclusions(t *testing.T) {
|
||||||
|
|
||||||
tjfi.runTest(t.Context(), t, &Config{Matrix: matrix})
|
tjfi.runTest(t.Context(), t, &Config{Matrix: matrix})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test against `runs-on` in a matrix run, which references the matrix values, being corrupted by multiple
|
||||||
|
// concurrent goroutines.
|
||||||
|
func TestRunner_RunsOnMatrix(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping integration test")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.SetLevel(log.DebugLevel)
|
||||||
|
|
||||||
|
tjfi := TestJobFileInfo{
|
||||||
|
workdir: workdir,
|
||||||
|
workflowPath: "matrix-runs-on",
|
||||||
|
eventName: "push",
|
||||||
|
errorMessage: "",
|
||||||
|
platforms: platforms,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger := &maskJobLoggerFactory{}
|
||||||
|
tjfi.runTest(WithJobLoggerFactory(common.WithLogger(t.Context(), logger.WithJobLogger()), logger), t, &Config{})
|
||||||
|
output := logger.Output.String()
|
||||||
|
|
||||||
|
// job 1 should succeed because `ubuntu-latest` is a valid runs-on target...
|
||||||
|
assert.Contains(t, output, "msg=\"🏁 Job succeeded\" dryrun=false job=test/matrix-runs-on-1", "expected job 1 to succeed, but did not find success message")
|
||||||
|
|
||||||
|
// job 2 should be skipped because `ubuntu-20.04` is not a valid runs-on target in `platforms`.
|
||||||
|
assert.Contains(t, output, "msg=\"🚧 Skipping unsupported platform -- Try running with `-P ubuntu-20.04=...`\" dryrun=false job=test/matrix-runs-on-2", "expected job 2 to be skipped, but it was not")
|
||||||
|
}
|
||||||
|
|
14
act/runner/testdata/matrix-runs-on/push.yml
vendored
Normal file
14
act/runner/testdata/matrix-runs-on/push.yml
vendored
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
name: test
|
||||||
|
|
||||||
|
on: push
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
matrix-runs-on:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, ubuntu-20.04]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
steps:
|
||||||
|
- run: |
|
||||||
|
echo TESTOUTPUT: matrix.os == ${{ matrix.os }}
|
||||||
|
echo TESTOUTPUT: runs-on == ${{ jobs.matrix-runs-on.runs-on }}
|
Loading…
Add table
Add a link
Reference in a new issue