From 76bf87f472ae3dca88dbd970032610bf1ea6f44e Mon Sep 17 00:00:00 2001
From: Mathieu Fenniak <mathieu@fenniak.net>
Date: Fri, 15 Aug 2025 21:10:53 +0000
Subject: [PATCH] fix: data race condition causing incorrect `shell` on a task
 step if it referenced a matrix variable (#865)

The `setupShell` function would update the shell stored on the `Step` object, setting it to either a default value from the job, an expression evaluated in the context of the job, a default from the workflow, or finally falling back to bash or powershell defaults.  Typically this would be fine -- although it would trigger the data race detector because the `Step` is a shared object between multiple concurrent matrix evaluations for the job.

In the *really quite unlikely* case that the `shell` field on a step or job referenced a matrix variable, this data race would actually trigger the shared step's `Shell` value to end up as "whichever one was evaluated last", causing the wrong shell to be used.  The new `matrix-shell` test triggers this behavior, and fails without the associated code fix.

As a fix, the `Shell` field in `Step` is never mutated; instead only the value on non-shared `stepRun` instance is updated from `setupShellCommand`.  `Shell` was renamed to `RawShell` as part of verifying all references were updated and it seemed to make sense to keep that name since it is a pre-evaluator value.

```
==================
WARNING: DATA RACE
Write at 0x00c00013e9b0 by goroutine 1470:
  code.forgejo.org/forgejo/runner/v9/act/runner.(*stepRun).setupShell()
      /.../forgejo-runner/act/runner/step_run.go:210 +0x8f2
  code.forgejo.org/forgejo/runner/v9/act/common/git.FindGitRevision()
      /.../forgejo-runner/act/common/git/git.go:58 +0xc4
  code.forgejo.org/forgejo/runner/v9/act/model.(*GithubContext).SetSha()
      /.../forgejo-runner/act/model/github_context.go:161 +0x6b5
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).getGithubContext()
      /.../forgejo-runner/act/runner/run_context.go:1228 +0x26ca
  ...

Previous write at 0x00c00013e9b0 by goroutine 1469:
  code.forgejo.org/forgejo/runner/v9/act/runner.(*stepRun).setupShell()
      /.../forgejo-runner/act/runner/step_run.go:210 +0x8f2
  code.forgejo.org/forgejo/runner/v9/act/common/git.FindGitRevision()
      /.../forgejo-runner/act/common/git/git.go:58 +0xc4
  code.forgejo.org/forgejo/runner/v9/act/model.(*GithubContext).SetSha()
      /.../forgejo-runner/act/model/github_context.go:161 +0x6b5
  code.forgejo.org/forgejo/runner/v9/act/runner.(*RunContext).getGithubContext()
      /.../forgejo-runner/act/runner/run_context.go:1228 +0x26ca

...
==================
```

<!--start release-notes-assistant-->
<!--URL:https://code.forgejo.org/forgejo/runner-->
- bug fixes
  - [PR](https://code.forgejo.org/forgejo/runner/pulls/865): <!--number 865 --><!--line 0 --><!--description Zml4OiBkYXRhIHJhY2UgY29uZGl0aW9uIGNhdXNpbmcgaW5jb3JyZWN0IGBzaGVsbGAgb24gYSB0YXNrIHN0ZXAgaWYgaXQgcmVmZXJlbmNlZCBhIG1hdHJpeCB2YXJpYWJsZQ==-->fix: data race condition causing incorrect `shell` on a task step if it referenced a matrix variable<!--description-->
<!--end release-notes-assistant-->

Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/865
Reviewed-by: earl-warren <earl-warren@noreply.code.forgejo.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
---
 act/model/workflow.go                     | 28 +----------
 act/model/workflow_test.go                | 19 --------
 act/runner/runner_test.go                 |  1 +
 act/runner/step_run.go                    | 57 ++++++++++++++++-------
 act/runner/step_run_test.go               | 19 ++++++++
 act/runner/testdata/matrix-shell/push.yml | 32 +++++++++++++
 6 files changed, 93 insertions(+), 63 deletions(-)
 create mode 100644 act/runner/testdata/matrix-shell/push.yml

diff --git a/act/model/workflow.go b/act/model/workflow.go
index 5ca423c3..0cc337a4 100644
--- a/act/model/workflow.go
+++ b/act/model/workflow.go
@@ -613,7 +613,7 @@ type Step struct {
 	Uses               string            `yaml:"uses"`
 	Run                string            `yaml:"run"`
 	WorkingDirectory   string            `yaml:"working-directory"`
-	Shell              string            `yaml:"shell"`
+	RawShell           string            `yaml:"shell"`
 	Env                yaml.Node         `yaml:"env"`
 	With               map[string]string `yaml:"with"`
 	RawContinueOnError string            `yaml:"continue-on-error"`
@@ -649,32 +649,6 @@ func (s *Step) GetEnv() map[string]string {
 	return env
 }
 
-// ShellCommand returns the command for the shell
-func (s *Step) ShellCommand() string {
-	var shellCommand string
-
-	// Reference: https://github.com/actions/runner/blob/8109c962f09d9acc473d92c595ff43afceddb347/src/Runner.Worker/Handlers/ScriptHandlerHelpers.cs#L9-L17
-	switch s.Shell {
-	case "", "bash":
-		shellCommand = "bash --noprofile --norc -e -o pipefail {0}"
-	case "pwsh":
-		shellCommand = "pwsh -command . '{0}'"
-	case "python":
-		shellCommand = "python {0}"
-	case "sh":
-		shellCommand = "sh -e {0}"
-	case "cmd":
-		shellCommand = "cmd /D /E:ON /V:OFF /S /C \"CALL \"{0}\"\""
-	case "powershell":
-		shellCommand = "powershell -command . '{0}'"
-	case "node":
-		shellCommand = "node {0}"
-	default:
-		shellCommand = s.Shell
-	}
-	return shellCommand
-}
-
 // StepType describes what type of step we are about to run
 type StepType int
 
diff --git a/act/model/workflow_test.go b/act/model/workflow_test.go
index 6859ab8c..d1a237e4 100644
--- a/act/model/workflow_test.go
+++ b/act/model/workflow_test.go
@@ -546,25 +546,6 @@ func TestReadWorkflow_Strategy(t *testing.T) {
 	assert.Equal(t, job.Strategy.FailFast, false)
 }
 
-func TestStep_ShellCommand(t *testing.T) {
-	tests := []struct {
-		shell string
-		want  string
-	}{
-		{"pwsh -v '. {0}'", "pwsh -v '. {0}'"},
-		{"pwsh", "pwsh -command . '{0}'"},
-		{"powershell", "powershell -command . '{0}'"},
-		{"node", "node {0}"},
-		{"python", "python {0}"},
-	}
-	for _, tt := range tests {
-		t.Run(tt.shell, func(t *testing.T) {
-			got := (&Step{Shell: tt.shell}).ShellCommand()
-			assert.Equal(t, got, tt.want)
-		})
-	}
-}
-
 func TestReadWorkflow_WorkflowDispatchConfig(t *testing.T) {
 	yaml := `
     name: local-action-docker-url
diff --git a/act/runner/runner_test.go b/act/runner/runner_test.go
index 8f2ab0db..074875eb 100644
--- a/act/runner/runner_test.go
+++ b/act/runner/runner_test.go
@@ -283,6 +283,7 @@ func TestRunner_RunEvent(t *testing.T) {
 		{workdir, "matrix", "push", "", platforms, secrets},
 		{workdir, "matrix-include-exclude", "push", "", platforms, secrets},
 		{workdir, "matrix-exitcode", "push", "Job 'test' failed", platforms, secrets},
+		{workdir, "matrix-shell", "push", "", platforms, secrets},
 		{workdir, "commands", "push", "", platforms, secrets},
 		{workdir, "workdir", "push", "", platforms, secrets},
 		{workdir, "defaults-run", "push", "", platforms, secrets},
diff --git a/act/runner/step_run.go b/act/runner/step_run.go
index 68daa143..23e45a92 100644
--- a/act/runner/step_run.go
+++ b/act/runner/step_run.go
@@ -94,6 +94,28 @@ func getScriptName(rc *RunContext, step *model.Step) string {
 	return fmt.Sprintf("workflow/%s", scriptName)
 }
 
+func shellCommand(shell string) string {
+	// Reference: https://github.com/actions/runner/blob/8109c962f09d9acc473d92c595ff43afceddb347/src/Runner.Worker/Handlers/ScriptHandlerHelpers.cs#L9-L17
+	switch shell {
+	case "", "bash":
+		return "bash --noprofile --norc -e -o pipefail {0}"
+	case "pwsh":
+		return "pwsh -command . '{0}'"
+	case "python":
+		return "python {0}"
+	case "sh":
+		return "sh -e {0}"
+	case "cmd":
+		return "cmd /D /E:ON /V:OFF /S /C \"CALL \"{0}\"\""
+	case "powershell":
+		return "powershell -command . '{0}'"
+	case "node":
+		return "node {0}"
+	default:
+		return shell
+	}
+}
+
 // TODO: Currently we just ignore top level keys, BUT we should return proper error on them
 // BUTx2 I leave this for when we rewrite act to use actionlint for workflow validation
 // so we return proper errors before any execution or spawning containers
@@ -101,22 +123,21 @@ func getScriptName(rc *RunContext, step *model.Step) string {
 // OCI runtime exec failed: exec failed: container_linux.go:380: starting container process caused: exec: "${{": executable file not found in $PATH: unknown
 func (sr *stepRun) setupShellCommand(ctx context.Context) (name, script string, err error) {
 	logger := common.Logger(ctx)
-	sr.setupShell(ctx)
+	shell := sr.interpretShell(ctx)
 	sr.setupWorkingDirectory(ctx)
 
 	step := sr.Step
 
 	script = sr.RunContext.NewStepExpressionEvaluator(ctx, sr).Interpolate(ctx, step.Run)
 
-	scCmd := step.ShellCommand()
-
+	shellCommand := shellCommand(shell)
 	name = getScriptName(sr.RunContext, step)
 
 	// Reference: https://github.com/actions/runner/blob/8109c962f09d9acc473d92c595ff43afceddb347/src/Runner.Worker/Handlers/ScriptHandlerHelpers.cs#L47-L64
 	// Reference: https://github.com/actions/runner/blob/8109c962f09d9acc473d92c595ff43afceddb347/src/Runner.Worker/Handlers/ScriptHandlerHelpers.cs#L19-L27
 	runPrepend := ""
 	runAppend := ""
-	switch step.Shell {
+	switch shell {
 	case "bash", "sh":
 		name += ".sh"
 	case "pwsh", "powershell":
@@ -142,7 +163,7 @@ func (sr *stepRun) setupShellCommand(ctx context.Context) (name, script string,
 
 	rc := sr.getRunContext()
 	scriptPath := fmt.Sprintf("%s/%s", rc.JobContainer.GetActPath(), name)
-	sr.cmdline = strings.Replace(scCmd, `{0}`, scriptPath, 1)
+	sr.cmdline = strings.Replace(shellCommand, `{0}`, scriptPath, 1)
 	sr.cmd, err = shellquote.Split(sr.cmdline)
 
 	return name, script, err
@@ -164,34 +185,34 @@ func (l *localEnv) Getenv(name string) string {
 	return l.env[name]
 }
 
-func (sr *stepRun) setupShell(ctx context.Context) {
+func (sr *stepRun) interpretShell(ctx context.Context) string {
 	rc := sr.RunContext
-	step := sr.Step
+	shell := sr.Step.RawShell
 
-	if step.Shell == "" {
-		step.Shell = rc.Run.Job().Defaults.Run.Shell
+	if shell == "" {
+		shell = rc.Run.Job().Defaults.Run.Shell
 	}
 
-	step.Shell = rc.NewExpressionEvaluator(ctx).Interpolate(ctx, step.Shell)
+	shell = rc.NewExpressionEvaluator(ctx).Interpolate(ctx, shell)
 
-	if step.Shell == "" {
-		step.Shell = rc.Run.Workflow.Defaults.Run.Shell
+	if shell == "" {
+		shell = rc.Run.Workflow.Defaults.Run.Shell
 	}
 
-	if step.Shell == "" {
+	if shell == "" {
 		if _, ok := rc.JobContainer.(*container.HostEnvironment); ok {
 			shellWithFallback := []string{"bash", "sh"}
 			// Don't use bash on windows by default, if not using a docker container
 			if runtime.GOOS == "windows" {
 				shellWithFallback = []string{"pwsh", "powershell"}
 			}
-			step.Shell = shellWithFallback[0]
+			shell = shellWithFallback[0]
 			lenv := &localEnv{env: map[string]string{}}
 			maps.Copy(lenv.env, sr.env)
 			sr.getRunContext().ApplyExtraPath(ctx, &lenv.env)
 			_, err := lookpath.LookPath2(shellWithFallback[0], lenv)
 			if err != nil {
-				step.Shell = shellWithFallback[1]
+				shell = shellWithFallback[1]
 			}
 		} else {
 			shellFallback := `
@@ -204,11 +225,13 @@ fi
 			stdout, _, err := rc.sh(ctx, shellFallback)
 			if err != nil {
 				common.Logger(ctx).Error("fail to run %q: %v", shellFallback, err)
-				return
+			} else {
+				shell = stdout
 			}
-			step.Shell = stdout
 		}
 	}
+
+	return shell
 }
 
 func (sr *stepRun) setupWorkingDirectory(ctx context.Context) {
diff --git a/act/runner/step_run_test.go b/act/runner/step_run_test.go
index b49e2405..1fe376f2 100644
--- a/act/runner/step_run_test.go
+++ b/act/runner/step_run_test.go
@@ -93,3 +93,22 @@ func TestStepRunPrePost(t *testing.T) {
 	err = sr.post()(ctx)
 	assert.Nil(t, err)
 }
+
+func TestStepShellCommand(t *testing.T) {
+	tests := []struct {
+		shell string
+		want  string
+	}{
+		{"pwsh -v '. {0}'", "pwsh -v '. {0}'"},
+		{"pwsh", "pwsh -command . '{0}'"},
+		{"powershell", "powershell -command . '{0}'"},
+		{"node", "node {0}"},
+		{"python", "python {0}"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.shell, func(t *testing.T) {
+			got := shellCommand(tt.shell)
+			assert.Equal(t, got, tt.want)
+		})
+	}
+}
diff --git a/act/runner/testdata/matrix-shell/push.yml b/act/runner/testdata/matrix-shell/push.yml
new file mode 100644
index 00000000..d7e1e5ef
--- /dev/null
+++ b/act/runner/testdata/matrix-shell/push.yml
@@ -0,0 +1,32 @@
+name: test
+
+on: push
+
+jobs:
+  matrix-shell:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        shell-to-use: ["bash", "cat {0}", "sh"]
+    steps:
+      - shell: ${{ matrix.shell-to-use }}
+        run: |
+          expected=${{ matrix.shell-to-use }}
+          if [ "$expected" = "bash" ]; then
+            if [ -z "$BASH_VERSION" ]; then
+              echo "Expected to execute bash shell, but BASH_VERSION is unset"
+              exit 1
+            else
+              echo "Successfully running in bash ($BASH_VERSION)"
+            fi
+          elif [ "$expected" = "sh" ]; then
+            if [ -n "$BASH_VERSION" ]; then
+              echo "Expected to execute in sh shell, but BASH_VERSION is set ($BASH_VERSION)"
+              exit 1
+            else
+              echo "Probably running in sh"
+            fi
+          else
+            echo "Not sure what's happening; expected shell is $expected ?!"
+            exit 1
+          fi