From 37f1b278b92ddd0804c259cdf6cf3b3ba2f64496 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Sun, 3 Aug 2025 11:58:58 +0200 Subject: [PATCH] feat: wait for services to be healthy before starting a job If a --health-cmd is defined for a container, block until its status is healthy or unhealthy. The timeout is defined by the server internal logic based on associated --health-* defined delays. If it blocks indefinitely, the job timeout will eventually cancel it. While waiting, the simplest solution would be to sleep 1 second until the container is healthy or unhealthy. To minimize log verbosity, the sleep interval is instead set to --health-interval and default to one second if it is not defined. This logic does not apply to host containers as they do not support services. They are assumed to always be healthy. If --health-cmd is set for the container running a job, the first step will start to run without waiting for the container to become healthy. There may be valid use cases for that but they are not the focus of this implementation. --- act/container/container_types.go | 2 + act/container/docker_run.go | 42 ++++++++++ act/container/docker_run_test.go | 76 +++++++++++++++++++ act/container/host_environment.go | 4 + act/runner/run_context.go | 30 ++++++++ act/runner/run_context_test.go | 44 +++++++++++ act/runner/runner_test.go | 2 + .../push.yml | 21 +++++ .../push.yml | 17 +++++ act/runner/testdata/services/push.yaml | 4 +- 10 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 act/runner/testdata/mysql-service-container-premature-terminate/push.yml create mode 100644 act/runner/testdata/mysql-service-container-with-health-check/push.yml diff --git a/act/container/container_types.go b/act/container/container_types.go index 3d3a5162..8fe651f3 100644 --- a/act/container/container_types.go +++ b/act/container/container_types.go @@ -3,6 +3,7 @@ package container import ( "context" "io" + "time" "code.forgejo.org/forgejo/runner/v9/act/common" "github.com/docker/go-connections/nat" @@ -63,6 +64,7 @@ type Container interface { Remove() common.Executor Close() common.Executor ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer) + IsHealthy(ctx context.Context) (time.Duration, error) } // NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function diff --git a/act/container/docker_run.go b/act/container/docker_run.go index 1c790d6a..34eb3703 100644 --- a/act/container/docker_run.go +++ b/act/container/docker_run.go @@ -15,6 +15,7 @@ import ( "runtime" "strconv" "strings" + "time" "github.com/Masterminds/semver" "github.com/docker/cli/cli/compose/loader" @@ -191,6 +192,47 @@ func (cr *containerReference) Remove() common.Executor { ).IfNot(common.Dryrun) } +func (cr *containerReference) inspect(ctx context.Context) (container.InspectResponse, error) { + resp, err := cr.cli.ContainerInspect(ctx, cr.id) + if err != nil { + err = fmt.Errorf("service %v: %s", cr.input.NetworkAliases, err) + } + return resp, err +} + +func (cr *containerReference) IsHealthy(ctx context.Context) (time.Duration, error) { + resp, err := cr.inspect(ctx) + if err != nil { + return 0, err + } + return cr.isHealthy(ctx, resp) +} + +func (cr *containerReference) isHealthy(ctx context.Context, resp container.InspectResponse) (time.Duration, error) { + logger := common.Logger(ctx) + if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") { + logger.Debugf("no container health check defined, hope for the best") + return 0, nil + } + + switch resp.State.Health.Status { + case container.Starting: + wait := resp.Config.Healthcheck.Interval + if wait <= 0 { + wait = time.Second + } + logger.Infof("service %v: container health check %s (%s) is starting, waiting %v", cr.input.NetworkAliases, cr.id, resp.Config.Image, wait) + return wait, nil + case container.Healthy: + logger.Infof("service %v: container health check %s (%s) is healthy", cr.input.NetworkAliases, cr.id, resp.Config.Image) + return 0, nil + case container.Unhealthy: + return 0, fmt.Errorf("service %v: container health check %s (%s) is not healthy", cr.input.NetworkAliases, cr.id, resp.Config.Image) + default: + return 0, fmt.Errorf("service %v: unexpected health status %s (%s) %v", cr.input.NetworkAliases, cr.id, resp.Config.Image, resp.State.Health.Status) + } +} + func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) { out := cr.input.Stdout err := cr.input.Stderr diff --git a/act/container/docker_run_test.go b/act/container/docker_run_test.go index 1069f895..91c579fe 100644 --- a/act/container/docker_run_test.go +++ b/act/container/docker_run_test.go @@ -386,3 +386,79 @@ func TestMergeJobOptions(t *testing.T) { }) } } + +func TestDockerRun_isHealthy(t *testing.T) { + cr := containerReference{ + id: "containerid", + input: &NewContainerInput{ + NetworkAliases: []string{"servicename"}, + }, + } + ctx := context.Background() + makeInspectResponse := func(interval time.Duration, status container.HealthStatus, test []string) container.InspectResponse { + return container.InspectResponse{ + Config: &container.Config{ + Image: "example.com/some/image", + Healthcheck: &container.HealthConfig{ + Interval: interval, + Test: test, + }, + }, + ContainerJSONBase: &container.ContainerJSONBase{ + State: &container.State{ + Health: &container.Health{ + Status: status, + }, + }, + }, + } + } + + t.Run("IncompleteResponseOrNoHealthCheck", func(t *testing.T) { + wait, err := cr.isHealthy(ctx, container.InspectResponse{}) + assert.Zero(t, wait) + assert.NoError(t, err) + + // --no-healthcheck translates into a NONE test command + resp := makeInspectResponse(0, container.NoHealthcheck, []string{"NONE"}) + wait, err = cr.isHealthy(ctx, resp) + assert.Zero(t, wait) + assert.NoError(t, err) + }) + + t.Run("StartingUndefinedIntervalIsNotZero", func(t *testing.T) { + resp := makeInspectResponse(0, container.Starting, nil) + wait, err := cr.isHealthy(ctx, resp) + assert.NotZero(t, wait) + assert.NoError(t, err) + }) + + t.Run("StartingWithInterval", func(t *testing.T) { + expectedWait := time.Duration(42) + resp := makeInspectResponse(expectedWait, container.Starting, nil) + actualWait, err := cr.isHealthy(ctx, resp) + assert.Equal(t, expectedWait, actualWait) + assert.NoError(t, err) + }) + + t.Run("Unhealthy", func(t *testing.T) { + resp := makeInspectResponse(0, container.Unhealthy, nil) + wait, err := cr.isHealthy(ctx, resp) + assert.Zero(t, wait) + assert.ErrorContains(t, err, "is not healthy") + }) + + t.Run("Healthy", func(t *testing.T) { + resp := makeInspectResponse(0, container.Healthy, nil) + wait, err := cr.isHealthy(ctx, resp) + assert.Zero(t, wait) + assert.NoError(t, err) + }) + + t.Run("UnknownStatus", func(t *testing.T) { + resp := makeInspectResponse(0, container.NoHealthcheck, nil) + wait, err := cr.isHealthy(ctx, resp) + assert.Zero(t, wait) + assert.ErrorContains(t, err, "unexpected") + }) +} diff --git a/act/container/host_environment.go b/act/container/host_environment.go index 045d7774..fe1fef6d 100644 --- a/act/container/host_environment.go +++ b/act/container/host_environment.go @@ -493,6 +493,10 @@ func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]interfa } } +func (e *HostEnvironment) IsHealthy(ctx context.Context) (time.Duration, error) { + return 0, nil +} + func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) { org := e.StdOut e.StdOut = stdout diff --git a/act/runner/run_context.go b/act/runner/run_context.go index d1fc1cd8..213f5dd5 100644 --- a/act/runner/run_context.go +++ b/act/runner/run_context.go @@ -610,6 +610,7 @@ func (rc *RunContext) startJobContainer() common.Executor { Mode: 0o666, Body: "", }), + rc.waitForServiceContainers(), )(ctx) } } @@ -744,6 +745,35 @@ func (rc *RunContext) startServiceContainers(_ string) common.Executor { } } +func waitForServiceContainer(ctx context.Context, c container.ExecutionsEnvironment) error { + for { + wait, err := c.IsHealthy(ctx) + if err != nil { + return err + } + if wait == time.Duration(0) { + return nil + } + select { + case <-ctx.Done(): + return nil + case <-time.After(wait): + } + } +} + +func (rc *RunContext) waitForServiceContainers() common.Executor { + return func(ctx context.Context) error { + execs := []common.Executor{} + for _, c := range rc.ServiceContainers { + execs = append(execs, func(ctx context.Context) error { + return waitForServiceContainer(ctx, c) + }) + } + return common.NewParallelExecutor(len(execs), execs...)(ctx) + } +} + func (rc *RunContext) stopServiceContainers() common.Executor { return func(ctx context.Context) error { execs := []common.Executor{} diff --git a/act/runner/run_context_test.go b/act/runner/run_context_test.go index 24caa4ad..0fa41307 100644 --- a/act/runner/run_context_test.go +++ b/act/runner/run_context_test.go @@ -3,12 +3,14 @@ package runner import ( "cmp" "context" + "errors" "fmt" "os" "runtime" "slices" "strings" "testing" + "time" "code.forgejo.org/forgejo/runner/v9/act/container" "code.forgejo.org/forgejo/runner/v9/act/exprparser" @@ -18,6 +20,7 @@ import ( "github.com/docker/go-connections/nat" log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" yaml "gopkg.in/yaml.v3" ) @@ -824,3 +827,44 @@ jobs: }) } } + +type waitForServiceContainerMock struct { + mock.Mock + container.Container + container.LinuxContainerEnvironmentExtensions +} + +func (o *waitForServiceContainerMock) IsHealthy(ctx context.Context) (time.Duration, error) { + args := o.Called(ctx) + return args.Get(0).(time.Duration), args.Error(1) +} + +func Test_waitForServiceContainer(t *testing.T) { + t.Run("Wait", func(t *testing.T) { + m := &waitForServiceContainerMock{} + ctx := context.Background() + mock.InOrder( + m.On("IsHealthy", ctx).Return(1*time.Millisecond, nil).Once(), + m.On("IsHealthy", ctx).Return(time.Duration(0), nil).Once(), + ) + require.NoError(t, waitForServiceContainer(ctx, m)) + m.AssertExpectations(t) + }) + + t.Run("Cancel", func(t *testing.T) { + m := &waitForServiceContainerMock{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + m.On("IsHealthy", ctx).Return(1*time.Millisecond, nil).Once() + require.NoError(t, waitForServiceContainer(ctx, m)) + m.AssertExpectations(t) + }) + + t.Run("Error", func(t *testing.T) { + m := &waitForServiceContainerMock{} + ctx := context.Background() + m.On("IsHealthy", ctx).Return(time.Duration(0), errors.New("ERROR")) + require.ErrorContains(t, waitForServiceContainer(ctx, m), "ERROR") + m.AssertExpectations(t) + }) +} diff --git a/act/runner/runner_test.go b/act/runner/runner_test.go index 0a11a5ea..d7a5b876 100644 --- a/act/runner/runner_test.go +++ b/act/runner/runner_test.go @@ -322,6 +322,8 @@ func TestRunner_RunEvent(t *testing.T) { // services {workdir, "services", "push", "", platforms, secrets}, {workdir, "services-with-container", "push", "", platforms, secrets}, + {workdir, "mysql-service-container-with-health-check", "push", "", platforms, secrets}, + {workdir, "mysql-service-container-premature-terminate", "push", "service [maindb]", platforms, secrets}, } for _, table := range tables { diff --git a/act/runner/testdata/mysql-service-container-premature-terminate/push.yml b/act/runner/testdata/mysql-service-container-premature-terminate/push.yml new file mode 100644 index 00000000..768e3e3e --- /dev/null +++ b/act/runner/testdata/mysql-service-container-premature-terminate/push.yml @@ -0,0 +1,21 @@ +name: service-container +on: push +jobs: + service-container-test: + runs-on: ubuntu-latest + container: code.forgejo.org/oci/mysql:8.4 + services: + maindb: + image: code.forgejo.org/oci/mysql:8.4 + # This container should immediately exit due to missing env variable for password config. ... [ERROR] + # [Entrypoint]: Database is uninitialized and password option is not specified You need to specify one of the + # following as an environment variable: + # - MYSQL_ROOT_PASSWORD + # - MYSQL_ALLOW_EMPTY_PASSWORD + # - MYSQL_RANDOM_ROOT_PASSWORD + # + # This container should retain the same health check config as the mysql-service-container-with-health-check + # case. + options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 + steps: + - run: exit 100 # should never be hit since service will never be healthy diff --git a/act/runner/testdata/mysql-service-container-with-health-check/push.yml b/act/runner/testdata/mysql-service-container-with-health-check/push.yml new file mode 100644 index 00000000..efb8b2e5 --- /dev/null +++ b/act/runner/testdata/mysql-service-container-with-health-check/push.yml @@ -0,0 +1,17 @@ +name: service-container +on: push +jobs: + service-container-test: + runs-on: ubuntu-latest + container: code.forgejo.org/oci/mysql:8.4 + services: + maindb: + image: code.forgejo.org/oci/mysql:8.4 + env: + MYSQL_DATABASE: dbname + MYSQL_USER: dbuser + MYSQL_PASSWORD: dbpass + MYSQL_RANDOM_ROOT_PASSWORD: yes + options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 + steps: + - run: mysql -u dbuser -D dbname -pdbpass -h maindb -e "create table T(id INT NOT NULL AUTO_INCREMENT, val VARCHAR(255), PRIMARY KEY (id))" diff --git a/act/runner/testdata/services/push.yaml b/act/runner/testdata/services/push.yaml index 03ac0855..3884a43f 100644 --- a/act/runner/testdata/services/push.yaml +++ b/act/runner/testdata/services/push.yaml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest services: postgres: - image: code.forgejo.org/oci/bitnami/postgresql:16 + image: code.forgejo.org/oci/postgres:16 env: POSTGRES_USER: runner POSTGRES_PASSWORD: mysecretdbpass @@ -15,7 +15,7 @@ jobs: --health-cmd pg_isready --health-interval 10s --health-timeout 5s - --health-retries 5 + --health-retries 20 ports: - 5432:5432 steps: