From aa70cb7d7b118a0e2906d0b97533d13591524820 Mon Sep 17 00:00:00 2001 From: Mathieu Fenniak Date: Tue, 5 Aug 2025 20:54:44 -0600 Subject: [PATCH] add test case for premature termination before health check completes --- act/container/container_types.go | 3 +++ act/container/docker_run.go | 4 ++++ act/runner/run_context.go | 10 +++++++-- act/runner/runner_test.go | 3 ++- .../push.yml | 21 +++++++++++++++++++ 5 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 act/runner/testdata/mysql-service-container-premature-terminate/push.yml diff --git a/act/container/container_types.go b/act/container/container_types.go index 4790d852..76b27255 100644 --- a/act/container/container_types.go +++ b/act/container/container_types.go @@ -7,6 +7,7 @@ import ( "code.forgejo.org/forgejo/runner/v9/act/common" "github.com/docker/go-connections/nat" + "github.com/pkg/errors" ) // NewContainerInput the input for the New function @@ -93,3 +94,5 @@ const ( HealthHealthy HealthUnHealthy ) + +var ErrContainerNotFound = errors.New("container not found") diff --git a/act/container/docker_run.go b/act/container/docker_run.go index 9ed50a69..6894654f 100644 --- a/act/container/docker_run.go +++ b/act/container/docker_run.go @@ -18,6 +18,7 @@ import ( "time" "github.com/Masterminds/semver" + cerrdefs "github.com/containerd/errdefs" "github.com/docker/cli/cli/compose/loader" "github.com/docker/cli/cli/connhelper" "github.com/docker/docker/api/types" @@ -194,6 +195,9 @@ func (cr *containerReference) Remove() common.Executor { func (cr *containerReference) GetHealth(ctx context.Context) (Health, error) { resp, err := cr.cli.ContainerInspect(ctx, cr.id) + if cerrdefs.IsNotFound(err) { + return HealthUnHealthy, ErrContainerNotFound + } logger := common.Logger(ctx) if err != nil { return HealthUnHealthy, err diff --git a/act/runner/run_context.go b/act/runner/run_context.go index 57c61ad6..380853d5 100644 --- a/act/runner/run_context.go +++ b/act/runner/run_context.go @@ -768,10 +768,16 @@ func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment) return fmt.Errorf("service container %s: timed out while waiting for healthy or unhealthy status to be reported", c.GetName()) } else if errors.Is(err, context.Canceled) { return err + } else if errors.Is(err, container.ErrContainerNotFound) || (err == nil && health == container.HealthUnHealthy) { + // Container absent (terminated during health check) and unhealthy are difficult to consistently report + // differently from each other as, in docker, a terminated container will briefly appear unhealthy and + // then start reporting container not found; so, report both the same. Without any detection of the + // ErrContainerNotFound case we would just treat it as a transient failure and timeout, which would be a + // slower error mode that this is working to avoid. + return fmt.Errorf("service container %s: failed health check or terminated before becoming healthy", c.GetName()) } else if err != nil { + // assume transient error in the execution environment logger.Warnf("service container %s: error while checking for health state, will retry: %v", c.GetName(), err) - } else if health == container.HealthUnHealthy { - return fmt.Errorf("service container %s failed health check", c.GetName()) } else if health == container.HealthHealthy { return nil } diff --git a/act/runner/runner_test.go b/act/runner/runner_test.go index 2ae871b8..ac912b3a 100644 --- a/act/runner/runner_test.go +++ b/act/runner/runner_test.go @@ -323,7 +323,8 @@ func TestRunner_RunEvent(t *testing.T) { {workdir, "services", "push", "", platforms, secrets}, {workdir, "services-with-container", "push", "", platforms, secrets}, {workdir, "mysql-service-container-with-health-check", "push", "", platforms, secrets}, - {workdir, "mysql-service-container-failed-health-check", "push", "service container NAME failed health check", platforms, secrets}, + {workdir, "mysql-service-container-failed-health-check", "push", "service container NAME: failed health check or terminated before becoming healthy", platforms, secrets}, + {workdir, "mysql-service-container-premature-terminate", "push", "service container NAME: failed health check or terminated before becoming healthy", platforms, secrets}, } for _, table := range tables { diff --git a/act/runner/testdata/mysql-service-container-premature-terminate/push.yml b/act/runner/testdata/mysql-service-container-premature-terminate/push.yml new file mode 100644 index 00000000..d970dd76 --- /dev/null +++ b/act/runner/testdata/mysql-service-container-premature-terminate/push.yml @@ -0,0 +1,21 @@ +name: service-container +on: push +jobs: + service-container-test: + runs-on: ubuntu-latest + container: code.forgejo.org/oci/mysql:8.4 + services: + maindb: + image: code.forgejo.org/oci/mysql:8.4 + # This container should immediately exit due to missing env variable for poassword config. ... [ERROR] + # [Entrypoint]: Database is uninitialized and password option is not specified You need to specify one of the + # following as an environment variable: + # - MYSQL_ROOT_PASSWORD + # - MYSQL_ALLOW_EMPTY_PASSWORD + # - MYSQL_RANDOM_ROOT_PASSWORD + # + # This container should retain the same health check config as the mysql-service-container-with-health-check + # case. + options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 + steps: + - run: exit 100 # should never be hit since service will never be healthy