1
0
Fork 0
mirror of https://code.forgejo.org/forgejo/runner.git synced 2025-08-11 17:50:58 +00:00

wait as long as required for health check configuration

timeout when context is cancelled, rather than fixed iteration count
This commit is contained in:
Mathieu Fenniak 2025-08-05 14:58:53 -06:00
parent d14092ea56
commit b46c03d75a
4 changed files with 66 additions and 20 deletions

View file

@ -3,6 +3,7 @@ package container
import (
"context"
"io"
"time"
"code.forgejo.org/forgejo/runner/v9/act/common"
"github.com/docker/go-connections/nat"
@ -63,7 +64,8 @@ type Container interface {
Remove() common.Executor
Close() common.Executor
ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer)
GetHealth(ctx context.Context) Health
GetHealth(ctx context.Context) (Health, error)
GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error)
}
// NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function

View file

@ -15,6 +15,7 @@ import (
"runtime"
"strconv"
"strings"
"time"
"github.com/Masterminds/semver"
"github.com/docker/cli/cli/compose/loader"
@ -191,28 +192,52 @@ func (cr *containerReference) Remove() common.Executor {
).IfNot(common.Dryrun)
}
func (cr *containerReference) GetHealth(ctx context.Context) Health {
func (cr *containerReference) GetHealth(ctx context.Context) (Health, error) {
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
logger := common.Logger(ctx)
if err != nil {
logger.Errorf("failed to query container health %s", err)
return HealthUnHealthy
return HealthUnHealthy, err
}
if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
logger.Debugf("no container health check defined")
return HealthHealthy
return HealthHealthy, nil
}
logger.Infof("container health of %s (%s) is %s", cr.id, resp.Config.Image, resp.State.Health.Status)
switch resp.State.Health.Status {
case "starting":
return HealthStarting
return HealthStarting, nil
case "healthy":
return HealthHealthy
return HealthHealthy, nil
case "unhealthy":
return HealthUnHealthy
return HealthUnHealthy, nil
}
return HealthUnHealthy
return HealthUnHealthy, fmt.Errorf("unrecognized health state: %v", resp.State.Health.Status)
}
func (cr *containerReference) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
if err != nil {
return nil, err
}
if resp.Config == nil || resp.Config.Healthcheck == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
return nil, nil
}
retries := time.Duration(resp.Config.Healthcheck.Retries)
// Prefer using `--health-start-interval` option since we're using this timing for service container startup, but
// fallback to `--health-interval` if it isn't defined.
interval := resp.Config.Healthcheck.StartInterval
if interval == 0 {
interval = resp.Config.Healthcheck.Interval
}
// Docker will run one health check, with a maximum cmd time of Timeout, every StartInterval, up to the number of
// Retries, after an initial pause of StartPeriod. Therefore the pessimistic time that we would wait is...
maxWait := resp.Config.Healthcheck.StartPeriod +
(retries * resp.Config.Healthcheck.Timeout) +
(retries * interval)
return &maxWait, nil
}
func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) {

View file

@ -493,8 +493,12 @@ func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]interfa
}
}
func (e *HostEnvironment) GetHealth(ctx context.Context) Health {
return HealthHealthy
func (e *HostEnvironment) GetHealth(ctx context.Context) (Health, error) {
return HealthHealthy, nil
}
func (e *HostEnvironment) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
return nil, nil
}
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {

View file

@ -746,15 +746,34 @@ func (rc *RunContext) startServiceContainers(_ string) common.Executor {
}
func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment) common.Executor {
// FIXME: GetName() is definitely 'wrong' because it just returns "NAME". :-p
return func(ctx context.Context) error {
sctx, cancel := context.WithTimeout(ctx, time.Minute*5)
logger := common.Logger(ctx)
timeout, err := c.GetHealthCheckTimeout(ctx)
if err != nil {
return fmt.Errorf("service container %s could not detect health check timeout due to error, no health check wait will occur: %v", c.GetName(), err)
} else if timeout == nil {
logger.Debugf("service container %s had no health check", c.GetName())
return nil
}
ctx, cancel := context.WithTimeout(ctx, *timeout)
defer cancel()
var health container.Health
delay := time.Second
for i := 0; ; i++ {
health = c.GetHealth(sctx)
if health != container.HealthStarting || i > 30 {
break
health, err := c.GetHealth(ctx)
if errors.Is(err, context.DeadlineExceeded) {
return fmt.Errorf("service container %s: timed out while waiting for healthy or unhealthy status to be reported", c.GetName())
} else if errors.Is(err, context.Canceled) {
return err
} else if err != nil {
logger.Warnf("service container %s: error while checking for health state, will retry: %v", c.GetName(), err)
} else if health == container.HealthUnHealthy {
return fmt.Errorf("service container %s failed health check", c.GetName())
} else if health == container.HealthHealthy {
return nil
}
time.Sleep(delay)
delay *= 2
@ -762,10 +781,6 @@ func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment)
delay = 10 * time.Second
}
}
if health == container.HealthHealthy {
return nil
}
return fmt.Errorf("service container failed to start")
}
}