mirror of
https://code.forgejo.org/forgejo/runner.git
synced 2025-08-11 17:50:58 +00:00
wait as long as required for health check configuration
timeout when context is cancelled, rather than fixed iteration count
This commit is contained in:
parent
d14092ea56
commit
b46c03d75a
4 changed files with 66 additions and 20 deletions
|
@ -3,6 +3,7 @@ package container
|
|||
import (
|
||||
"context"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"code.forgejo.org/forgejo/runner/v9/act/common"
|
||||
"github.com/docker/go-connections/nat"
|
||||
|
@ -63,7 +64,8 @@ type Container interface {
|
|||
Remove() common.Executor
|
||||
Close() common.Executor
|
||||
ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer)
|
||||
GetHealth(ctx context.Context) Health
|
||||
GetHealth(ctx context.Context) (Health, error)
|
||||
GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error)
|
||||
}
|
||||
|
||||
// NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Masterminds/semver"
|
||||
"github.com/docker/cli/cli/compose/loader"
|
||||
|
@ -191,28 +192,52 @@ func (cr *containerReference) Remove() common.Executor {
|
|||
).IfNot(common.Dryrun)
|
||||
}
|
||||
|
||||
func (cr *containerReference) GetHealth(ctx context.Context) Health {
|
||||
func (cr *containerReference) GetHealth(ctx context.Context) (Health, error) {
|
||||
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
|
||||
logger := common.Logger(ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to query container health %s", err)
|
||||
return HealthUnHealthy
|
||||
return HealthUnHealthy, err
|
||||
}
|
||||
if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
|
||||
logger.Debugf("no container health check defined")
|
||||
return HealthHealthy
|
||||
return HealthHealthy, nil
|
||||
}
|
||||
|
||||
logger.Infof("container health of %s (%s) is %s", cr.id, resp.Config.Image, resp.State.Health.Status)
|
||||
switch resp.State.Health.Status {
|
||||
case "starting":
|
||||
return HealthStarting
|
||||
return HealthStarting, nil
|
||||
case "healthy":
|
||||
return HealthHealthy
|
||||
return HealthHealthy, nil
|
||||
case "unhealthy":
|
||||
return HealthUnHealthy
|
||||
return HealthUnHealthy, nil
|
||||
}
|
||||
return HealthUnHealthy
|
||||
return HealthUnHealthy, fmt.Errorf("unrecognized health state: %v", resp.State.Health.Status)
|
||||
}
|
||||
|
||||
func (cr *containerReference) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
|
||||
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp.Config == nil || resp.Config.Healthcheck == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
retries := time.Duration(resp.Config.Healthcheck.Retries)
|
||||
// Prefer using `--health-start-interval` option since we're using this timing for service container startup, but
|
||||
// fallback to `--health-interval` if it isn't defined.
|
||||
interval := resp.Config.Healthcheck.StartInterval
|
||||
if interval == 0 {
|
||||
interval = resp.Config.Healthcheck.Interval
|
||||
}
|
||||
|
||||
// Docker will run one health check, with a maximum cmd time of Timeout, every StartInterval, up to the number of
|
||||
// Retries, after an initial pause of StartPeriod. Therefore the pessimistic time that we would wait is...
|
||||
maxWait := resp.Config.Healthcheck.StartPeriod +
|
||||
(retries * resp.Config.Healthcheck.Timeout) +
|
||||
(retries * interval)
|
||||
return &maxWait, nil
|
||||
}
|
||||
|
||||
func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) {
|
||||
|
|
|
@ -493,8 +493,12 @@ func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]interfa
|
|||
}
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) GetHealth(ctx context.Context) Health {
|
||||
return HealthHealthy
|
||||
func (e *HostEnvironment) GetHealth(ctx context.Context) (Health, error) {
|
||||
return HealthHealthy, nil
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {
|
||||
|
|
|
@ -746,15 +746,34 @@ func (rc *RunContext) startServiceContainers(_ string) common.Executor {
|
|||
}
|
||||
|
||||
func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment) common.Executor {
|
||||
// FIXME: GetName() is definitely 'wrong' because it just returns "NAME". :-p
|
||||
|
||||
return func(ctx context.Context) error {
|
||||
sctx, cancel := context.WithTimeout(ctx, time.Minute*5)
|
||||
logger := common.Logger(ctx)
|
||||
timeout, err := c.GetHealthCheckTimeout(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("service container %s could not detect health check timeout due to error, no health check wait will occur: %v", c.GetName(), err)
|
||||
} else if timeout == nil {
|
||||
logger.Debugf("service container %s had no health check", c.GetName())
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, *timeout)
|
||||
defer cancel()
|
||||
var health container.Health
|
||||
|
||||
delay := time.Second
|
||||
for i := 0; ; i++ {
|
||||
health = c.GetHealth(sctx)
|
||||
if health != container.HealthStarting || i > 30 {
|
||||
break
|
||||
health, err := c.GetHealth(ctx)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
return fmt.Errorf("service container %s: timed out while waiting for healthy or unhealthy status to be reported", c.GetName())
|
||||
} else if errors.Is(err, context.Canceled) {
|
||||
return err
|
||||
} else if err != nil {
|
||||
logger.Warnf("service container %s: error while checking for health state, will retry: %v", c.GetName(), err)
|
||||
} else if health == container.HealthUnHealthy {
|
||||
return fmt.Errorf("service container %s failed health check", c.GetName())
|
||||
} else if health == container.HealthHealthy {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
|
@ -762,10 +781,6 @@ func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment)
|
|||
delay = 10 * time.Second
|
||||
}
|
||||
}
|
||||
if health == container.HealthHealthy {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("service container failed to start")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue