mirror of
https://code.forgejo.org/forgejo/runner.git
synced 2025-08-11 17:50:58 +00:00
wait as long as the health check configuration requires
time out when the context deadline expires or the context is cancelled, rather than after a fixed iteration count
This commit is contained in:
parent
d14092ea56
commit
b46c03d75a
4 changed files with 66 additions and 20 deletions
|
@ -3,6 +3,7 @@ package container
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"io"
|
"io"
|
||||||
|
"time"
|
||||||
|
|
||||||
"code.forgejo.org/forgejo/runner/v9/act/common"
|
"code.forgejo.org/forgejo/runner/v9/act/common"
|
||||||
"github.com/docker/go-connections/nat"
|
"github.com/docker/go-connections/nat"
|
||||||
|
@ -63,7 +64,8 @@ type Container interface {
|
||||||
Remove() common.Executor
|
Remove() common.Executor
|
||||||
Close() common.Executor
|
Close() common.Executor
|
||||||
ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer)
|
ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer)
|
||||||
GetHealth(ctx context.Context) Health
|
GetHealth(ctx context.Context) (Health, error)
|
||||||
|
GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function
|
// NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function
|
||||||
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/Masterminds/semver"
|
"github.com/Masterminds/semver"
|
||||||
"github.com/docker/cli/cli/compose/loader"
|
"github.com/docker/cli/cli/compose/loader"
|
||||||
|
@ -191,28 +192,52 @@ func (cr *containerReference) Remove() common.Executor {
|
||||||
).IfNot(common.Dryrun)
|
).IfNot(common.Dryrun)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cr *containerReference) GetHealth(ctx context.Context) Health {
|
func (cr *containerReference) GetHealth(ctx context.Context) (Health, error) {
|
||||||
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
|
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
|
||||||
logger := common.Logger(ctx)
|
logger := common.Logger(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("failed to query container health %s", err)
|
return HealthUnHealthy, err
|
||||||
return HealthUnHealthy
|
|
||||||
}
|
}
|
||||||
if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
|
if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
|
||||||
logger.Debugf("no container health check defined")
|
logger.Debugf("no container health check defined")
|
||||||
return HealthHealthy
|
return HealthHealthy, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Infof("container health of %s (%s) is %s", cr.id, resp.Config.Image, resp.State.Health.Status)
|
logger.Infof("container health of %s (%s) is %s", cr.id, resp.Config.Image, resp.State.Health.Status)
|
||||||
switch resp.State.Health.Status {
|
switch resp.State.Health.Status {
|
||||||
case "starting":
|
case "starting":
|
||||||
return HealthStarting
|
return HealthStarting, nil
|
||||||
case "healthy":
|
case "healthy":
|
||||||
return HealthHealthy
|
return HealthHealthy, nil
|
||||||
case "unhealthy":
|
case "unhealthy":
|
||||||
return HealthUnHealthy
|
return HealthUnHealthy, nil
|
||||||
}
|
}
|
||||||
return HealthUnHealthy
|
return HealthUnHealthy, fmt.Errorf("unrecognized health state: %v", resp.State.Health.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cr *containerReference) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
|
||||||
|
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if resp.Config == nil || resp.Config.Healthcheck == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
retries := time.Duration(resp.Config.Healthcheck.Retries)
|
||||||
|
// Prefer using `--health-start-interval` option since we're using this timing for service container startup, but
|
||||||
|
// fall back to `--health-interval` if it isn't defined.
|
||||||
|
interval := resp.Config.Healthcheck.StartInterval
|
||||||
|
if interval == 0 {
|
||||||
|
interval = resp.Config.Healthcheck.Interval
|
||||||
|
}
|
||||||
|
|
||||||
|
// Docker will run one health check, with a maximum cmd time of Timeout, every StartInterval, up to the number of
|
||||||
|
// Retries, after an initial pause of StartPeriod. Therefore the pessimistic time that we would wait is...
|
||||||
|
maxWait := resp.Config.Healthcheck.StartPeriod +
|
||||||
|
(retries * resp.Config.Healthcheck.Timeout) +
|
||||||
|
(retries * interval)
|
||||||
|
return &maxWait, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) {
|
func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) {
|
||||||
|
|
|
@ -493,8 +493,12 @@ func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]interfa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *HostEnvironment) GetHealth(ctx context.Context) Health {
|
func (e *HostEnvironment) GetHealth(ctx context.Context) (Health, error) {
|
||||||
return HealthHealthy
|
return HealthHealthy, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *HostEnvironment) GetHealthCheckTimeout(ctx context.Context) (*time.Duration, error) {
|
||||||
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {
|
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {
|
||||||
|
|
|
@ -746,15 +746,34 @@ func (rc *RunContext) startServiceContainers(_ string) common.Executor {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment) common.Executor {
|
func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment) common.Executor {
|
||||||
|
// FIXME: GetName() is definitely 'wrong' because it just returns "NAME". :-p
|
||||||
|
|
||||||
return func(ctx context.Context) error {
|
return func(ctx context.Context) error {
|
||||||
sctx, cancel := context.WithTimeout(ctx, time.Minute*5)
|
logger := common.Logger(ctx)
|
||||||
|
timeout, err := c.GetHealthCheckTimeout(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("service container %s could not detect health check timeout due to error, no health check wait will occur: %v", c.GetName(), err)
|
||||||
|
} else if timeout == nil {
|
||||||
|
logger.Debugf("service container %s had no health check", c.GetName())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, *timeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
var health container.Health
|
|
||||||
delay := time.Second
|
delay := time.Second
|
||||||
for i := 0; ; i++ {
|
for i := 0; ; i++ {
|
||||||
health = c.GetHealth(sctx)
|
health, err := c.GetHealth(ctx)
|
||||||
if health != container.HealthStarting || i > 30 {
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
break
|
return fmt.Errorf("service container %s: timed out while waiting for healthy or unhealthy status to be reported", c.GetName())
|
||||||
|
} else if errors.Is(err, context.Canceled) {
|
||||||
|
return err
|
||||||
|
} else if err != nil {
|
||||||
|
logger.Warnf("service container %s: error while checking for health state, will retry: %v", c.GetName(), err)
|
||||||
|
} else if health == container.HealthUnHealthy {
|
||||||
|
return fmt.Errorf("service container %s failed health check", c.GetName())
|
||||||
|
} else if health == container.HealthHealthy {
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
time.Sleep(delay)
|
time.Sleep(delay)
|
||||||
delay *= 2
|
delay *= 2
|
||||||
|
@ -762,10 +781,6 @@ func (rc *RunContext) waitForServiceContainer(c container.ExecutionsEnvironment)
|
||||||
delay = 10 * time.Second
|
delay = 10 * time.Second
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if health == container.HealthHealthy {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("service container failed to start")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue