From 36ca627f2e0a415a7d47170830d9d8e14034f1e3 Mon Sep 17 00:00:00 2001
From: Earl Warren
Date: Thu, 4 Sep 2025 16:55:26 +0200
Subject: [PATCH] feat: cache: fatal() helper to gracefully terminate the runner

in case of an error that is not recoverable (e.g. failing to open the
bolthold database), the cache can call fatal() to log the error and send
a TERM signal that will gracefully shutdown the daemon.
---
Notes (free text between the "---" line and the diffstat is not part of
the commit message and is ignored when the patch is applied):

 * handler.go: adds the package-level variable fatal, a function that
   logs the error through the given logger and then sends SIGTERM to
   the current process with syscall.Kill(syscall.Getpid(), ...). If
   sending the signal fails, that failure is only logged.
 * handler_test.go: TestHandler_fatal subscribes to SIGTERM with
   signal.Notify before calling fatal, so the signal is delivered to
   the test's channel. waitSig then polls that channel every
   settleTime/10 and fails the test if another signal arrives or if
   fatalWaitingTime (30s) elapses first.

 act/artifactcache/handler.go      |  8 ++++++
 act/artifactcache/handler_test.go | 43 +++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/act/artifactcache/handler.go b/act/artifactcache/handler.go
index ac7355d0..429b661c 100644
--- a/act/artifactcache/handler.go
+++ b/act/artifactcache/handler.go
@@ -13,6 +13,7 @@ import (
 	"strconv"
 	"strings"
 	"sync/atomic"
+	"syscall"
 	"time"
 
 	"github.com/julienschmidt/httprouter"
@@ -27,6 +28,13 @@ const (
 	urlBase = "/_apis/artifactcache"
 )
 
+var fatal = func(logger logrus.FieldLogger, err error) {
+	logger.Errorf("unrecoverable error in the cache: %v", err)
+	if err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM); err != nil {
+		logger.Errorf("unrecoverable error in the cache: failed to send the TERM signal to shutdown the daemon %v", err)
+	}
+}
+
 type Handler interface {
 	ExternalURL() string
 	Close() error
diff --git a/act/artifactcache/handler_test.go b/act/artifactcache/handler_test.go
index 8fc317b8..5970ea1a 100644
--- a/act/artifactcache/handler_test.go
+++ b/act/artifactcache/handler_test.go
@@ -4,11 +4,15 @@ import (
 	"bytes"
 	"crypto/rand"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
+	"os"
+	"os/signal"
 	"path/filepath"
 	"strings"
+	"syscall"
 	"testing"
 	"time"
 
@@ -1032,3 +1036,42 @@ func TestHandler_ExternalURL(t *testing.T) {
 		assert.True(t, handler.isClosed())
 	})
 }
+
+var (
+	settleTime       = 100 * time.Millisecond
+	fatalWaitingTime = 30 * time.Second
+)
+
+func waitSig(t *testing.T, c <-chan os.Signal, sig os.Signal) {
+	t.Helper()
+
+	// Sleep multiple times to give the kernel more tries to
+	// deliver the signal.
+	start := time.Now()
+	timer := time.NewTimer(settleTime / 10)
+	defer timer.Stop()
+	for time.Since(start) < fatalWaitingTime {
+		select {
+		case s := <-c:
+			if s == sig {
+				return
+			}
+			t.Fatalf("signal was %v, want %v", s, sig)
+		case <-timer.C:
+			timer.Reset(settleTime / 10)
+		}
+	}
+	t.Fatalf("timeout after %v waiting for %v", fatalWaitingTime, sig)
+}
+
+func TestHandler_fatal(t *testing.T) {
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, syscall.SIGTERM)
+	defer signal.Stop(c)
+
+	discard := logrus.New()
+	discard.Out = io.Discard
+	fatal(discard, errors.New("fatal error"))
+
+	waitSig(t, c, syscall.SIGTERM)
+}