2023-04-28 23:57:40 +08:00
package artifactcache
import (
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
2024-01-20 14:11:50 +02:00
"regexp"
2023-04-28 23:57:40 +08:00
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/julienschmidt/httprouter"
"github.com/sirupsen/logrus"
"github.com/timshannon/bolthold"
"go.etcd.io/bbolt"
2025-07-31 10:35:11 +00:00
"code.forgejo.org/forgejo/runner/v9/act/common"
2023-04-28 23:57:40 +08:00
)
const (
// urlBase is the path prefix shared by every route registered in
// StartHandler; it is also embedded in the archiveLocation returned by find.
urlBase = "/_apis/artifactcache"
)
2025-09-04 14:38:50 +00:00
// Handler is the artifact cache server as seen by the rest of the package.
// Besides the exported lifecycle methods it lists the unexported route
// handlers and helpers implemented by handler.
type Handler interface {
// ExternalURL returns the "http://host:port" address built from the
// outbound IP and the port of the listener.
ExternalURL ( ) string
// Close closes the HTTP server and the listener.
Close ( ) error
// isClosed reports whether both the listener and the server were released.
isClosed ( ) bool
// openDB opens the bolt.db metadata store located in the handler directory.
openDB ( ) ( * bolthold . Store , error )
// find, reserve, upload, commit, get and clean are the HTTP route handlers
// registered by StartHandler under urlBase.
find ( w http . ResponseWriter , r * http . Request , params httprouter . Params )
reserve ( w http . ResponseWriter , r * http . Request , params httprouter . Params )
upload ( w http . ResponseWriter , r * http . Request , params httprouter . Params )
commit ( w http . ResponseWriter , r * http . Request , params httprouter . Params )
get ( w http . ResponseWriter , r * http . Request , params httprouter . Params )
clean ( w http . ResponseWriter , r * http . Request , _ httprouter . Params )
// middleware wraps a route handler: it logs the request, runs the handler
// and then starts gcCache in a goroutine.
middleware ( handler httprouter . Handle ) httprouter . Handle
// readCache loads the cache record stored under id.
readCache ( id uint64 ) ( * Cache , error )
// useCache sets UsedAt of the cache record stored under id to the current time.
useCache ( id uint64 ) error
// setgcAt overrides the recorded time of the last garbage collection.
setgcAt ( at time . Time )
// gcCache removes stale cache entries; it skips the run when the previous
// one happened less than an hour ago.
gcCache ( )
// responseJSON writes a JSON response with the given status code; an error
// value in v[0] is logged and rendered as {"error": "..."}.
responseJSON ( w http . ResponseWriter , r * http . Request , code int , v ... any )
}
type handler struct {
2023-07-10 18:57:06 +02:00
dir string
2023-04-28 23:57:40 +08:00
storage * Storage
router * httprouter . Router
listener net . Listener
server * http . Server
logger logrus . FieldLogger
2024-11-21 22:49:12 +01:00
secret string
2023-04-28 23:57:40 +08:00
2024-03-29 00:42:02 +08:00
gcing atomic . Bool
2023-04-28 23:57:40 +08:00
gcAt time . Time
outboundIP string
}
2025-09-04 14:38:50 +00:00
func StartHandler ( dir , outboundIP string , port uint16 , secret string , logger logrus . FieldLogger ) ( Handler , error ) {
h := & handler {
2024-11-21 22:49:12 +01:00
secret : secret ,
}
2023-04-28 23:57:40 +08:00
if logger == nil {
discard := logrus . New ( )
discard . Out = io . Discard
logger = discard
}
logger = logger . WithField ( "module" , "artifactcache" )
h . logger = logger
if dir == "" {
home , err := os . UserHomeDir ( )
if err != nil {
return nil , err
}
dir = filepath . Join ( home , ".cache" , "actcache" )
}
if err := os . MkdirAll ( dir , 0 o755 ) ; err != nil {
return nil , err
}
2023-07-10 18:57:06 +02:00
h . dir = dir
2023-04-28 23:57:40 +08:00
storage , err := NewStorage ( filepath . Join ( dir , "cache" ) )
if err != nil {
return nil , err
}
h . storage = storage
if outboundIP != "" {
h . outboundIP = outboundIP
} else if ip := common . GetOutboundIP ( ) ; ip == nil {
return nil , fmt . Errorf ( "unable to determine outbound IP address" )
} else {
h . outboundIP = ip . String ( )
}
router := httprouter . New ( )
2024-12-07 17:48:07 +01:00
router . GET ( urlBase + "/cache" , h . middleware ( h . find ) )
router . POST ( urlBase + "/caches" , h . middleware ( h . reserve ) )
router . PATCH ( urlBase + "/caches/:id" , h . middleware ( h . upload ) )
router . POST ( urlBase + "/caches/:id" , h . middleware ( h . commit ) )
router . GET ( urlBase + "/artifacts/:id" , h . middleware ( h . get ) )
router . POST ( urlBase + "/clean" , h . middleware ( h . clean ) )
2023-04-28 23:57:40 +08:00
h . router = router
h . gcCache ( )
listener , err := net . Listen ( "tcp" , fmt . Sprintf ( ":%d" , port ) ) // listen on all interfaces
if err != nil {
return nil , err
}
server := & http . Server {
ReadHeaderTimeout : 2 * time . Second ,
Handler : router ,
}
go func ( ) {
if err := server . Serve ( listener ) ; err != nil && errors . Is ( err , net . ErrClosed ) {
logger . Errorf ( "http serve: %v" , err )
}
} ( )
h . listener = listener
h . server = server
return h , nil
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) ExternalURL ( ) string {
2025-05-25 19:16:18 +02:00
port := strconv . Itoa ( h . listener . Addr ( ) . ( * net . TCPAddr ) . Port )
2023-04-28 23:57:40 +08:00
// TODO: make the external url configurable if necessary
2025-05-25 19:16:18 +02:00
return fmt . Sprintf ( "http://%s" , net . JoinHostPort ( h . outboundIP , port ) )
2023-04-28 23:57:40 +08:00
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) Close ( ) error {
2023-04-28 23:57:40 +08:00
if h == nil {
return nil
}
var retErr error
if h . server != nil {
err := h . server . Close ( )
if err != nil {
retErr = err
}
h . server = nil
}
if h . listener != nil {
err := h . listener . Close ( )
if errors . Is ( err , net . ErrClosed ) {
err = nil
}
if err != nil {
retErr = err
}
h . listener = nil
}
return retErr
}
2025-09-04 14:38:50 +00:00
// isClosed reports whether the handler holds neither a server nor a listener,
// which is the state Close leaves it in.
func (h *handler) isClosed() bool {
	return h.server == nil && h.listener == nil
}
func ( h * handler ) openDB ( ) ( * bolthold . Store , error ) {
2023-07-10 18:57:06 +02:00
return bolthold . Open ( filepath . Join ( h . dir , "bolt.db" ) , 0 o644 , & bolthold . Options {
Encoder : json . Marshal ,
Decoder : json . Unmarshal ,
Options : & bbolt . Options {
Timeout : 5 * time . Second ,
NoGrowSync : bbolt . DefaultOptions . NoGrowSync ,
FreelistType : bbolt . DefaultOptions . FreelistType ,
} ,
} )
}
2023-04-28 23:57:40 +08:00
// GET /_apis/artifactcache/cache
2025-09-04 14:38:50 +00:00
func ( h * handler ) find ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
repo , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2023-04-28 23:57:40 +08:00
keys := strings . Split ( r . URL . Query ( ) . Get ( "keys" ) , "," )
// cache keys are case insensitive
for i , key := range keys {
keys [ i ] = strings . ToLower ( key )
}
version := r . URL . Query ( ) . Get ( "version" )
2023-07-10 18:57:06 +02:00
db , err := h . openDB ( )
if err != nil {
h . responseJSON ( w , r , 500 , err )
return
}
defer db . Close ( )
2025-08-15 20:26:35 -06:00
cache , err := findCache ( db , repo , keys , version , rundata . WriteIsolationKey )
2023-04-28 23:57:40 +08:00
if err != nil {
h . responseJSON ( w , r , 500 , err )
return
}
2025-08-15 20:26:35 -06:00
// If read was scoped to WriteIsolationKey and didn't find anything, we can fallback to the non-isolated cache read
if cache == nil && rundata . WriteIsolationKey != "" {
cache , err = findCache ( db , repo , keys , version , "" )
if err != nil {
h . responseJSON ( w , r , 500 , err )
return
}
}
2023-04-28 23:57:40 +08:00
if cache == nil {
h . responseJSON ( w , r , 204 )
return
}
if ok , err := h . storage . Exist ( cache . ID ) ; err != nil {
h . responseJSON ( w , r , 500 , err )
return
} else if ! ok {
2023-07-10 18:57:06 +02:00
_ = db . Delete ( cache . ID , cache )
2023-04-28 23:57:40 +08:00
h . responseJSON ( w , r , 204 )
return
}
2025-01-13 16:50:45 +01:00
archiveLocation := fmt . Sprintf ( "%s/%s%s/artifacts/%d" , r . Header . Get ( "Forgejo-Cache-Host" ) , r . Header . Get ( "Forgejo-Cache-RunId" ) , urlBase , cache . ID )
2023-04-28 23:57:40 +08:00
h . responseJSON ( w , r , 200 , map [ string ] any {
"result" : "hit" ,
2025-01-13 16:50:45 +01:00
"archiveLocation" : archiveLocation ,
2023-04-28 23:57:40 +08:00
"cacheKey" : cache . Key ,
} )
}
// POST /_apis/artifactcache/caches
2025-09-04 14:38:50 +00:00
func ( h * handler ) reserve ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
repo , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2023-04-28 23:57:40 +08:00
api := & Request { }
if err := json . NewDecoder ( r . Body ) . Decode ( api ) ; err != nil {
h . responseJSON ( w , r , 400 , err )
return
}
// cache keys are case insensitive
api . Key = strings . ToLower ( api . Key )
cache := api . ToCache ( )
2023-07-10 18:57:06 +02:00
db , err := h . openDB ( )
if err != nil {
h . responseJSON ( w , r , 500 , err )
return
}
defer db . Close ( )
2023-04-28 23:57:40 +08:00
now := time . Now ( ) . Unix ( )
cache . CreatedAt = now
cache . UsedAt = now
2024-11-21 22:49:12 +01:00
cache . Repo = repo
2025-08-15 20:26:35 -06:00
cache . WriteIsolationKey = rundata . WriteIsolationKey
2024-03-29 00:42:02 +08:00
if err := insertCache ( db , cache ) ; err != nil {
2023-04-28 23:57:40 +08:00
h . responseJSON ( w , r , 500 , err )
return
}
h . responseJSON ( w , r , 200 , map [ string ] any {
"cacheId" : cache . ID ,
} )
}
// PATCH /_apis/artifactcache/caches/:id
2025-09-04 14:38:50 +00:00
func ( h * handler ) upload ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
repo , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2024-11-22 01:01:12 +01:00
id , err := strconv . ParseUint ( params . ByName ( "id" ) , 10 , 64 )
2023-04-28 23:57:40 +08:00
if err != nil {
h . responseJSON ( w , r , 400 , err )
return
}
2025-03-24 10:48:28 +01:00
cache , err := h . readCache ( id )
2023-07-10 18:57:06 +02:00
if err != nil {
2023-04-28 23:57:40 +08:00
if errors . Is ( err , bolthold . ErrNotFound ) {
2025-03-24 10:48:28 +01:00
h . responseJSON ( w , r , 404 , fmt . Errorf ( "cache %d: not reserved" , id ) )
2023-04-28 23:57:40 +08:00
return
}
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache Get: %w" , err ) )
2023-04-28 23:57:40 +08:00
return
}
2024-11-21 22:49:12 +01:00
// Should not happen
if cache . Repo != repo {
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache repo is not valid" ) )
2024-11-21 22:49:12 +01:00
return
}
2025-08-15 20:26:35 -06:00
if cache . WriteIsolationKey != rundata . WriteIsolationKey {
h . responseJSON ( w , r , 403 , fmt . Errorf ( "cache authorized for write isolation %q, but attempting to operate on %q" , rundata . WriteIsolationKey , cache . WriteIsolationKey ) )
return
}
2024-11-21 22:49:12 +01:00
2023-04-28 23:57:40 +08:00
if cache . Complete {
h . responseJSON ( w , r , 400 , fmt . Errorf ( "cache %v %q: already complete" , cache . ID , cache . Key ) )
return
}
start , _ , err := parseContentRange ( r . Header . Get ( "Content-Range" ) )
if err != nil {
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 400 , fmt . Errorf ( "cache parseContentRange(%s): %w" , r . Header . Get ( "Content-Range" ) , err ) )
2023-04-28 23:57:40 +08:00
return
}
if err := h . storage . Write ( cache . ID , start , r . Body ) ; err != nil {
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache storage.Write: %w" , err ) )
return
}
if err := h . useCache ( id ) ; err != nil {
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache useCache: %w" , err ) )
return
2023-04-28 23:57:40 +08:00
}
h . responseJSON ( w , r , 200 )
}
// POST /_apis/artifactcache/caches/:id
2025-09-04 14:38:50 +00:00
func ( h * handler ) commit ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
repo , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2024-11-22 01:01:12 +01:00
id , err := strconv . ParseUint ( params . ByName ( "id" ) , 10 , 64 )
2023-04-28 23:57:40 +08:00
if err != nil {
h . responseJSON ( w , r , 400 , err )
return
}
2025-03-24 10:48:28 +01:00
cache , err := h . readCache ( id )
2023-07-10 18:57:06 +02:00
if err != nil {
2023-04-28 23:57:40 +08:00
if errors . Is ( err , bolthold . ErrNotFound ) {
2025-03-24 10:48:28 +01:00
h . responseJSON ( w , r , 404 , fmt . Errorf ( "cache %d: not reserved" , id ) )
2023-04-28 23:57:40 +08:00
return
}
2025-03-24 10:48:28 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache Get: %w" , err ) )
2023-04-28 23:57:40 +08:00
return
}
2024-11-21 22:49:12 +01:00
// Should not happen
if cache . Repo != repo {
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache repo is not valid" ) )
2024-11-21 22:49:12 +01:00
return
}
2025-08-15 20:26:35 -06:00
if cache . WriteIsolationKey != rundata . WriteIsolationKey {
h . responseJSON ( w , r , 403 , fmt . Errorf ( "cache authorized for write isolation %q, but attempting to operate on %q" , rundata . WriteIsolationKey , cache . WriteIsolationKey ) )
return
}
2024-11-21 22:49:12 +01:00
2023-04-28 23:57:40 +08:00
if cache . Complete {
h . responseJSON ( w , r , 400 , fmt . Errorf ( "cache %v %q: already complete" , cache . ID , cache . Key ) )
return
}
2023-07-11 11:35:27 +08:00
size , err := h . storage . Commit ( cache . ID , cache . Size )
if err != nil {
2023-04-28 23:57:40 +08:00
h . responseJSON ( w , r , 500 , err )
return
}
2023-07-11 11:35:27 +08:00
// write real size back to cache, it may be different from the current value when the request doesn't specify it.
cache . Size = size
2023-04-28 23:57:40 +08:00
2025-03-24 10:48:28 +01:00
db , err := h . openDB ( )
2023-07-10 18:57:06 +02:00
if err != nil {
h . responseJSON ( w , r , 500 , err )
return
}
defer db . Close ( )
2023-04-28 23:57:40 +08:00
cache . Complete = true
2023-07-10 18:57:06 +02:00
if err := db . Update ( cache . ID , cache ) ; err != nil {
2023-04-28 23:57:40 +08:00
h . responseJSON ( w , r , 500 , err )
return
}
h . responseJSON ( w , r , 200 )
}
// GET /_apis/artifactcache/artifacts/:id
2025-09-04 14:38:50 +00:00
func ( h * handler ) get ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
repo , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2024-11-22 01:01:12 +01:00
id , err := strconv . ParseUint ( params . ByName ( "id" ) , 10 , 64 )
2023-04-28 23:57:40 +08:00
if err != nil {
h . responseJSON ( w , r , 400 , err )
return
}
2024-11-21 22:49:12 +01:00
2025-03-24 10:48:28 +01:00
cache , err := h . readCache ( id )
if err != nil {
if errors . Is ( err , bolthold . ErrNotFound ) {
h . responseJSON ( w , r , 404 , fmt . Errorf ( "cache %d: not reserved" , id ) )
2024-11-21 22:49:12 +01:00
return
}
2025-03-24 10:48:28 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache Get: %w" , err ) )
return
2024-11-21 22:49:12 +01:00
}
// Should not happen
if cache . Repo != repo {
2025-03-23 22:31:16 +01:00
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache repo is not valid" ) )
2024-11-21 22:49:12 +01:00
return
}
2025-08-15 20:26:35 -06:00
// reads permitted against caches w/ the same isolation key, or no isolation key
if cache . WriteIsolationKey != rundata . WriteIsolationKey && cache . WriteIsolationKey != "" {
h . responseJSON ( w , r , 403 , fmt . Errorf ( "cache authorized for write isolation %q, but attempting to operate on %q" , rundata . WriteIsolationKey , cache . WriteIsolationKey ) )
return
}
2024-11-21 22:49:12 +01:00
2025-03-23 22:31:16 +01:00
if err := h . useCache ( id ) ; err != nil {
h . responseJSON ( w , r , 500 , fmt . Errorf ( "cache useCache: %w" , err ) )
return
}
2024-11-22 01:01:12 +01:00
h . storage . Serve ( w , r , id )
2023-04-28 23:57:40 +08:00
}
// POST /_apis/artifactcache/clean
2025-09-04 14:38:50 +00:00
func ( h * handler ) clean ( w http . ResponseWriter , r * http . Request , _ httprouter . Params ) {
2024-12-07 17:48:07 +01:00
rundata := runDataFromHeaders ( r )
_ , err := h . validateMac ( rundata )
2024-11-21 22:49:12 +01:00
if err != nil {
2025-01-13 16:59:07 +01:00
h . responseJSON ( w , r , 403 , err )
2024-11-21 22:49:12 +01:00
return
}
2023-04-28 23:57:40 +08:00
// TODO: don't support force deleting cache entries
// see: https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries
h . responseJSON ( w , r , 200 )
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) middleware ( handler httprouter . Handle ) httprouter . Handle {
2023-04-28 23:57:40 +08:00
return func ( w http . ResponseWriter , r * http . Request , params httprouter . Params ) {
h . logger . Debugf ( "%s %s" , r . Method , r . RequestURI )
handler ( w , r , params )
go h . gcCache ( )
}
}
// if not found, return (nil, nil) instead of an error.
2025-08-15 20:26:35 -06:00
func findCache ( db * bolthold . Store , repo string , keys [ ] string , version , writeIsolationKey string ) ( * Cache , error ) {
2024-03-29 00:42:02 +08:00
cache := & Cache { }
2024-03-05 07:04:54 +01:00
for _ , prefix := range keys {
2024-03-29 03:07:20 +01:00
// if a key in the list matches exactly, don't return partial matches
if err := db . FindOne ( cache ,
fix: artifact cache DB not using indexes for searching (#878)
Uses the `Repo` field as an index during searches of the cache database. Removes unused indexes.
To measure the performance of this change, I created a synthetic test which wrote 10,000 records into the artifact cache DB. Of course, all benchmarks are lies that can't be generalized to real-world usage, but it seems clear from the magnitude of improvement that this fixes a flawed implementation, even if it's not perfect.
- Unmodified performance:
- Write: 196 records/second
- Read: 1 record/second
- With `Repo` index being used for reads, and other indexes being removed:
- Write: 347 records/second
- Read: 22,398 records/second
`Repo` is, I think, the only index that made sense to remain, with an eye on workflow run performance:
- `Key` -- can't be used for index because `findCache` searches for key *prefixes*, not equal values.
- `Version` -- isn't very distinct for different workflow runs (https://code.forgejo.org/actions/cache#cache-version)
- `Complete` - significant portion of the cache DB will be complete, making it the least selective possible index
- `UsedAt` & `CreatedAt` - only used in GC operation, so could remain, but this isn't a performance-sensitive codepath
Closes #874.
<!--start release-notes-assistant-->
<!--URL:https://code.forgejo.org/forgejo/runner-->
- bug fixes
- [PR](https://code.forgejo.org/forgejo/runner/pulls/878): <!--number 878 --><!--line 0 --><!--description Zml4OiBhcnRpZmFjdCBjYWNoZSBEQiBub3QgdXNpbmcgaW5kZXhlcyBmb3Igc2VhcmNoaW5n-->fix: artifact cache DB not using indexes for searching<!--description-->
<!--end release-notes-assistant-->
Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/878
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-08-19 20:19:23 +00:00
bolthold . Where ( "Repo" ) . Eq ( repo ) . Index ( "Repo" ) .
2024-11-21 22:49:12 +01:00
And ( "Key" ) . Eq ( prefix ) .
2024-03-29 03:07:20 +01:00
And ( "Version" ) . Eq ( version ) .
2025-08-15 20:26:35 -06:00
And ( "WriteIsolationKey" ) . Eq ( writeIsolationKey ) .
2024-03-29 03:07:20 +01:00
And ( "Complete" ) . Eq ( true ) .
SortBy ( "CreatedAt" ) . Reverse ( ) ) ; err == nil || ! errors . Is ( err , bolthold . ErrNotFound ) {
if err != nil {
return nil , fmt . Errorf ( "find cache: %w" , err )
}
return cache , nil
}
2024-01-20 14:11:50 +02:00
prefixPattern := fmt . Sprintf ( "^%s" , regexp . QuoteMeta ( prefix ) )
re , err := regexp . Compile ( prefixPattern )
if err != nil {
continue
}
2024-03-29 00:42:02 +08:00
if err := db . FindOne ( cache ,
fix: artifact cache DB not using indexes for searching (#878)
Uses the `Repo` field as an index during searches of the cache database. Removes unused indexes.
To measure the performance of this change, I created a synthetic test which wrote 10,000 records into the artifact cache DB. Of course, all benchmarks are lies that can't be generalized to real-world usage, but it seems clear from the magnitude of improvement that this fixes a flawed implementation, even if it's not perfect.
- Unmodified performance:
- Write: 196 records/second
- Read: 1 record/second
- With `Repo` index being used for reads, and other indexes being removed:
- Write: 347 records/second
- Read: 22,398 records/second
`Repo` is, I think, the only index that made sense to remain, with an eye on workflow run performance:
- `Key` -- can't be used for index because `findCache` searches for key *prefixes*, not equal values.
- `Version` -- isn't very distinct for different workflow runs (https://code.forgejo.org/actions/cache#cache-version)
- `Complete` - significant portion of the cache DB will be complete, making it the least selective possible index
- `UsedAt` & `CreatedAt` - only used in GC operation, so could remain, but this isn't a performance-sensitive codepath
Closes #874.
<!--start release-notes-assistant-->
<!--URL:https://code.forgejo.org/forgejo/runner-->
- bug fixes
- [PR](https://code.forgejo.org/forgejo/runner/pulls/878): <!--number 878 --><!--line 0 --><!--description Zml4OiBhcnRpZmFjdCBjYWNoZSBEQiBub3QgdXNpbmcgaW5kZXhlcyBmb3Igc2VhcmNoaW5n-->fix: artifact cache DB not using indexes for searching<!--description-->
<!--end release-notes-assistant-->
Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/878
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-08-19 20:19:23 +00:00
bolthold . Where ( "Repo" ) . Eq ( repo ) . Index ( "Repo" ) .
2024-11-21 22:49:12 +01:00
And ( "Key" ) . RegExp ( re ) .
2024-03-29 00:42:02 +08:00
And ( "Version" ) . Eq ( version ) .
2025-08-15 20:26:35 -06:00
And ( "WriteIsolationKey" ) . Eq ( writeIsolationKey ) .
2024-03-29 00:42:02 +08:00
And ( "Complete" ) . Eq ( true ) .
SortBy ( "CreatedAt" ) . Reverse ( ) ) ; err != nil {
if errors . Is ( err , bolthold . ErrNotFound ) {
continue
2023-04-28 23:57:40 +08:00
}
2024-03-29 00:42:02 +08:00
return nil , fmt . Errorf ( "find cache: %w" , err )
2023-04-28 23:57:40 +08:00
}
2024-03-29 00:42:02 +08:00
return cache , nil
2023-04-28 23:57:40 +08:00
}
return nil , nil
}
2024-03-29 00:42:02 +08:00
// insertCache stores cache under the next sequence number and then updates
// the record under cache.ID so that the stored copy carries its own id.
// NOTE(review): this relies on Insert filling in cache.ID — confirm against
// the Cache type's key tag.
func insertCache(db *bolthold.Store, cache *Cache) error {
	err := db.Insert(bolthold.NextSequence(), cache)
	if err != nil {
		return fmt.Errorf("insert cache: %w", err)
	}

	// write back id to db
	err = db.Update(cache.ID, cache)
	if err != nil {
		return fmt.Errorf("write back id to db: %w", err)
	}

	return nil
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) readCache ( id uint64 ) ( * Cache , error ) {
2025-03-24 10:48:28 +01:00
db , err := h . openDB ( )
if err != nil {
return nil , err
}
defer db . Close ( )
cache := & Cache { }
if err := db . Get ( id , cache ) ; err != nil {
return nil , err
}
return cache , nil
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) useCache ( id uint64 ) error {
2025-03-23 22:31:16 +01:00
db , err := h . openDB ( )
if err != nil {
return err
}
defer db . Close ( )
cache := & Cache { }
if err := db . Get ( id , cache ) ; err != nil {
return err
}
2023-04-28 23:57:40 +08:00
cache . UsedAt = time . Now ( ) . Unix ( )
2025-03-23 22:31:16 +01:00
return db . Update ( cache . ID , cache )
2023-04-28 23:57:40 +08:00
}
2024-03-29 00:42:02 +08:00
// Retention periods applied by gcCache.
const (
// keepUsed: caches created longer ago than this are removed.
keepUsed = 30 * 24 * time . Hour
// keepUnused: caches not used for longer than this are removed.
keepUnused = 7 * 24 * time . Hour
// keepTemp: incomplete caches not used for longer than this are removed.
keepTemp = 5 * time . Minute
// keepOld: a superseded duplicate used more recently than this is kept for now.
keepOld = 5 * time . Minute
)
2025-09-04 14:38:50 +00:00
// setgcAt overrides the recorded time of the last garbage collection, which
// gcCache compares against to decide whether a new run is due.
func (h *handler) setgcAt(at time.Time) {
	h.gcAt = at
}
func ( h * handler ) gcCache ( ) {
2024-03-29 00:42:02 +08:00
if h . gcing . Load ( ) {
2023-04-28 23:57:40 +08:00
return
}
2024-03-29 00:42:02 +08:00
if ! h . gcing . CompareAndSwap ( false , true ) {
2023-04-28 23:57:40 +08:00
return
}
2024-03-29 00:42:02 +08:00
defer h . gcing . Store ( false )
2023-04-28 23:57:40 +08:00
if time . Since ( h . gcAt ) < time . Hour {
h . logger . Debugf ( "skip gc: %v" , h . gcAt . String ( ) )
return
}
h . gcAt = time . Now ( )
h . logger . Debugf ( "gc: %v" , h . gcAt . String ( ) )
2023-07-10 18:57:06 +02:00
db , err := h . openDB ( )
if err != nil {
return
}
defer db . Close ( )
2024-03-29 00:42:02 +08:00
// Remove the caches which are not completed for a while, they are most likely to be broken.
2023-04-28 23:57:40 +08:00
var caches [ ] * Cache
2024-03-29 00:42:02 +08:00
if err := db . Find ( & caches , bolthold .
Where ( "UsedAt" ) . Lt ( time . Now ( ) . Add ( - keepTemp ) . Unix ( ) ) .
And ( "Complete" ) . Eq ( false ) ,
) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "find caches: %v" , err )
} else {
for _ , cache := range caches {
h . storage . Remove ( cache . ID )
2023-07-10 18:57:06 +02:00
if err := db . Delete ( cache . ID , cache ) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "delete cache: %v" , err )
continue
}
h . logger . Infof ( "deleted cache: %+v" , cache )
}
}
2024-03-29 00:42:02 +08:00
// Remove the old caches which have not been used recently.
2023-04-28 23:57:40 +08:00
caches = caches [ : 0 ]
2024-03-29 00:42:02 +08:00
if err := db . Find ( & caches , bolthold .
Where ( "UsedAt" ) . Lt ( time . Now ( ) . Add ( - keepUnused ) . Unix ( ) ) ,
) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "find caches: %v" , err )
} else {
for _ , cache := range caches {
h . storage . Remove ( cache . ID )
2023-07-10 18:57:06 +02:00
if err := db . Delete ( cache . ID , cache ) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "delete cache: %v" , err )
continue
}
h . logger . Infof ( "deleted cache: %+v" , cache )
}
}
2024-03-29 00:42:02 +08:00
// Remove the old caches which are too old.
2023-04-28 23:57:40 +08:00
caches = caches [ : 0 ]
2024-03-29 00:42:02 +08:00
if err := db . Find ( & caches , bolthold .
Where ( "CreatedAt" ) . Lt ( time . Now ( ) . Add ( - keepUsed ) . Unix ( ) ) ,
) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "find caches: %v" , err )
} else {
for _ , cache := range caches {
h . storage . Remove ( cache . ID )
2023-07-10 18:57:06 +02:00
if err := db . Delete ( cache . ID , cache ) ; err != nil {
2023-04-28 23:57:40 +08:00
h . logger . Warnf ( "delete cache: %v" , err )
continue
}
h . logger . Infof ( "deleted cache: %+v" , cache )
}
}
2024-03-29 00:42:02 +08:00
// Remove the old caches with the same key and version, keep the latest one.
// Also keep the olds which have been used recently for a while in case of the cache is still in use.
if results , err := db . FindAggregate (
& Cache { } ,
bolthold . Where ( "Complete" ) . Eq ( true ) ,
"Key" , "Version" ,
) ; err != nil {
h . logger . Warnf ( "find aggregate caches: %v" , err )
} else {
for _ , result := range results {
if result . Count ( ) <= 1 {
continue
}
result . Sort ( "CreatedAt" )
caches = caches [ : 0 ]
result . Reduction ( & caches )
for _ , cache := range caches [ : len ( caches ) - 1 ] {
if time . Since ( time . Unix ( cache . UsedAt , 0 ) ) < keepOld {
// Keep it since it has been used recently, even if it's old.
// Or it could break downloading in process.
continue
}
h . storage . Remove ( cache . ID )
if err := db . Delete ( cache . ID , cache ) ; err != nil {
h . logger . Warnf ( "delete cache: %v" , err )
continue
}
h . logger . Infof ( "deleted cache: %+v" , cache )
}
}
}
2023-04-28 23:57:40 +08:00
}
2025-09-04 14:38:50 +00:00
func ( h * handler ) responseJSON ( w http . ResponseWriter , r * http . Request , code int , v ... any ) {
2023-04-28 23:57:40 +08:00
w . Header ( ) . Set ( "Content-Type" , "application/json; charset=utf-8" )
var data [ ] byte
if len ( v ) == 0 || v [ 0 ] == nil {
data , _ = json . Marshal ( struct { } { } )
} else if err , ok := v [ 0 ] . ( error ) ; ok {
h . logger . Errorf ( "%v %v: %v" , r . Method , r . RequestURI , err )
data , _ = json . Marshal ( map [ string ] any {
"error" : err . Error ( ) ,
} )
} else {
data , _ = json . Marshal ( v [ 0 ] )
}
w . WriteHeader ( code )
_ , _ = w . Write ( data )
}
2024-11-22 01:01:12 +01:00
// parseContentRange extracts the first and last byte positions from a
// Content-Range header value such as "bytes 11-22/*". The "bytes " prefix and
// the "/<length>" suffix are optional, and the length is not interpreted.
//
// It returns an error when either position is not an unsigned decimal number
// or when the last position is smaller than the first one, which makes the
// range invalid.
func parseContentRange(s string) (uint64, uint64, error) {
	// support the format like "bytes 11-22/*" only
	s, _, _ = strings.Cut(strings.TrimPrefix(s, "bytes "), "/")
	first, last, _ := strings.Cut(s, "-")

	start, err := strconv.ParseUint(first, 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("parse %q: %w", s, err)
	}
	stop, err := strconv.ParseUint(last, 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("parse %q: %w", s, err)
	}
	if stop < start {
		return 0, 0, fmt.Errorf("parse %q: last byte position %d is before first byte position %d", s, stop, start)
	}

	return start, stop, nil
}
2024-12-07 17:48:07 +01:00
2025-09-05 06:01:49 +00:00
// RunData carries the values of the Forgejo-Cache-* request headers, as
// extracted by runDataFromHeaders. Each field is empty when its header is absent.
type RunData struct {
// RepositoryFullName is the value of the Forgejo-Cache-Repo header.
RepositoryFullName string
// RunNumber is the value of the Forgejo-Cache-RunNumber header.
RunNumber string
// Timestamp is the value of the Forgejo-Cache-Timestamp header.
Timestamp string
// RepositoryMAC is the value of the Forgejo-Cache-MAC header.
RepositoryMAC string
// WriteIsolationKey is the value of the Forgejo-Cache-WriteIsolationKey header.
WriteIsolationKey string
}
func runDataFromHeaders ( r * http . Request ) RunData {
return RunData {
2024-12-07 17:48:07 +01:00
RepositoryFullName : r . Header . Get ( "Forgejo-Cache-Repo" ) ,
RunNumber : r . Header . Get ( "Forgejo-Cache-RunNumber" ) ,
Timestamp : r . Header . Get ( "Forgejo-Cache-Timestamp" ) ,
RepositoryMAC : r . Header . Get ( "Forgejo-Cache-MAC" ) ,
2025-08-15 20:26:35 -06:00
WriteIsolationKey : r . Header . Get ( "Forgejo-Cache-WriteIsolationKey" ) ,
2024-12-07 17:48:07 +01:00
}
}