mirror of
https://code.forgejo.org/forgejo/runner.git
synced 2025-06-27 16:35:58 +00:00
feat: forgejo-runner-service.sh safeguard to avoid overheating
Re-creating the LXC container from scratch is expensive. When rebooting or when multiple units start at the same time, it may cause an excessive load. Use a global lock to guard this operation so that re-creations happen in sequence and not in parallel. Each one typically takes around one minute to complete, which means that in the event of a reboot, it will take around one minute * the number of runners for the unlucky one to start. During this interval workflows will have to wait.
This commit is contained in:
parent
0232fe1255
commit
b79578d98e
1 changed files with 11 additions and 1 deletions
|
@ -31,6 +31,7 @@ SELF_FILENAME=$(basename "$SELF")
|
|||
ETC=/etc/forgejo-runner
|
||||
LIB=/var/lib/forgejo-runner
|
||||
LOG=/var/log/forgejo-runner
|
||||
LOCK=/var/lock/forgejo-runner
|
||||
: ${HOST:=$(hostname)}
|
||||
|
||||
LXC_IPV4_PREFIX="10.105.7"
|
||||
|
@ -253,10 +254,19 @@ function daemon() {
|
|||
set -e
|
||||
}
|
||||
|
||||
function start() {
|
||||
# Rebuild the runner's LXC container.
#
# Runs, in order:
#   1. stop
#   2. lxc-helpers.sh lxc_container_destroy <name printed by lxc_name>
#   3. lxc_create
#
# start() invokes this through "flock ... $LOCK $SELF destroy_and_create",
# so it must remain callable as a standalone sub-command of this script.
#
# Arguments: none
function destroy_and_create() {
    stop
    # Quote the substitution so the container name always reaches the helper
    # as exactly one argument (no word splitting, no glob expansion).
    lxc-helpers.sh lxc_container_destroy "$(lxc_name)"
    lxc_create
}
|
||||
|
||||
function start() {
|
||||
# it should be more than
|
||||
# (time it takes for one runner to be recreated) * (number of runners)
|
||||
# because they will all start at the same time on boot
|
||||
local timeout=3600
|
||||
|
||||
flock --timeout $timeout $LOCK $SELF destroy_and_create
|
||||
|
||||
local log=$LOG/$INPUTS_SERIAL.log
|
||||
if test -f $log; then
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue