From 65b3abbae959e198ff5f67ea52716d2a7a8bde28 Mon Sep 17 00:00:00 2001 From: Davlet Panech Date: Fri, 19 May 2023 14:02:02 -0400 Subject: [PATCH] sign-iso: retry on failure ISO signing via a remote server may fail due to network hiccups. Retry on errors. TESTS ====================================== 1) Run build-iso Jenkins job normally and make sure it works 2) Run build-iso with SIGNING_SERVER set to an invalid host name and make sure it retries Story: 2010226 Task: 48064 Signed-off-by: Davlet Panech Change-Id: Icaa8e07827ddfcc2583f875e5a57247ce7bf8613 --- scripts/lib/job_utils.sh | 3 + scripts/lib/retries.sh | 91 +++++++++++++++++-------- scripts/sign-iso.sh | 9 ++- scripts/templates/build.conf.example.in | 5 ++ 4 files changed, 79 insertions(+), 29 deletions(-) diff --git a/scripts/lib/job_utils.sh b/scripts/lib/job_utils.sh index 6938552..97daf83 100644 --- a/scripts/lib/job_utils.sh +++ b/scripts/lib/job_utils.sh @@ -215,6 +215,9 @@ __set_build_vars() { die "SECUREBOOT_FORMAL must be \"true\" or \"false\"" fi + declare_job_env SIGN_MAX_ATTEMPTS 3 + declare_job_env SIGN_BACKOFF_DELAY 10 + } __started_by_jenkins() { diff --git a/scripts/lib/retries.sh b/scripts/lib/retries.sh index fdfa970..980beb6 100644 --- a/scripts/lib/retries.sh +++ b/scripts/lib/retries.sh @@ -7,53 +7,90 @@ # # -# Utilities to retry commands +# Function to call a command, with support for retries +# +# with_retries [<options>] <max_attempts> <cmd> [<cmd_args>...] +# +# Options: +# -d <secs> | --delay <secs> +# Wait given number of seconds between retries +# -t <secs> | --timeout <secs> +# Each iteration of the command runs under a timeout +# -k <secs> | --kill-timeout <secs> +# Each iteration of the command is killed violently +# if it doesn't exit voluntarily within the set time +# after the initial timeout signal. 
# - function with_retries { + local delay=5 + local max_time=0 + local kill_time=0 + local to_cmd="" + + while [ $1 != "" ]; do + case "$1" in + -d | --delay) + delay=$2 + shift 2 + ;; + -t | --timeout) + max_time=$2 + shift 2 + ;; + -k | --kill-timeout) + kill_time=$2 + shift 2 + ;; + *) + break + ;; + esac + done + local max_attempts=$1 - local delay=$2 - local cmd=$3 + local cmd=$2 + shift 2 + + if [ ${max_time} -gt 0 ]; then + to_cmd="timeout " + if [ ${kill_time} -gt 0 ]; then + to_cmd+="--kill-after=${kill_time} " + fi + to_cmd+="${max_time} " + fi # Pop the first two arguments off the list, # so we can pass additional args to the command safely - shift 3 local -i attempt=0 + local rc=0 while :; do - attempt=$((attempt+1)) + let attempt++ - >&2 echo "Running: ${cmd} $@" - # ok, this is an obscure one ... - # ${cmd} - # ... alone risks setting of bash's 'set -e', - # So I need to hide the error code using a pipe - # with a final commane that returns true. - # original implementation was ... - # ${cmd} "$@" | true - # ... but this sometimes yields a ${PIPESTATUS[0]} of 141 - # if ${cmd} is still writing to stdout when 'true' exits. - # Instead I use 'tee' to consume everything ${cmd} sends to stdout. - ${cmd} "$@" | tee /dev/null - if [ ${PIPESTATUS[0]} -eq 0 ]; then + echo "Running: ${cmd} $@" >&2 + ${to_cmd} ${cmd} "$@" + rc=$? + if [ $rc -eq 0 ]; then return 0 fi - >&2 echo "Command (${cmd}) failed, attempt ${attempt} of ${max_attempts}." + if [ $rc -eq 124 ]; then + echo "Command (${cmd}) timed out, attempt ${attempt} of ${max_attempts}." >&2 + elif [ $rc -eq 137 ]; then + echo "Command (${cmd}) timed out and killed, attempt ${attempt} of ${max_attempts}." >&2 + else + echo "Command (${cmd}) failed, attempt ${attempt} of ${max_attempts}." >&2 + fi + if [ ${attempt} -lt ${max_attempts} ]; then - >&2 echo "Waiting ${delay} seconds before retrying..." + echo "Waiting ${delay} seconds before retrying..." 
>&2 sleep ${delay} continue else - >&2 echo "Max command attempts reached. Aborting..." + echo "Max command attempts reached. Aborting..." >&2 return 1 fi done } -function with_default_retries { - local cmd=$1 - shift 1 - with_retries ${RETRIES:-1} ${RETRY_INTERVAL_SEC:-1} "${cmd}" "$@" -} diff --git a/scripts/sign-iso.sh b/scripts/sign-iso.sh index 2d60037..2d0a6b6 100755 --- a/scripts/sign-iso.sh +++ b/scripts/sign-iso.sh @@ -8,6 +8,7 @@ set -e source $(dirname "$0")/lib/job_utils.sh +source $(dirname "$0")/lib/retries.sh require_job_env BUILD_HOME require_job_env BUILD_ISO @@ -15,6 +16,8 @@ require_job_env BUILD_ISO load_build_env require_job_env SIGN_ISO_FORMAL +require_job_env SIGN_MAX_ATTEMPTS +require_job_env SIGN_BACKOFF_DELAY $BUILD_ISO || bail "BUILD_ISO=false, bailing out" @@ -32,12 +35,14 @@ sign_iso() { export SIGNING_SERVER export SIGNING_USER maybe_run rm -f "$sig_file" - maybe_run sign_iso_formal.sh "$iso_file" || die "failed to sign ISO" + if ! maybe_run with_retries -d "$SIGN_BACKOFF_DELAY" "$SIGN_MAX_ATTEMPTS" sign_iso_formal.sh "$iso_file" ; then + die "failed to sign ISO" + fi if ! $DRY_RUN ; then [[ -f "$sig_file" ]] || die "failed to sign ISO" info "created signature $sig_file" fi - ) + ) || exit 1 return 0 fi diff --git a/scripts/templates/build.conf.example.in b/scripts/templates/build.conf.example.in index 4681421..7dc2ce6 100644 --- a/scripts/templates/build.conf.example.in +++ b/scripts/templates/build.conf.example.in @@ -56,6 +56,11 @@ SIGN_ISO_FORMAL=true # $SIGNING_SERVER. When "false", don't add secureboot signatures. SECUREBOOT_FORMAL=true +# How many times to retry & sleep between retries, when accessing +# signing server +SIGN_MAX_ATTEMPTS=3 +SIGN_BACKOFF_DELAY=10 + # Run this command inside the build container at the end of the build # Current directory will be set to $MY_WORKSPACE/export. # This command must leave any additional files to be published in that