Change the way of counting the pods running

The expected pod count was hard-coded as "6". This change uses
"stx control status" to check the state of every pod: if all of them are
Running we proceed; otherwise we retry and, if they are still not all
running after some retries, we show an error.

Test plan:
    PASS: Pipeline passed without errors when all containers are running
    PASS: Pipeline failed when at least one pod failed, preventing
	  unexpected behavior for the next step of the pipelines

Story: 2011209
Task: 50883
Change-Id: I37810fe558c914a800217e9ef17ec74a739c59d4
This commit is contained in:
Rafael Santos 2024-09-02 15:45:06 -03:00
parent 78f861d29d
commit 16988a0dbf
2 changed files with 16 additions and 1 deletions

View File

@ -261,6 +261,21 @@ load_build_env() {
__set_build_vars || exit 1
}
# Usage: check_all_pods_running
#
# Check whether every pod listed by "stx control status" is in the Running
# state.
#
# Outputs: when the check fails, prints the per-status pod counts to stdout.
# Returns: 0 if at least one pod is listed and all listed pods are Running,
#          1 otherwise (including when no pod status could be read at all).
check_all_pods_running() {
    local status_summary

    # Take the 3rd column of every row that follows the header row whose 3rd
    # column is "STATUS", then count the rows per distinct status.
    # Every row after the header is considered (no fixed row limit), and rows
    # with fewer than three fields carry no status, so they are skipped
    # instead of being counted as a non-Running pod.
    # The pipeline's exit status is deliberately ignored: a failing or silent
    # "stx control status" yields an empty summary, which is handled below.
    status_summary=$(
        stx control status 2>&1 \
            | awk '$3 ~ /STATUS/ { in_table = 1; next }
                   in_table && NF >= 3 { print $3 }' \
            | sort \
            | uniq -c
    ) || true

    # Succeed only if the summary is non-empty and has no line lacking
    # "Running" (grep -v -q succeeds as soon as it finds such a line).
    if [[ -n "$status_summary" ]] \
        && ! grep -v -q "Running" <<<"$status_summary"; then
        return 0
    fi

    echo "There are pods that are not running yet, here's their status: ${status_summary}"
    return 1
}
# Usage: stx_docker_cmd [--dry-run] SHELL_SNIPPET
stx_docker_cmd() {
local dry_run=0

View File

@ -21,7 +21,7 @@ stx control start
# wait for startup
notice "waiting for containers to startup ($BUILDER_POD_STARTUP_TIMEOUT seconds)"
let deadline="$(date '+%s')+$BUILDER_POD_STARTUP_TIMEOUT"
while [[ "$(stx control status | grep -i running | wc -l)" -lt 6 ]] ; do
until check_all_pods_running ; do
if [[ "$(date '+%s')" -ge $deadline ]] ; then
die "pods didn't start up after $BUILDER_POD_STARTUP_TIMEOUT second(s)"
fi