jenkins-pipelines/scripts/archive-misc.sh
Davlet Panech fe5793b71d archive-dir: binary search + parallelism
Performance enhancements for archive-dir:

* While searching for old checksums, use BSD look [1] (binary search),
  rather than grep (linear). This requires a docker image with that
  utility installed. A Dockerfile is included and is meant to be built
  and pushed to Docker Hub manually as needed. Image name:
  starlings/jenkins-pipelines-coreutils:TIMESTAMP .

* Process all files in parallel. Previously we only calculated checksums
  in parallel.

Timings before & after the patch, using a build with ~100K files and
~300K old checksums (docker + aptly + mirrors):

* before patch with JOBS=4: 2 hrs 7 min
* this patch with JOBS=4: 26 min
* this patch with JOBS=1: 1 hr 10 min

[1] https://man.openbsd.org/look.1

TESTS
=======================
Run "archive-misc" and make sure it copies/links the same files as
before the patch.

Story: 2010226
Task: 48184

Signed-off-by: Davlet Panech <davlet.panech@windriver.com>
Change-Id: I2ad271be673e8499c17a87e9d52864b40e217fc7
2023-06-06 15:48:11 -04:00

175 lines
5.8 KiB
Bash
Executable File

#!/bin/bash
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Archive everything except:
# - symlinks that point to $BUILD_OUTPUT_HOME
# - localdisk/deploy/ which is archived separately by archive-iso.sh
# - large temp dirs left behind by ostree
#
# Exit as soon as a command fails
set -e
# Canonical (symlink-resolved) path of the directory containing this script
THIS_DIR="$(readlink -f "$(dirname "$0")")"
# Helper libraries that live next to this script
source "$THIS_DIR"/lib/job_utils.sh
source "$THIS_DIR"/lib/publish_utils.sh
# Not defined in this file -- presumably provided by the libraries sourced
# above and responsible for setting the BUILD_* variables used below;
# TODO confirm
load_build_env
# Uncomment to pass --verbose to safe_copy_dir further down
#VERBOSE_ARG="--verbose"
# Usage: print_regfile_name_if_exists PATH
#
# Echo PATH if it passes the "-f" test (an existing regular file);
# otherwise print nothing. A missing file is not an error.
print_regfile_name_if_exists() {
    local candidate="$1"
    [[ ! -f "$candidate" ]] || echo "$candidate"
}
# Print, one per line, the directories located directly under
# $BUILD_OUTPUT_ROOT whose name begins with at least four digits, leaving out
# any whose name matches $TIMESTAMP (as interpreted by "find -name").
find_old_archive_dirs() {
    local find_args=(
        "$BUILD_OUTPUT_ROOT"
        -mindepth 1 -maxdepth 1
        -type d
        '!' -name "$TIMESTAMP"
        # -regextype must come before the -regex it applies to
        -regextype posix-extended
        -regex '.*/[0-9]{4,}[^/]*$'
    )
    find "${find_args[@]}"
}
# Print, one per line, the checksum files (named $CHECKSUMS_FILENAME) that
# exist under the mirrors/ and aptly/ subdirectories of each directory
# reported by find_old_archive_dirs.
find_old_checksum_files__mirrors() {
    local archive_dir
    # "IFS= read -r" keeps each path exactly as find printed it: without them
    # backslashes are eaten and leading/trailing whitespace is trimmed.
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/mirrors/$CHECKSUMS_FILENAME"
        print_regfile_name_if_exists "$archive_dir/aptly/$CHECKSUMS_FILENAME"
    done
    # Must directly follow the pipeline -- presumably inspects its exit
    # statuses (helper is defined outside this file; TODO confirm)
    check_pipe_status
}
# Old-checksum lookup for the "aptly" directory: simply delegates to
# find_old_checksum_files__mirrors, so "aptly" and "mirrors" use the same
# list of old checksum files.
find_old_checksum_files__aptly() {
find_old_checksum_files__mirrors
}
# Print, one per line, the checksum files (named $CHECKSUMS_FILENAME) that
# exist under the docker/ subdirectory of each directory reported by
# find_old_archive_dirs.
find_old_checksum_files__docker() {
    local archive_dir
    # "IFS= read -r" keeps each path exactly as find printed it: without them
    # backslashes are eaten and leading/trailing whitespace is trimmed.
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/docker/$CHECKSUMS_FILENAME"
    done
    # Must directly follow the pipeline -- presumably inspects its exit
    # statuses (helper is defined outside this file; TODO confirm)
    check_pipe_status
}
# Usage: do_archive_dir DIR_ID [EXTRA_CHECKSUMS_FILE...]
#
# Archive $BUILD_HOME/DIR_ID to $BUILD_OUTPUT_HOME/DIR_ID using the method
# that $ARCHIVE_BIG_DIRS selects for DIR_ID (default: "checksum-hardlink").
# Nothing is archived if $BUILD_HOME/DIR_ID does not exist.
#
# DIR_ID is "mirrors" "docker" or "aptly"
#
# EXTRA_CHECKSUMS_FILE... are additional checksum files to offer for
# hardlinking; they are only used by the "checksum-hardlink" method, and
# files that don't exist are skipped.
#
# Methods:
#   top-symlink        make $BUILD_OUTPUT_HOME/DIR_ID a symlink to
#                      $BUILD_HOME/DIR_ID
#   checksum-hardlink  run helpers/archive-dir.sh in the $COREUTILS_DOCKER_IMG
#                      container, passing it the list of old checksum files
#   checksum-copy      same, but without the list of old checksum files
#
# Any other method aborts the script via die.
#
# Example:
#
#   # archive mirrors/
#   do_archive_dir "mirrors"
#
#   # archive aptly/ , but also consider files archived under "mirrors" by
#   # the previous line for hardlinking
#   do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/StxChecksums"
#
do_archive_dir() {
    local id="$1" ; shift || :
    local dir="$id"
    local spec
    # spec_method must be local: it used to leak into the global scope
    # because of a misspelled declaration
    local spec_id spec_method

    notice "archiving $id"

    # ARCHIVE_BIG_DIRS contains a space-separated list of "method"
    # or "dir:method" pairs, eg:
    #   "top-symlink aptly:checksum-hardlink"
    # Entries are applied left to right, so the last applicable one wins.
    spec_method="checksum-hardlink"
    for spec in $ARCHIVE_BIG_DIRS ; do
        if [[ "$spec" =~ : ]] ; then
            spec_id="${spec%%:*}"
            if [[ "$spec_id" == "$id" ]] ; then
                spec_method="${spec#*:}"
            fi
            continue
        fi
        spec_method="$spec"
    done

    info "dir=$dir method=$spec_method"

    case "$spec_method" in
        top-symlink)
            if [[ -e "$BUILD_HOME/$dir" ]] ; then
                # Get rid of a directory left at the destination before
                # putting the symlink in its place (-d implies -e)
                if [[ -d "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi
                maybe_run ln -sfn "$BUILD_HOME/$dir" "$BUILD_OUTPUT_HOME/$dir"
            fi
            ;;

        checksum-hardlink|checksum-copy)
            if [[ -e "$BUILD_HOME/$dir" ]] ; then

                # Start from a clean destination. $DRY_RUN_ARG is passed for
                # consistency with every other step that modifies the
                # archive, so that a dry run doesn't delete anything there.
                if [[ -e "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi

                # Scratch area: a private copy of the helper script plus a
                # per-DIR_ID work directory
                local tmp_dir="$BUILD_HOME/tmp/archive-misc"
                rm -rf "$tmp_dir/$id"
                mkdir -p "$tmp_dir/$id"
                cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/"

                local archive_args=()
                if [[ "$spec_method" == "checksum-hardlink" ]] ; then
                    # List of old checksum files: whatever
                    # find_old_checksum_files__<DIR_ID> reports, followed by
                    # the extra files given by the caller that exist
                    local old_checksums_file_list="$tmp_dir/$id/old_checksums_file.list"
                    local find_func=find_old_checksum_files__$id
                    $find_func >"$old_checksums_file_list"
                    archive_args+=("--checksum-hardlink" "$old_checksums_file_list")
                    local extra_checksums_file
                    for extra_checksums_file in "$@" ; do
                        print_regfile_name_if_exists "$extra_checksums_file"
                    done >>"$old_checksums_file_list"
                fi

                # $SHELL_XTRACE is executed as a command here -- presumably
                # it holds "true" or "false"; TODO confirm
                if $SHELL_XTRACE ; then
                    archive_args+=("--xtrace")
                fi

                #local egid
                #egid=$(id -g)
                #archive_args+=(--owner "$EUID" --group "$egid")

                local src_dir="$BUILD_HOME/$dir"
                local dst_dir="$BUILD_OUTPUT_HOME/$dir"
                maybe_run mkdir -p "$dst_dir"
                safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \
                    "${archive_args[@]}" \
                    -j "${PARALLEL_CMD_JOBS:-1}" \
                    --output-checksums "$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
                    "$src_dir" \
                    "$dst_dir" \
                    "$tmp_dir/$id"
            fi
            ;;

        *)
            die "ARCHIVE_BIG_DIRS: invalid copy method \"$spec_method\": expecting \"top-symlink\", \"checksum-hardlink\" or \"checksum-copy\""
            ;;
    esac
}
# Make sure the archive destination exists
mkdir -p "$BUILD_OUTPUT_HOME"

# Straight copy of everything under $BUILD_HOME except the paths excluded
# here; the big directories are handled individually further down
notice "archiving misc files"
exclude_args=(
    --exclude "/localdisk/designer/**"              # symlink inside
    --exclude "/aptly"                              # see below
    --exclude "/mirrors"                            # see below
    --exclude "/docker"                             # see below
    --exclude "/workspace"                          # symlink
    --exclude "/repo"                               # symlink
    --exclude "/localdisk/workdir/**"               # ostree temp files
    --exclude "/localdisk/sub_workdir/workdir/**"   # ostree temp files
    --exclude "/localdisk/deploy/**"                # archived by archive-iso.sh
    --exclude "/tmp/*"                              # some of the files here are quite large, exclude
)
# $DRY_RUN_ARG and $VERBOSE_ARG are left unquoted on purpose: when empty they
# must expand to no argument at all
safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \
    "${exclude_args[@]}" \
    "$BUILD_HOME/" "$BUILD_OUTPUT_HOME/"

# Link or copy big directories; "aptly" may also hardlink against the files
# just archived under "mirrors"
do_archive_dir "mirrors"
do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME"
do_archive_dir "docker"