
Performance enhancements for archive-dir: * While searching for old checksums, use BSD look [1] (binary search), rather than grep (linear). This requires a docker image with that utility installed. A Dockerfile is included and is meant to be built and pushed to Docker Hub manually as needed. Image name: starlings/jenkins-pipelines-coreutils:TIMESTAMP . * Process all files in parallel. Previously we only calculated checksums in parallel. Timings before & after the patch, using a build with ~100K files and ~300K old checksums (docker + aptly + mirrors): * before patch with JOBS=4: 2 hrs 7 min * this patch with JOBS=4: 26 min * this patch with JOBS=1: 1hr 10 min [1] https://man.openbsd.org/look.1 TESTS ======================= Run "archive-misc" and make sure it copies/links the same files as before the patch. Story: 2010226 Task: 48184 Signed-off-by: Davlet Panech <davlet.panech@windriver.com> Change-Id: I2ad271be673e8499c17a87e9d52864b40e217fc7
175 lines
5.8 KiB
Bash
Executable File
175 lines
5.8 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
#
|
|
# Copyright (c) 2022 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
# Archive everything except:
|
|
# - symlinks that point to $BUILD_OUTPUT_HOME
|
|
# - localdisk/deploy/ which is archived separately by archive-iso.sh
|
|
# - large temp dirs left behind by ostree
|
|
#
|
|
|
|
# Abort on the first command that fails.
set -o errexit

# Absolute, symlink-free path of the directory holding this script; the
# helper libraries below are located relative to it.
THIS_DIR="$(readlink -f "$(dirname "$0")")"

# Shared helper libraries.
. "$THIS_DIR"/lib/job_utils.sh
. "$THIS_DIR"/lib/publish_utils.sh

# NOTE(review): presumably provided by one of the libraries sourced above and
# expected to define BUILD_HOME, BUILD_OUTPUT_HOME, TIMESTAMP etc -- confirm.
load_build_env
|
|
|
|
#VERBOSE_ARG="--verbose"
|
|
|
|
# Usage: print_regfile_name_if_exists PATH
#
# Print PATH on its own line if it names an existing regular file (the -f
# test follows symlinks); print nothing otherwise. A missing file is not an
# error: the function still returns 0.
print_regfile_name_if_exists() {
    if [[ -f "$1" ]] ; then
        # printf rather than echo: echo would swallow names that look like
        # its own options, such as "-n" or "-e"
        printf '%s\n' "$1"
    fi
}
|
|
|
|
# Print the archive directories of earlier builds, one per line: the
# immediate subdirectories of $BUILD_OUTPUT_ROOT whose name starts with at
# least 4 digits, skipping the directory named after the current $TIMESTAMP.
find_old_archive_dirs() {
    local -a find_args=(
        "$BUILD_OUTPUT_ROOT"
        -mindepth 1 -maxdepth 1
        -type d
        '!' -name "$TIMESTAMP"
        -regextype posix-extended
        -regex '.*/[0-9]{4,}[^/]*$'
    )
    find "${find_args[@]}"
}
|
|
|
|
# Print the checksum files of older builds that may contain hardlink
# candidates for "mirrors", one path per line. For every old archive
# directory both mirrors/$CHECKSUMS_FILENAME and aptly/$CHECKSUMS_FILENAME
# are considered; files that do not exist are skipped.
find_old_checksum_files__mirrors() {
    local archive_dir
    # IFS= and -r keep each path exactly as find printed it (no backslash
    # processing, no trimming of leading/trailing whitespace)
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/mirrors/$CHECKSUMS_FILENAME"
        print_regfile_name_if_exists "$archive_dir/aptly/$CHECKSUMS_FILENAME"
    done
    # NOTE(review): check_pipe_status is defined outside this file; presumably
    # it inspects the exit statuses of the pipeline above, so it must stay
    # directly after it -- confirm.
    check_pipe_status
}
|
|
|
|
# Print the old checksum files to consult when archiving "aptly". This is
# the same list as for "mirrors", so simply delegate; output and exit status
# are those of find_old_checksum_files__mirrors.
find_old_checksum_files__aptly() {
    find_old_checksum_files__mirrors
}
|
|
|
|
# Print the checksum files of older builds that may contain hardlink
# candidates for "docker", one path per line: docker/$CHECKSUMS_FILENAME of
# every old archive directory in which that file exists.
find_old_checksum_files__docker() {
    local archive_dir
    # IFS= and -r keep each path exactly as find printed it (no backslash
    # processing, no trimming of leading/trailing whitespace)
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/docker/$CHECKSUMS_FILENAME"
    done
    # NOTE(review): check_pipe_status is defined outside this file; presumably
    # it inspects the exit statuses of the pipeline above, so it must stay
    # directly after it -- confirm.
    check_pipe_status
}
|
|
|
|
# Usage: do_archive_dir DIR_ID [EXTRA_CHECKSUMS_FILE...]
#
# Archive $BUILD_HOME/DIR_ID as $BUILD_OUTPUT_HOME/DIR_ID using the method
# selected for DIR_ID by $ARCHIVE_BIG_DIRS (default: "checksum-hardlink").
# Nothing is done if $BUILD_HOME/DIR_ID does not exist.
#
# DIR_ID is "mirrors", "docker" or "aptly"; it must not be empty.
#
# EXTRA_CHECKSUMS_FILE... are additional checksum files to consult, on top
# of the ones found in older builds, when the method is "checksum-hardlink".
# Extra files that do not exist are ignored.
#
# Dies if the selected method is not one of "top-symlink",
# "checksum-hardlink" or "checksum-copy".
#
# Example:
#
#   # archive mirrors/
#   do_archive_dir "mirrors"
#
#   # archive aptly/ , but also consider files archived under "mirrors" by
#   # the previous line for hardlinking
#   do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/StxChecksums"
#
do_archive_dir() {
    local id="$1" ; shift || :
    local dir="$id"
    local spec
    local spec_id spec_method
    local tmp_dir

    # An empty id would make "$BUILD_OUTPUT_HOME/$dir" below refer to the
    # archive root itself, which must never be removed
    if [[ -z "$id" ]] ; then
        die "do_archive_dir: DIR_ID must not be empty"
    fi

    notice "archiving $id"

    # ARCHIVE_BIG_DIRS contains a space-separated list of "method"
    # or "dir:method" pairs, eg:
    #   "top-symlink aptly:checksum-hardlink"
    # A bare "method" applies to every directory, "dir:method" only to that
    # directory. Entries are processed left to right, the last applicable
    # one wins.
    spec_method="checksum-hardlink"
    for spec in $ARCHIVE_BIG_DIRS ; do
        if [[ "$spec" =~ : ]] ; then
            spec_id="${spec%%:*}"
            if [[ "$spec_id" == "$id" ]] ; then
                spec_method="${spec#*:}"
            fi
            continue
        fi
        spec_method="$spec"
    done

    info "dir=$dir method=$spec_method"

    case "$spec_method" in
        top-symlink)
            # Replace the archived directory with a symlink to the build's copy
            if [[ -e "$BUILD_HOME/$dir" ]] ; then
                if [[ -d "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi
                maybe_run ln -sfn "$BUILD_HOME/$dir" "$BUILD_OUTPUT_HOME/$dir"
            fi
            ;;
        checksum-hardlink|checksum-copy)
            if [[ -e "$BUILD_HOME/$dir" ]] ; then

                # Start from a clean destination; honor dry-run mode in the
                # same way as the top-symlink branch does
                if [[ -e "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi

                # Scratch area: a private copy of the helper script plus a
                # per-directory work dir that is also passed to the helper
                tmp_dir="$BUILD_HOME/tmp/archive-misc"
                rm -rf "$tmp_dir/$id"
                mkdir -p "$tmp_dir/$id"
                cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/"

                local -a archive_args=()
                if [[ "$spec_method" == "checksum-hardlink" ]] ; then
                    # List the checksum files that may name hardlink
                    # candidates: first those of older builds, then the
                    # caller-provided extras that exist
                    local old_checksums_file_list="$tmp_dir/$id/old_checksums_file.list"
                    local find_func=find_old_checksum_files__$id
                    $find_func >"$old_checksums_file_list"
                    archive_args+=("--checksum-hardlink" "$old_checksums_file_list")
                    local extra_checksums_file
                    for extra_checksums_file in "$@" ; do
                        print_regfile_name_if_exists "$extra_checksums_file"
                    done >>"$old_checksums_file_list"
                fi

                # SHELL_XTRACE is run as a command ("true"/"false"); treat an
                # unset or empty value as false -- an empty command would
                # otherwise succeed and enable tracing
                if ${SHELL_XTRACE:-false} ; then
                    archive_args+=("--xtrace")
                fi

                #local egid
                #egid=$(id -g)
                #archive_args+=(--owner "$EUID" --group "$egid")

                local src_dir="$BUILD_HOME/$dir"
                local dst_dir="$BUILD_OUTPUT_HOME/$dir"
                maybe_run mkdir -p "$dst_dir"
                safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \
                    "${archive_args[@]}" \
                    -j "${PARALLEL_CMD_JOBS:-1}" \
                    --output-checksums "$dst_dir/$CHECKSUMS_FILENAME" \
                    "$src_dir" \
                    "$dst_dir" \
                    "$tmp_dir/$id"

            fi
            ;;
        *)
            die "ARCHIVE_BIG_DIRS: invalid copy method \"$spec_method\": expecting \"top-symlink\", \"checksum-hardlink\" or \"checksum-copy\""
            ;;
    esac
}
|
|
|
|
# Make sure the archive directory of this build exists
mkdir -p "$BUILD_OUTPUT_HOME"

# Straight copy the other files
notice "archiving misc files"
exclude_args=(
    --exclude "/localdisk/designer/**"              # symlink inside
    --exclude "/aptly"                              # see below
    --exclude "/mirrors"                            # see below
    --exclude "/docker"                             # see below
    --exclude "/workspace"                          # symlink
    --exclude "/repo"                               # symlink
    --exclude "/localdisk/workdir/**"               # ostree temp files
    --exclude "/localdisk/sub_workdir/workdir/**"   # ostree temp files
    --exclude "/localdisk/deploy/**"                # archived by archive-iso.sh
    --exclude "/tmp/*"                              # some of the files here are quite large, exclude
)

# DRY_RUN_ARG and VERBOSE_ARG are intentionally unquoted: when empty they
# must expand to no argument at all
safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \
    "${exclude_args[@]}" \
    "$BUILD_HOME/" "$BUILD_OUTPUT_HOME/"

# Link or copy big directories. "aptly" additionally considers the checksums
# just written for "mirrors" when looking for hardlink candidates.
do_archive_dir "mirrors"
do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME"
do_archive_dir "docker"
|