archive-dir: binary search + parallelism
Performance enhancements for archive-dir:

* While searching for old checksums, use BSD look [1] (binary search),
  rather than grep (linear). This requires a docker image with that
  utility installed. A Dockerfile is included and is meant to be built
  and pushed to Docker Hub manually as needed.
  Image name: starlingx/jenkins-pipelines-coreutils:TIMESTAMP.
* Process all files in parallel. Previously we only calculated
  checksums in parallel.

Timings before & after the patch, using a build with ~100K files and
~300K old checksums (docker + aptly + mirrors):

* before patch with JOBS=4: 2 hrs 7 min
* this patch with JOBS=4: 26 min
* this patch with JOBS=1: 1 hr 10 min

[1] https://man.openbsd.org/look.1

TESTS
=======================
Run "archive-misc" and make sure it copies/links the same files as
before the patch.

Story: 2010226
Task: 48184

Signed-off-by: Davlet Panech <davlet.panech@windriver.com>
Change-Id: I2ad271be673e8499c17a87e9d52864b40e217fc7
This commit is contained in:
parent
eccb119877
commit
fe5793b71d
1
dockerfiles/coreutils/.dockerignore
Normal file
1
dockerfiles/coreutils/.dockerignore
Normal file
@ -0,0 +1 @@
|
|||||||
|
*
|
8
dockerfiles/coreutils/Dockerfile
Normal file
8
dockerfiles/coreutils/Dockerfile
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
FROM debian:11
|
||||||
|
|
||||||
|
RUN apt-get update -y && \
|
||||||
|
apt-get upgrade -y && \
|
||||||
|
apt-get install -y bsdextrautils parallel && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
8
dockerfiles/coreutils/build.sh
Executable file
8
dockerfiles/coreutils/build.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
CONTEXT_DIR="$(dirname "$0")"
|
||||||
|
IMAGE="$(source "$CONTEXT_DIR/../../scripts/lib/job_utils.sh" && echo "$COREUTILS_DOCKER_IMG")"
|
||||||
|
|
||||||
|
docker build -t "$IMAGE" "$CONTEXT_DIR"
|
8
dockerfiles/coreutils/push.sh
Executable file
8
dockerfiles/coreutils/push.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
CONTEXT_DIR="$(dirname "$0")"
|
||||||
|
IMAGE="$(source "$CONTEXT_DIR/../../scripts/lib/job_utils.sh" && echo "$COREUTILS_DOCKER_IMG")"
|
||||||
|
|
||||||
|
docker push "$IMAGE"
|
@ -19,27 +19,8 @@ source "$THIS_DIR"/lib/publish_utils.sh
|
|||||||
|
|
||||||
load_build_env
|
load_build_env
|
||||||
|
|
||||||
notice "archiving misc files"
|
|
||||||
|
|
||||||
#VERBOSE_ARG="--verbose"
|
#VERBOSE_ARG="--verbose"
|
||||||
|
|
||||||
exclude_args=()
|
|
||||||
exclude_args+=(--exclude "/localdisk/designer/**") # symlink inside
|
|
||||||
exclude_args+=(--exclude "/aptly") # see below
|
|
||||||
exclude_args+=(--exclude "/mirrors") # see below
|
|
||||||
exclude_args+=(--exclude "/docker") # see below
|
|
||||||
exclude_args+=(--exclude "/workspace") # symlink
|
|
||||||
exclude_args+=(--exclude "/repo") # symlink
|
|
||||||
exclude_args+=(--exclude "/localdisk/workdir/**") # ostree temp files
|
|
||||||
exclude_args+=(--exclude "/localdisk/sub_workdir/workdir/**") # ostree temp files
|
|
||||||
exclude_args+=(--exclude "/localdisk/deploy/**") # archived by archive-iso.sh
|
|
||||||
|
|
||||||
mkdir -p "$BUILD_OUTPUT_HOME"
|
|
||||||
safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \
|
|
||||||
"${exclude_args[@]}" \
|
|
||||||
"$BUILD_HOME/" "$BUILD_OUTPUT_HOME/"
|
|
||||||
|
|
||||||
|
|
||||||
print_regfile_name_if_exists() {
|
print_regfile_name_if_exists() {
|
||||||
if [[ -f "$1" ]] ; then
|
if [[ -f "$1" ]] ; then
|
||||||
echo "$1"
|
echo "$1"
|
||||||
@ -126,6 +107,7 @@ do_archive_dir() {
|
|||||||
safe_rm "$BUILD_OUTPUT_HOME/$dir"
|
safe_rm "$BUILD_OUTPUT_HOME/$dir"
|
||||||
fi
|
fi
|
||||||
tmp_dir="$BUILD_HOME/tmp/archive-misc"
|
tmp_dir="$BUILD_HOME/tmp/archive-misc"
|
||||||
|
rm -rf "$tmp_dir/$id"
|
||||||
mkdir -p "$tmp_dir/$id"
|
mkdir -p "$tmp_dir/$id"
|
||||||
cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/"
|
cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/"
|
||||||
local archive_args=()
|
local archive_args=()
|
||||||
@ -139,6 +121,9 @@ do_archive_dir() {
|
|||||||
print_regfile_name_if_exists "$extra_checksums_file"
|
print_regfile_name_if_exists "$extra_checksums_file"
|
||||||
done >>"$old_checksums_file_list"
|
done >>"$old_checksums_file_list"
|
||||||
fi
|
fi
|
||||||
|
if $SHELL_XTRACE ; then
|
||||||
|
archive_args+=("--xtrace")
|
||||||
|
fi
|
||||||
|
|
||||||
#local egid
|
#local egid
|
||||||
#egid=$(id -g)
|
#egid=$(id -g)
|
||||||
@ -149,7 +134,7 @@ do_archive_dir() {
|
|||||||
maybe_run mkdir -p "$dst_dir"
|
maybe_run mkdir -p "$dst_dir"
|
||||||
safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \
|
safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \
|
||||||
"${archive_args[@]}" \
|
"${archive_args[@]}" \
|
||||||
-j ${BUILD_PACKAGES_PARALLEL_JOBS:-1} \
|
-j ${PARALLEL_CMD_JOBS:-1} \
|
||||||
--output-checksums "$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
|
--output-checksums "$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
|
||||||
"$src_dir" \
|
"$src_dir" \
|
||||||
"$dst_dir" \
|
"$dst_dir" \
|
||||||
@ -163,6 +148,27 @@ do_archive_dir() {
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mkdir -p "$BUILD_OUTPUT_HOME"
|
||||||
|
|
||||||
|
# Straight copy the other files
|
||||||
|
notice "archiving misc files"
|
||||||
|
exclude_args=()
|
||||||
|
exclude_args+=(--exclude "/localdisk/designer/**") # symlink inside
|
||||||
|
exclude_args+=(--exclude "/aptly") # see below
|
||||||
|
exclude_args+=(--exclude "/mirrors") # see below
|
||||||
|
exclude_args+=(--exclude "/docker") # see below
|
||||||
|
exclude_args+=(--exclude "/workspace") # symlink
|
||||||
|
exclude_args+=(--exclude "/repo") # symlink
|
||||||
|
exclude_args+=(--exclude "/localdisk/workdir/**") # ostree temp files
|
||||||
|
exclude_args+=(--exclude "/localdisk/sub_workdir/workdir/**") # ostree temp files
|
||||||
|
exclude_args+=(--exclude "/localdisk/deploy/**") # archived by archive-iso.sh
|
||||||
|
exclude_args+=(--exclude "/tmp/*") # some of the files here are quite large, exclude
|
||||||
|
|
||||||
|
safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \
|
||||||
|
"${exclude_args[@]}" \
|
||||||
|
"$BUILD_HOME/" "$BUILD_OUTPUT_HOME/"
|
||||||
|
|
||||||
|
# Link or copy big directories
|
||||||
do_archive_dir "mirrors"
|
do_archive_dir "mirrors"
|
||||||
do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME"
|
do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME"
|
||||||
do_archive_dir "docker"
|
do_archive_dir "docker"
|
||||||
|
@ -138,7 +138,8 @@ misc_rm=(
|
|||||||
"$BUILD_HOME"/workspace/std/build-wheels* \
|
"$BUILD_HOME"/workspace/std/build-wheels* \
|
||||||
"$BUILD_HOME"/workspace/std/build-helm \
|
"$BUILD_HOME"/workspace/std/build-helm \
|
||||||
"$BUILD_HOME"/workspace/"export" \
|
"$BUILD_HOME"/workspace/"export" \
|
||||||
"$BUILD_HOME"/workspace/helm-charts
|
"$BUILD_HOME"/workspace/helm-charts \
|
||||||
|
"$BUILD_HOME"/tmp \
|
||||||
)
|
)
|
||||||
rm_args=()
|
rm_args=()
|
||||||
for path in "${misc_rm[@]}" ; do
|
for path in "${misc_rm[@]}" ; do
|
||||||
|
@ -8,6 +8,7 @@ DST_CHECKSUMS_FILE=
|
|||||||
CHANGE_OWNER=
|
CHANGE_OWNER=
|
||||||
CHANGE_GROUP=
|
CHANGE_GROUP=
|
||||||
JOBS=1
|
JOBS=1
|
||||||
|
XTRACE=0
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo -n "\
|
echo -n "\
|
||||||
@ -30,6 +31,8 @@ Archive SRC_DIR in DST_DIR, using TMP_DIR for temporary files.
|
|||||||
We will use the files with matching properties & checksums
|
We will use the files with matching properties & checksums
|
||||||
to create hard links in DST_DIR.
|
to create hard links in DST_DIR.
|
||||||
|
|
||||||
|
--xtrace Enable debug output
|
||||||
|
|
||||||
If executed by root, we will preserve owners/groups of the copied files,
|
If executed by root, we will preserve owners/groups of the copied files,
|
||||||
unless they are overridden on the command line.
|
unless they are overridden on the command line.
|
||||||
|
|
||||||
@ -58,7 +61,7 @@ check_pipe_status() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Process command line
|
# Process command line
|
||||||
temp=$(getopt -o h,j: --long help,jobs:,owner:,group:,output-checksums:,checksum-hardlink: -n "$PROGNAME" -- "$@") || cmdline_error
|
temp=$(getopt -o h,j: --long help,jobs:,owner:,group:,output-checksums:,checksum-hardlink:,xtrace -n "$PROGNAME" -- "$@") || cmdline_error
|
||||||
eval set -- "$temp"
|
eval set -- "$temp"
|
||||||
while [[ "$#" -gt 0 ]] ; do
|
while [[ "$#" -gt 0 ]] ; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@ -89,6 +92,10 @@ while [[ "$#" -gt 0 ]] ; do
|
|||||||
DST_CHECKSUMS_FILE="$2"
|
DST_CHECKSUMS_FILE="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--xtrace)
|
||||||
|
XTRACE=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--)
|
--)
|
||||||
shift
|
shift
|
||||||
break
|
break
|
||||||
@ -108,6 +115,23 @@ if [[ ! "$EGID" ]] ; then
|
|||||||
EGID="$(id -g)" || exit 1
|
EGID="$(id -g)" || exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ $XTRACE -eq 1 ]] ; then
|
||||||
|
set -x
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure BSD look is installed
|
||||||
|
if ! look --help >/dev/null ; then
|
||||||
|
echo "This script requires \"look\" to be installed" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check for GNU parallel
|
||||||
|
if parallel --help >/dev/null 2>&1 ; then
|
||||||
|
GNU_PARALLEL_EXISTS=1
|
||||||
|
else
|
||||||
|
GNU_PARALLEL_EXISTS=0
|
||||||
|
fi
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -138,46 +162,82 @@ fi
|
|||||||
# Create a list file with each source file or dir + their stat properties
|
# Create a list file with each source file or dir + their stat properties
|
||||||
echo $'\n## Compiling file list: '"$SRC_DIR" >&2
|
echo $'\n## Compiling file list: '"$SRC_DIR" >&2
|
||||||
full_list_file="$TMP_DIR/full.list"
|
full_list_file="$TMP_DIR/full.list"
|
||||||
( cd "$SRC_DIR" && find -printf 'type=%y owner=%U group=%G mode=%#m size=%s mtime=%T@ checksum= name=%p\n' ) \
|
( cd "$SRC_DIR" && find -printf 'type=%y owner=%U group=%G mode=%#m size=%s mtime=%T@ name=%p\n' ) \
|
||||||
| sed 's#name=[.]/#name=#' \
|
| sed 's#name=[.]/#name=#' \
|
||||||
| sed 's#\(mtime=[0-9]\+\)[.][0-9]\+#\1#g' \
|
| sed 's#\(mtime=[0-9]\+\)[.][0-9]\+#\1#g' \
|
||||||
>"${full_list_file}"
|
>"${full_list_file}"
|
||||||
check_pipe_status
|
check_pipe_status
|
||||||
|
|
||||||
# Create another list file that contains only regular files, and fill in the
|
# Create another list file that contains only regular files
|
||||||
# "checksum=" field.
|
|
||||||
# Use "flock" when printing in xarg's sub-jobs, to avoid interleaved output.
|
|
||||||
echo $'\n## Calculating checksums: '"$SRC_DIR" >&2
|
|
||||||
regfile_list_file="$TMP_DIR/regfile.list"
|
regfile_list_file="$TMP_DIR/regfile.list"
|
||||||
if [[ "$JOBS" -eq 1 ]] ; then
|
\grep '^type=f' "$full_list_file" | sort -k 7 >"$regfile_list_file" || exit 1
|
||||||
let xargs_max_args=256
|
|
||||||
else
|
|
||||||
let xargs_max_args="8" # calculate checksums in chunks of 8 files in parallel
|
|
||||||
fi
|
|
||||||
export SRC_DIR
|
|
||||||
\grep '^type=f' "$full_list_file" | xargs -r -d '\n' -n $xargs_max_args --process-slot-var=OUT_SUFFIX -P $JOBS bash -c '
|
|
||||||
for line in "$@" ; do
|
|
||||||
name="${line##*name=}"
|
|
||||||
flock -s "$SRC_DIR" echo " SHA256 $name" >&2
|
|
||||||
checksum="$(sha256sum "$SRC_DIR/$name" | awk "{print \$1}")"
|
|
||||||
[[ -n "$checksum" ]] || exit 1
|
|
||||||
output_line="${line/ checksum= / checksum=$checksum }"
|
|
||||||
flock -s "$SRC_DIR" echo "$output_line"
|
|
||||||
done
|
|
||||||
' unused_arg | sort -k 8 >"$regfile_list_file" || exit 1 # sort by the last field "name=..."
|
|
||||||
[[ "${PIPESTATUS[1]}" -eq 0 ]] || exit 1
|
|
||||||
|
|
||||||
# Create a list file that contains only directories
|
# Create a list file that contains only directories
|
||||||
# Sort by the last field "name=..."
|
# Sort by the last field "name=..."
|
||||||
dir_list_file="$TMP_DIR/dir.list"
|
dir_list_file="$TMP_DIR/dir.list"
|
||||||
\grep '^type=d' "$full_list_file" | sort -k 8 >"$dir_list_file"
|
\grep '^type=d' "$full_list_file" | sort -k 7 >"$dir_list_file" || exit 1
|
||||||
|
|
||||||
# Create a list file that contains all other entries (non-dirs & non-files)
|
# Create a list file that contains all other entries (non-dirs & non-files)
|
||||||
other_list_file="$TMP_DIR/other.list"
|
other_list_file="$TMP_DIR/other.list"
|
||||||
\grep '^type=[^df]' "$full_list_file" | sort -k 8 >"$other_list_file"
|
\grep '^type=[^df]' "$full_list_file" | sort -k 7 >"$other_list_file" || exit 1
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# create directories
|
# Usage: process_lines MESSAGE INPUT_FILE FUNC ARGS...
|
||||||
|
#
|
||||||
|
# Call shell function FUNC in parallel, similar to xargs.
|
||||||
|
# We will read lines from INPUT_FILE, then pass some subset of lines
|
||||||
|
# to FUNC many times in parallel, until all lines have been processed.
|
||||||
|
# Input lines will be appended as additional arguments to FUNC calls.
|
||||||
|
#
|
||||||
|
# FUNC and any global vars it references must be exported before
|
||||||
|
# calling process_lines().
|
||||||
|
#
|
||||||
|
# MESSAGE will be printed to STDERR before starting
|
||||||
|
#
|
||||||
|
process_lines() {
|
||||||
|
|
||||||
|
local message="$1" ; shift
|
||||||
|
local input_file="$1" ; shift
|
||||||
|
|
||||||
|
# how many input lines? bail out if 0
|
||||||
|
local line_count
|
||||||
|
line_count="$(cat "$input_file" | wc -l)" || exit 1
|
||||||
|
[[ "$line_count" -gt 0 ]] || return 0
|
||||||
|
|
||||||
|
# How many lines to process at a time. The more the better, but with too
|
||||||
|
# many some child jobs may starve -- cap it at 256
|
||||||
|
local lines_per_job
|
||||||
|
if [[ "$JOBS" -gt 1 ]] ; then
|
||||||
|
let lines_per_job="line_count / JOBS / 2"
|
||||||
|
if [[ "$lines_per_job" -eq 0 ]] ; then
|
||||||
|
lines_per_job=1
|
||||||
|
elif [[ "$lines_per_job" -gt 256 ]] ; then
|
||||||
|
lines_per_job=256
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
lines_per_job=256
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "** $message [JOBS=$JOBS lines_per_job=$lines_per_job]" >&2
|
||||||
|
|
||||||
|
# Prefer GNU parallel because it can exit early
|
||||||
|
local -a cmd
|
||||||
|
if [[ $GNU_PARALLEL_EXISTS -eq 1 ]] ; then
|
||||||
|
cmd=(parallel --halt now,fail=1 -q -r -d '\n' -n $lines_per_job -P $JOBS "$@")
|
||||||
|
else
|
||||||
|
cmd=(xargs -r -d '\n' -n $lines_per_job -P $JOBS $SHELL -c '"$@"' unused_arg "$@")
|
||||||
|
fi
|
||||||
|
if ! "${cmd[@]}" <"$input_file" ; then
|
||||||
|
echo "ERROR: command failed (\"$message\")" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# create directories in sort order, ie create parents before
|
||||||
|
# children
|
||||||
#
|
#
|
||||||
echo $'\n## Creating directories: '"$DST_DIR" >&2
|
echo $'\n## Creating directories: '"$DST_DIR" >&2
|
||||||
while read -r line ; do
|
while read -r line ; do
|
||||||
@ -202,158 +262,249 @@ while read -r line ; do
|
|||||||
\rm "$DST_DIR/$name" || exit 1
|
\rm "$DST_DIR/$name" || exit 1
|
||||||
fi
|
fi
|
||||||
install -d "${install_args[@]}" "$DST_DIR/$name"
|
install -d "${install_args[@]}" "$DST_DIR/$name"
|
||||||
done <"$dir_list_file"
|
done <"$dir_list_file" || exit 1
|
||||||
|
|
||||||
#
|
#
|
||||||
# Copy or hardlink regular files
|
# Copy or hardlink regular files
|
||||||
#
|
#
|
||||||
echo $'\n## Copying regular files: '"$SRC_DIR" >&2
|
echo $'\n## Copying regular files: '"$SRC_DIR" >&2
|
||||||
if [[ "$DST_CHECKSUMS_FILE" ]] ; then
|
|
||||||
DST_CHECKSUMS_FD=5
|
|
||||||
exec 5<>"$DST_CHECKSUMS_FILE" || exit 1
|
|
||||||
else
|
|
||||||
DST_CHECKSUMS_FD=1
|
|
||||||
fi
|
|
||||||
# read the list of regular files
|
|
||||||
while read -r line ; do
|
|
||||||
[[ -n "$line" ]] || continue
|
|
||||||
|
|
||||||
# source file name relative to SRC_DIR
|
# helper function to process regular files
|
||||||
name="${line##*name=}"
|
# global vars used:
|
||||||
|
# SRC_DIR
|
||||||
# source checksum
|
# DST_DIR
|
||||||
checksum="$(echo "$line" | sed -n -r 's#.* checksum=([^[:space:]]+).*#\1#p')"
|
# CHANGE_OWNER
|
||||||
[[ -n "$name" && -n "$checksum" ]] || continue
|
# CHANGE_GROUP
|
||||||
|
# EUID (always defined by bash)
|
||||||
# source owner; or a user-provided override
|
# EGID
|
||||||
install_args=()
|
# TMP_DIR
|
||||||
if [[ "$CHANGE_OWNER" ]] ; then
|
# XTRACE
|
||||||
owner="$CHANGE_OWNER"
|
# combined_checksums_file
|
||||||
install_args+=("--owner" "$owner")
|
process_regfiles() {
|
||||||
elif [[ $EUID -eq 0 ]] ; then
|
if [[ $XTRACE -eq 1 ]] ; then
|
||||||
owner="$(echo "$line" | sed -n -r 's#.* owner=([0-9]+).*#\1#p')"
|
set -x
|
||||||
install_args+=("--owner" "$owner")
|
|
||||||
else
|
|
||||||
owner=$EUID
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# source group; or a user-provided override
|
# Temp file generated by this function. Its name must be unique to
|
||||||
if [[ "$CHANGE_GROUP" ]] ; then
|
# prevent interference from other jobs with -j N.
|
||||||
group="$CHANGE_GROUP"
|
local matching_checksums_file
|
||||||
install_args+=("--group" "$group")
|
matching_checksums_file="$TMP_DIR/matching_checksums-$$.list"
|
||||||
elif [[ $EGID -eq 0 ]] ; then
|
|
||||||
group="$(echo "$line" | sed -n -r 's#.* group=([0-9]+).*#\1#p')"
|
|
||||||
install_args+=("--group" "$group")
|
|
||||||
else
|
|
||||||
group=$EGID
|
|
||||||
fi
|
|
||||||
|
|
||||||
# source file's mode/permissions
|
local line
|
||||||
mode="$(echo "$line" | sed -n -r 's#.* mode=([^[:space:]]+).*#\1#p')"
|
for line in "$@" ; do
|
||||||
|
|
||||||
# Search for the checksum in an older StxChecksums file
|
# source file name relative to SRC_DIR
|
||||||
if [[ "$CHECKSUM_FILES_LIST_FILE" ]] ; then
|
local name
|
||||||
matching_checksums_file="$TMP_DIR/matching_checksums.list"
|
name="${line##*name=}"
|
||||||
if \grep "^$checksum " "$combined_checksums_file" >"$matching_checksums_file" ; then
|
[[ "$name" ]] || continue
|
||||||
(
|
|
||||||
# As we read previosuly-archived files properties from StxChecksums,
|
|
||||||
# make sure they have not changed compared to the actual files on disk.
|
|
||||||
while read -r ref_checksum ref_name ref_size ref_mtime ref_dev ref_inode ref_path x_rest ; do
|
|
||||||
[[ -f "$ref_path" ]] || continue
|
|
||||||
# read on-disk file properties
|
|
||||||
ref_stat=($(stat -c '%s %Y %u %g %#04a' "$ref_path" || true))
|
|
||||||
[[ "${#ref_stat[@]}" -eq 5 ]] || continue
|
|
||||||
|
|
||||||
# on-disk size does not match StxChecksums
|
# source checksum
|
||||||
ref_ondisk_size="${ref_stat[0]}"
|
local checksum
|
||||||
[[ "$ref_size" == "$ref_ondisk_size" ]] || continue
|
#flock -s "$DST_DIR" echo " SHA256 $name" >&2
|
||||||
|
checksum="$(sha256sum "$SRC_DIR/$name" | awk '{print $1}')"
|
||||||
# on-disk mtime does not match StxChecksums
|
if [[ ! "$checksum" ]] ; then
|
||||||
ref_ondisk_mtime="${ref_stat[1]}"
|
flock -s "$DST_DIR" echo "$SRC_DIR/$name: failed to calculate checksum" >&2
|
||||||
[[ "${ref_mtime}" == "$ref_ondisk_mtime" ]] || continue
|
return 1
|
||||||
|
|
||||||
# on-disk owner does not match requested owner
|
|
||||||
ref_ondisk_owner="${ref_stat[2]}"
|
|
||||||
[[ "${owner}" == "$ref_ondisk_owner" ]] || continue
|
|
||||||
|
|
||||||
# on-disk group does not match requested group
|
|
||||||
ref_ondisk_group="${ref_stat[3]}"
|
|
||||||
[[ "${group}" == "$ref_ondisk_group" ]] || continue
|
|
||||||
|
|
||||||
# on-disk mode does not match the mode of the source file
|
|
||||||
ref_ondisk_mode="${ref_stat[4]}"
|
|
||||||
[[ "${mode}" == "$ref_ondisk_mode" ]] || continue
|
|
||||||
|
|
||||||
# At this point checksum, size, mtime, mode, owner, group and checksums of the
|
|
||||||
# exsiting file match with the file we are trying to copy.
|
|
||||||
# Use that file to create a hardlink.
|
|
||||||
echo " LINK $name (from $ref_name)" >&2
|
|
||||||
if ln -f "$ref_name" "${DST_DIR}/$name" ; then
|
|
||||||
echo "$checksum $name $ref_size $ref_mtime $ref_dev $ref_inode $DST_DIR/$name"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
done <"$matching_checksums_file"
|
|
||||||
# checksum not found in older archives
|
|
||||||
exit 1
|
|
||||||
) && continue || true
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# source owner; or a user-provided override
|
||||||
|
local -a install_args=()
|
||||||
|
local owner
|
||||||
|
if [[ "$CHANGE_OWNER" ]] ; then
|
||||||
|
owner="$CHANGE_OWNER"
|
||||||
|
install_args+=("--owner" "$owner")
|
||||||
|
elif [[ $EUID -eq 0 ]] ; then
|
||||||
|
owner="$(echo "$line" | sed -n -r 's#.* owner=([0-9]+).*#\1#p')"
|
||||||
|
install_args+=("--owner" "$owner")
|
||||||
|
else
|
||||||
|
owner=$EUID
|
||||||
|
fi
|
||||||
|
|
||||||
|
# source group; or a user-provided override
|
||||||
|
local group
|
||||||
|
if [[ "$CHANGE_GROUP" ]] ; then
|
||||||
|
group="$CHANGE_GROUP"
|
||||||
|
install_args+=("--group" "$group")
|
||||||
|
elif [[ $EGID -eq 0 ]] ; then
|
||||||
|
group="$(echo "$line" | sed -n -r 's#.* group=([0-9]+).*#\1#p')"
|
||||||
|
install_args+=("--group" "$group")
|
||||||
|
else
|
||||||
|
group=$EGID
|
||||||
|
fi
|
||||||
|
|
||||||
|
# source file's mode/permissions
|
||||||
|
local mode
|
||||||
|
mode="$(echo "$line" | sed -n -r 's#.* mode=([^[:space:]]+).*#\1#p')"
|
||||||
|
|
||||||
|
# Search for the checksum in an older StxChecksums file
|
||||||
|
if [[ "$combined_checksums_file" ]] ; then
|
||||||
|
if look "$checksum " "$combined_checksums_file" >"$matching_checksums_file" ; then
|
||||||
|
(
|
||||||
|
# As we read previously-archived files' properties from StxChecksums,
|
||||||
|
# make sure they have not changed compared to the actual files on disk.
|
||||||
|
while read -r ref_checksum ref_name ref_size ref_mtime ref_dev ref_inode ref_path x_rest ; do
|
||||||
|
[[ -f "$ref_path" ]] || continue
|
||||||
|
# read on-disk file properties
|
||||||
|
local ref_stat
|
||||||
|
ref_stat=($(stat -c '%s %Y %u %g %#04a' "$ref_path" || true))
|
||||||
|
[[ "${#ref_stat[@]}" -eq 5 ]] || continue
|
||||||
|
|
||||||
|
# on-disk size does not match StxChecksums
|
||||||
|
local ref_ondisk_size
|
||||||
|
ref_ondisk_size="${ref_stat[0]}"
|
||||||
|
[[ "$ref_size" == "$ref_ondisk_size" ]] || continue
|
||||||
|
|
||||||
|
# on-disk mtime does not match StxChecksums
|
||||||
|
local ref_ondisk_mtime
|
||||||
|
ref_ondisk_mtime="${ref_stat[1]}"
|
||||||
|
[[ "${ref_mtime}" == "$ref_ondisk_mtime" ]] || continue
|
||||||
|
|
||||||
|
# on-disk owner does not match requested owner
|
||||||
|
local ref_ondisk_owner
|
||||||
|
ref_ondisk_owner="${ref_stat[2]}"
|
||||||
|
[[ "${owner}" == "$ref_ondisk_owner" ]] || continue
|
||||||
|
|
||||||
|
# on-disk group does not match requested group
|
||||||
|
local ref_ondisk_group
|
||||||
|
ref_ondisk_group="${ref_stat[3]}"
|
||||||
|
[[ "${group}" == "$ref_ondisk_group" ]] || continue
|
||||||
|
|
||||||
|
# on-disk mode does not match the mode of the source file
|
||||||
|
ref_ondisk_mode="${ref_stat[4]}"
|
||||||
|
[[ "${mode}" == "$ref_ondisk_mode" ]] || continue
|
||||||
|
|
||||||
|
# At this point the checksum, size, mtime, mode, owner and group of the
|
||||||
|
# existing file match the file we are trying to copy.
|
||||||
|
# Use that file to create a hardlink.
|
||||||
|
flock -s "$DST_DIR" echo " LINK $name (from $ref_name)" >&2
|
||||||
|
if ln -f "$ref_name" "${DST_DIR}/$name" ; then
|
||||||
|
flock -s "$DST_DIR" echo "$checksum $name $ref_size $ref_mtime $ref_dev $ref_inode $DST_DIR/$name"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
done <"$matching_checksums_file"
|
||||||
|
# checksum not found in older archives
|
||||||
|
exit 1
|
||||||
|
) && continue || true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# No matching files found: really copy it
|
||||||
|
|
||||||
|
if [[ -e "$DST_DIR/$name" ]] ; then
|
||||||
|
\rm "$DST_DIR/$name" || exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# source file's size & mtime
|
||||||
|
local size mtime
|
||||||
|
size="$(echo "$line" | sed -n -r 's#.* size=([^[:space:]]+).*#\1#p')"
|
||||||
|
mtime="$(echo "$line" | sed -n -r 's#.* mtime=([^[:space:]]+).*#\1#p')"
|
||||||
|
|
||||||
|
# copy it to $DST_DIR
|
||||||
|
flock -s "$DST_DIR" echo " COPY $name" >&2
|
||||||
|
rm -f "$DST_DIR/$name" || exit 1
|
||||||
|
install --preserve-timestamps "${install_args[@]}" --mode="$mode" -T "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
|
||||||
|
|
||||||
|
# check destination file properties
|
||||||
|
local dst_stat dst_size dst_dev dst_ino
|
||||||
|
dst_stat=($(stat -c '%s %d %i' "$DST_DIR/$name")) || exit 1
|
||||||
|
dst_size="${dst_stat[0]}"
|
||||||
|
dst_dev="${dst_stat[1]}"
|
||||||
|
dst_ino="${dst_stat[2]}"
|
||||||
|
|
||||||
|
# file changed while copying
|
||||||
|
if [[ "$dst_size" != "$size" ]] ; then
|
||||||
|
flock -s "$DST_DIR" echo "ERROR: $SRC_DIR/$name changed while copying!" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# print out a line for StxChecksums using source file properties (preserved
|
||||||
|
# during copying), but with destination file's dev & ino.
|
||||||
|
flock -s "$DST_DIR" echo "$checksum $name $size $mtime $dst_dev $dst_ino $DST_DIR/$name"
|
||||||
|
done
|
||||||
|
|
||||||
|
rm -f "$matching_checksums_file"
|
||||||
|
}
|
||||||
|
|
||||||
|
# process files in parallel
|
||||||
|
(
|
||||||
|
if [[ "$DST_CHECKSUMS_FILE" ]] ; then
|
||||||
|
dst_checksums_fd=5
|
||||||
|
exec 5<>"$DST_CHECKSUMS_FILE" || exit 1
|
||||||
|
else
|
||||||
|
dst_checksums_fd=1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# No matching files found: really copy it
|
export SRC_DIR \
|
||||||
|
DST_DIR \
|
||||||
|
CHANGE_OWNER \
|
||||||
|
CHANGE_GROUP \
|
||||||
|
EGID \
|
||||||
|
TMP_DIR \
|
||||||
|
XTRACE \
|
||||||
|
combined_checksums_file
|
||||||
|
|
||||||
if [[ -e "$DST_DIR/$name" ]] ; then
|
export -f process_regfiles
|
||||||
\rm "$DST_DIR/$name" || exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# source file's size & mtime
|
message="processing regular files"
|
||||||
size="$(echo "$line" | sed -n -r 's#.* size=([^[:space:]]+).*#\1#p')"
|
process_lines "$message" "$regfile_list_file" process_regfiles | sort >&$dst_checksums_fd
|
||||||
mtime="$(echo "$line" | sed -n -r 's#.* mtime=([^[:space:]]+).*#\1#p')"
|
[[ "${PIPESTATUS[0]}" -eq 0 && "${PIPESTATUS[1]}" -eq 0 ]] || exit 1
|
||||||
|
) || exit 1
|
||||||
|
|
||||||
# copy it to $DST_DIR
|
|
||||||
echo " COPY $name" >&2
|
|
||||||
rm -f "$DST_DIR/$name"
|
|
||||||
install --preserve-timestamps "${install_args[@]}" --mode="$mode" -T "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
|
|
||||||
|
|
||||||
# check destination file properties
|
|
||||||
dst_stat=($(stat -c '%s %d %i' "$DST_DIR/$name")) || exit 1
|
|
||||||
dst_size="${dst_stat[0]}"
|
|
||||||
dst_dev="${dst_stat[1]}"
|
|
||||||
dst_ino="${dst_stat[2]}"
|
|
||||||
|
|
||||||
# file changed while copying
|
|
||||||
if [[ "$dst_size" != "$size" ]] ; then
|
|
||||||
echo "ERROR: $SRC_DIR/$name changed while copying!" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# print out a line for StxChecksums using source file properties (preserved
|
|
||||||
# during copying), but with destination file's dev & ino.
|
|
||||||
echo "$checksum $name $size $mtime $dst_dev $dst_ino $DST_DIR/$name"
|
|
||||||
done <"$regfile_list_file" >&$DST_CHECKSUMS_FD
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# copy special files
|
# copy special files
|
||||||
#
|
#
|
||||||
echo $'\n## Copying special files: '"$DST_DIR" >&2
|
echo $'\n## Copying special files: '"$DST_DIR" >&2
|
||||||
while read -r line ; do
|
|
||||||
[[ -n "$line" ]] || continue
|
|
||||||
name="${line##*name=}"
|
|
||||||
type="$(echo "$line" | sed 's#^type=\(.\) .*#\1#g')"
|
|
||||||
[[ -n "$name" && -n "$type" ]] || continue
|
|
||||||
echo " CREATE type=$type $name" >&2
|
|
||||||
if [[ -e "$DST_DIR/$name" ]] ; then
|
|
||||||
rm "$DST_DIR/$name" || exit 1
|
|
||||||
fi
|
|
||||||
cp -a --no-dereference "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
|
|
||||||
if [[ "$CHANGE_OWNER" || "$CHANGE_GROUP" ]] ; then
|
|
||||||
chown_arg=
|
|
||||||
if [[ "$CHANGE_OWNER" ]] ; then
|
|
||||||
chown_arg="$CHANGE_OWNER"
|
|
||||||
fi
|
|
||||||
if [[ "$CHANGE_GROUP" ]] ; then
|
|
||||||
chown_arg+=":$CHANGE_GROUP"
|
|
||||||
fi
|
|
||||||
chown --no-dereference "$chown_arg" "$DST_DIR/$name" || exit 1
|
|
||||||
fi
|
|
||||||
done <"$other_list_file"
|
|
||||||
|
|
||||||
|
# helper function for processing special files
|
||||||
|
# global vars used:
|
||||||
|
# SRC_DIR
|
||||||
|
# DST_DIR
|
||||||
|
# CHANGE_OWNER
|
||||||
|
# CHANGE_GROUP
|
||||||
|
# XTRACE
|
||||||
|
process_other() {
|
||||||
|
if [[ $XTRACE -eq 1 ]] ; then
|
||||||
|
set -x
|
||||||
|
fi
|
||||||
|
local line
|
||||||
|
for line in "$@" ; do
|
||||||
|
local name
|
||||||
|
name="${line##*name=}"
|
||||||
|
[[ -n "$name" ]] || continue
|
||||||
|
|
||||||
|
local type
|
||||||
|
type="$(echo "$line" | sed 's#^type=\(.\) .*#\1#g')"
|
||||||
|
[[ -n "$type" ]] || continue
|
||||||
|
|
||||||
|
flock -s "$DST_DIR" echo " CREATE type=$type $name" >&2
|
||||||
|
if [[ -e "$DST_DIR/$name" ]] ; then
|
||||||
|
rm "$DST_DIR/$name" || exit 1
|
||||||
|
fi
|
||||||
|
cp -a --no-dereference "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
|
||||||
|
if [[ "$CHANGE_OWNER" || "$CHANGE_GROUP" ]] ; then
|
||||||
|
local chown_arg=
|
||||||
|
if [[ "$CHANGE_OWNER" ]] ; then
|
||||||
|
chown_arg="$CHANGE_OWNER"
|
||||||
|
fi
|
||||||
|
if [[ "$CHANGE_GROUP" ]] ; then
|
||||||
|
chown_arg+=":$CHANGE_GROUP"
|
||||||
|
fi
|
||||||
|
chown --no-dereference "$chown_arg" "$DST_DIR/$name" || exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# process them in parallel
|
||||||
|
(
|
||||||
|
export SRC_DIR \
|
||||||
|
DST_DIR \
|
||||||
|
CHANGE_OWNER \
|
||||||
|
CHANGE_GROUP \
|
||||||
|
XTRACE
|
||||||
|
|
||||||
|
export -f process_other
|
||||||
|
|
||||||
|
message="processing other files"
|
||||||
|
process_lines "$message" "$other_list_file" process_other || exit 1
|
||||||
|
) || exit 1
|
||||||
|
@ -35,7 +35,7 @@ export REPO_TRACE=0
|
|||||||
|
|
||||||
# docker images
|
# docker images
|
||||||
SAFE_RSYNC_DOCKER_IMG="servercontainers/rsync:3.1.3"
|
SAFE_RSYNC_DOCKER_IMG="servercontainers/rsync:3.1.3"
|
||||||
COREUTILS_DOCKER_IMG="debian:bullseye-20220509"
|
COREUTILS_DOCKER_IMG="starlingx/jenkins-pipelines-coreutils:20230529"
|
||||||
|
|
||||||
notice() {
|
notice() {
|
||||||
( set +x ; print_log -i --notice "$@" ; )
|
( set +x ; print_log -i --notice "$@" ; )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user