Michel Thebeau 464f9d0e76 Conversion of storage during application update
Add lifecycle code to read secrets from the PVC mounted to the running
vault-manager, and vault-manager code to convert the storage from PVC
to k8s secrets.

The lifecycle code is added because the previous version of
vault-manager does not respond to SIGTERM from kubernetes during
termination, yet its pod will still be terminating when the new
vault-manager pod runs.  Reading the PVC data in lifecycle code before
helm updates the charts simplifies the process when vault-manager is
running during application-update.

The new vault-manager also handles the case where the application is
not running at the time it is updated, for example when the
application is removed, deleted, uploaded and applied.

In general, the procedure for converting the storage from PVC to k8s
secrets is (sketched below):
 - read the data from PVC
 - store the data in k8s secrets
 - validate the data
 - confirm the stored data is the same as what was in PVC
 - delete the original data only when the copy is confirmed
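
A minimal sketch of the confirm-before-delete pattern, using a
hypothetical local copy in place of the k8s secrets (the real flow is
implemented by convertPVC and validateSecrets in init.sh below):

  src=/mnt/data/cluster_keys.json     # data read from the PVC
  dst=/tmp/cluster_keys_copy.json     # stand-in for the k8s secrets
  cp "$src" "$dst"                    # store the data
  # validate that the copied data has the expected structure
  jq -e '.keys and .keys_base64 and .root_token' "$dst" >/dev/null || exit 1
  # delete the original only when the copy is confirmed identical
  if [ "$(sha256sum < "$src")" = "$(sha256sum < "$dst")" ]; then
      shred -f -z "$src"
  fi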

The solution employs a 'mount-helper', an incarnation of init.sh that
mounts the PVC resource so that vault-manager can read it; the
mount-helper simply mounts the PVC and waits to be terminated.

Test plan:
PASS  vault sanity
PASS  vault sanity via application-update
PASS  vault sanity update via application remove, delete, upload, apply
      (update testing requires version bump similar to change 881754)
PASS  unit test of the code
PASS  bashate, flake8, bandit
PASS  tox

Story: 2010930
Task: 48846

Change-Id: Iace37dad256b50f8d2ea6741bca070b97ec7d2d2
Signed-off-by: Michel Thebeau <Michel.Thebeau@windriver.com>
2023-11-02 15:12:47 +00:00

apiVersion: v1
data:
init.sh: |
#!/bin/bash
# Get the CA path from environment vars
CERT=$CA_CERT
# Store the cert as a one-liner for curl purposes
CA_ONELINE=$(awk '{printf "%s\\n", $0}' $CERT)
# Template values from helm
VAULT_NS={{ .Release.Namespace }}
VAULT_NAME={{ template "vault.name" .}}
VAULT_FN={{ template "vault.fullname" . }}
HA_REPLICAS={{ .Values.server.ha.replicas }}
# Set the domain for resolving pod names
DOMAIN="${VAULT_NS}.pod.cluster.local"
SVCDOMAIN="${VAULT_NS}.svc.cluster.local"
# impermanent location to store files while running
WORKDIR=/workdir
mkdir $WORKDIR
# Trap and trap notification file. When SIGTERM is sent to this pod
# we want to exit promptly and gracefully.
TRAPFILE=$WORKDIR/exit_on_trap
trap "touch $TRAPFILE" SIGTERM
# when specifying a trap for debug, remember it with this variable
# reserve trap '0' for disabling a debugging trap request
DEBUGGING_TRAP=0
# Maximum sleep seconds for mount-helper before exiting
MOUNT_HELPER_MAX_TIME=60
# Maximum seconds to wait for mount-helper pod to start
MAX_POD_RUN_TRIES=10
# Maximum tries and sleep interval when waiting for the old
# vault-manager pod to exit.  That pod does not respond to SIGTERM,
# so this will take about 30 seconds
TERMINATE_TRIES_MAX=6
TERMINATE_TRIES_SLEEP=5
# Vault key share configuration
KEY_SECRET_SHARES=5
KEY_REQUIRED_THRESHOLD=3
# Records for seal status state machine:
PODREC_F="$WORKDIR/previous_pods_status.txt"
PODREC_TMP_F="$WORKDIR/new_pods_status.txt"
# Vault server health query timeout during HA recovery scenario
QUERY_TMOUT={{ .Values.manager.healthQueryTimeout }}
STATEFULSET_RATE=5
INIT_CONVERGE_TIME=10
JOIN_RATE=5
JOIN_CONVERGE_TIME=1
UNSEAL_RATE=10
UNSEAL_CONVERGE_TIME=3
STATUS_RATE={{ .Values.manager.statusCheckRate }}
if [ -z "$STATUS_RATE" ]; then
STATUS_RATE=5
fi
# Together with STATUS_RATE, the period to delay unseal:
# STATUS_RATE * STATEMACH_START seconds
STATEMACH_START={{ .Values.manager.unsealWaitIntervals }}
if [ -z "$STATEMACH_START" ]; then
STATEMACH_START=3
fi
# Log levels
DEBUG=1
INFO=2
WARNING=3
ERROR=4
FATAL=5
# Default log level and the active log level (initially the default).
# If the log function detects an override file, it switches the active
# log level and then deletes the file.
DEFAULT_LOG_LEVEL=$INFO
LOG_LEVEL={{ .Values.manager.log.defaultLogLevel }}
LOG_OVERRIDE_FILE="$WORKDIR/log_level"
# FUNCTIONS
# Convert log level to text for log message
function log_to_str {
local level="$1"
local logStr
case "$level" in
$INFO)
logStr="INFO"
;;
$DEBUG)
logStr="DEBUG"
;;
$WARNING)
logStr="WARNING"
;;
$ERROR)
logStr="ERROR"
;;
$FATAL)
logStr="FATAL"
;;
esac
echo "$logStr"
}
# Print the specified message to stdout if the call's specified
# level is at least the configured log level
function log {
local lvl="$1"
local logStr
local newLogLevel
# check if the log override file exists
if [ -f $LOG_OVERRIDE_FILE ]; then
newLogLevel=$(cat $LOG_OVERRIDE_FILE)
# validation for newLogLevel
if [[ "$newLogLevel" =~ ^[1-5]$ ]]; then
LOG_LEVEL=$newLogLevel
logStr="$( log_to_str "$LOG_LEVEL" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Log level set to $logStr"
else
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level read from $LOG_OVERRIDE_FILE."
fi
rm $LOG_OVERRIDE_FILE
fi
# validate LOG_LEVEL. If it is not valid, then use
# DEFAULT_LOG_LEVEL instead.
if [[ ! "$LOG_LEVEL" =~ ^[1-5]$ ]]; then
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level detected, will be set to" \
"$( log_to_str "$DEFAULT_LOG_LEVEL" )"
LOG_LEVEL=$DEFAULT_LOG_LEVEL
fi
# check if the log level for this call is equal to or higher
# than the set log level
if [ "$lvl" -ge "$LOG_LEVEL" ]; then
# print log
logStr="$( log_to_str "$lvl" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) $logStr ${@:2}"
fi
}
if ! [[ "$QUERY_TMOUT" =~ ^[0-9]+$ ]]; then
log $WARNING ".Values.manager.healthQueryTimeout not an integer"
QUERY_TMOUT=""
fi
function exit_on_trap {
local trap="$1"
local tfnum=""
if [ -e "$TRAPFILE" ]; then
tfnum=$(cat $TRAPFILE)
log $DEBUG "exit_on_trap: removing $TRAPFILE"
rm "$TRAPFILE" # for workdir on PVC
if [ -z "$tfnum" ]; then
# an empty trap file is the default expected behaviour
log $INFO "exit_on_trap: ($trap)"
exit
# handle trap debugging feature - a developer specifies the
# trap number to target a specific exit_on_trap call.
# Setting a value of 0 (zero) disables the debugging trap
elif [ "$tfnum" -eq 0 ]; then
log $DEBUG "exit_on_trap: ($trap):" \
"disable debug trap ($DEBUGGING_TRAP)"
DEBUGGING_TRAP=0
# there is no trap with value zero
return
else
DEBUGGING_TRAP="$tfnum"
log $DEBUG "exit_on_trap: ($trap): " \
"enable debug trap ($DEBUGGING_TRAP)"
# check now just in case it matches
if [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
fi
fi
# check if there is a matching debug trap set
elif [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
else
log $DEBUG "exit_on_trap: ($trap): no trap file, no exit"
fi
}
# Splits the key shards into separate json documents.  Each document
# contains one key and its base64 encoded version.  The root token is
# stored separately.
function splitShard {
local index="$1"
jq '{"keys": [.keys['$index']], "keys_base64": [.keys_base64['$index']]}'
}
# merges two split keys
function mergeKeyJson {
# the two parameters are names for variables
local jstr1="$1"
local jstr2="$2"
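# the two documents are passed by variable name; write each one to a
# named pipe and let a background jq read both via --argfile and
# concatenate their keys and keys_base64 arrays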
mkfifo "$WORKDIR"/s1
mkfifo "$WORKDIR"/s2
(
jq -Mn --argfile file1 $WORKDIR/s1 --argfile file2 $WORKDIR/s2 '
def mergek: ($file1, $file2) | .keys as $k | $k;
def mergeb: ($file1, $file2) | .keys_base64 as $b | $b;
{keys: (reduce mergek as $x ([]; . + $x)),
keys_base64: (reduce mergeb as $x ([]; . + $x))}
' & ) 2>/dev/null
echo -n "${!jstr1}" > "$WORKDIR"/s1
echo -n "${!jstr2}" > "$WORKDIR"/s2
rm -f "$WORKDIR"/s1 "$WORKDIR"/s2
}
# Check the structure of the json data on stdin and confirm it is
# equivalent to the stored secrets
#
# Returns the normal linux success=0, failure!=0
function validateSecrets {
local index
local text
local keys
local keys_base64
local root_token
local root_saved
local count
local saved
local shaA
local shaB
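# the json document to validate arrives on stdin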
text=$( cat )
keys=$( echo "$text" | jq '.keys' )
keys_base64=$( echo "$text" | jq '.keys_base64' )
root_token=$( echo "$text" | jq -r '.root_token' )
# response is 'null' if the dict key is missing
# response is empty (-z) if the source document is empty
if [ -z "$keys" -o "$keys" == "null" \
-o -z "$keys_base64" -o "$keys_base64" == "null" \
-o -z "$root_token" -o "$root_token" == "null" ]; then
log $ERROR "one or more missing keys"
return 1
fi
count=$( echo "$keys" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
count=$( echo "$keys_base64" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys_base64 length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys_base64:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
# pull secrets from k8s and merge into one json file.
for index in $( seq 0 $(( KEY_SECRET_SHARES - 1 )) ); do
keys="$(get_secret cluster-key-$index)"
if [ "$index" -eq 0 ]; then
saved="$keys"
continue
fi
saved=$( mergeKeyJson saved keys )
done
root_saved="$( get_secret cluster-key-root )"
saved=$( echo "$saved" | jq \
-c '{keys: .keys, keys_base64: .keys_base64, root_token: "'$root_saved'"}' )
# finally ensure that the saved secrets are the same as the
# supplied text
shaA=$( echo "$text" | sha256sum )
shaB=$( echo "$saved" | sha256sum )
if [ "$shaA" != "$shaB" ]; then
log $ERROR "saved data differs from source data"
return 1
fi
log $INFO "Verified stored secrets are the same as supplied data"
return 0
}
# Creates a list of all k8s vault pods and stores it in a text file.
# Converts ips from X.X.X.X or a:b:c::d to X-X-X-X for use as pod
# dns names
function getVaultPods {
kubectl get pods \
-n "$VAULT_NS" \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.podIPs[].ip}{"\n"}{end}' \
> $WORKDIR/pods.txt
sed -i 's/\.\|:/-/g' $WORKDIR/pods.txt
}
# Wait for the vault servers in the stateful set to be
# created before initializing
function waitForPods {
local jsonPath='{range .items[*]}{.metadata.name}{"\t"} \
{.status.podIPs[].ip}{"\t"}{.status.phase}{"\n"} \
{end}'
CURRENT_PODS=$(kubectl get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
DESIRED_PODS=$1
if ! [[ "$CURRENT_PODS" =~ ^[0-9]+$ ]]; then
log $ERROR "Invalid Running pod number ($CURRENT_PODS) from kubectl get pods"
CURRENT_PODS=0
fi
while [ $CURRENT_PODS -lt $DESIRED_PODS ]; do
sleep "$STATEFULSET_RATE"
log $INFO "Waiting for ${VAULT_FN}" \
"statefulset running pods ($CURRENT_PODS) to equal" \
"desired pods ($DESIRED_PODS)"
CURRENT_PODS=$(kubectl get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
done
}
# Takes the json document output from vault initialization
# and stores it into secrets for key shards and the root token
#
# This only works if the secrets are not pre-existing.  An error
# is printed by set_secret if a secret already exists.
function storeVaultInitSecrets {
local secrets="$1"
local index
local split_json
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
split_json=$( echo -n "$secrets" | splitShard "$index" )
set_secret "cluster-key-$index" /dev/stdin <<< "$split_json"
done
split_json=$( echo "$secrets" | jq -r '.root_token' )
set_secret "cluster-key-root" /dev/stdin <<< "$split_json"
}
# Initializes the first vault pod, only needs to be performed once
# after deploying the helm chart
# Stores the root token and master key shards in k8s secrets
function initVault {
local V0
local keys
local key_error
local shares
local threshold
V0=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
log $INFO "Initializing $V0"
shares='"secret_shares": '$KEY_SECRET_SHARES
threshold='"secret_threshold": '$KEY_REQUIRED_THRESHOLD
keys=$(
curl -s \
--cacert $CERT \
--request POST \
--data "{$shares, $threshold}" \
https://$V0.$DOMAIN:8200/v1/sys/init
)
key_error=$(echo -n "$keys"| jq -r '.errors[]?')
if [ -n "$key_error" ]; then
log $ERROR "vault init request failed: $key_error"
fi
storeVaultInitSecrets "$keys"
# check if the secrets match vault's REST API response
echo "$keys" | validateSecrets
}
# Uses the master key shards to unseal vault
function unsealVault {
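# $VAULT is the dns name of the vault server to unseal, set by the caller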
local index
local b64key
local key_pair
local key_response
local key_error
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
b64key=$( get_secret "cluster-key-$index" | jq -r '.keys_base64[]' )
key_pair="{\"key\": \"$b64key\"}"
key_response=$(curl -s --cacert $CERT --request POST -d "$key_pair" \
https://$VAULT.$DOMAIN:8200/v1/sys/unseal
)
key_error=$(echo -n "$key_response"| jq -r '.errors[]?')
if [ -n "$key_error" ]; then
log $ERROR "Vault unseal request failed: $key_error"
fi
# Some sleep is required to allow Raft convergence
sleep "$UNSEAL_CONVERGE_TIME"
done
}
# Takes the address of vault-0 as the cluster leader and
# joins other nodes to raft
function joinRaft {
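# $row is the dns name of the vault server joining the raft cluster,
# set by the caller's loop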
CLUSTER_LEAD=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
ROOT_TOKEN=$( get_secret cluster-key-root )
local activeLink="https://sva-$VAULT_NAME-active.$SVCDOMAIN:8200"
local dataJson="{\"leader_api_addr\": \"$activeLink\", \"leader_ca_cert\": \"$CA_ONELINE\"}"
RAFT_STATUS=""
while [ "$RAFT_STATUS" != "true" ]; do
RAFT_STATUS=$(curl -s \
--cacert $CERT \
-H "X-Vault-Token: $ROOT_TOKEN" \
--request POST \
--data "$dataJson" \
https://$row.$DOMAIN:8200/v1/sys/storage/raft/join)
log $INFO "$row $RAFT_STATUS"
RAFT_STATUS=$(echo $RAFT_STATUS | jq -r .joined)
sleep "$JOIN_CONVERGE_TIME"
done
}
# Simply calls the status check of a vault, used to check if it is
# initialized, unsealed, or part of raft cluster
function vaultServerStatus {
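# $row is the dns name of the vault server to query, set by the caller;
# an optional first argument is a curl connect timeout in seconds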
local tmout=""
if [ -n "$1" ]; then
tmout="--connect-timeout $1"
fi
curl $tmout --cacert $CERT -s \
https://$row.$DOMAIN:8200/v1/sys/health
}
function runStateMachine {
local host="$1"
local dns_name="$2"
local sealed="$3"
local VAULT="$dns_name"
local status_rec
local old_rec
local counter
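# records have the form /<pod name>/<dns name>/<sealed status>/<countdown>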
status_rec="/$host/$dns_name/$sealed/"
# log compression: do not print logs when status is unchanged
# omit counter when checking vault server state change
old_rec="$( grep "$status_rec" "$PODREC_F" )"
if [ $? -ne 0 ]; then
log $DEBUG "$( grep "$dns_name" $WORKDIR/pods.txt )"
log $INFO "Sealed status of $dns_name is now: $sealed"
# reread the record by hostname only
old_rec="$( grep "^/$host/" "$PODREC_F" )"
else
log $DEBUG "There is no change in pod seal status"
fi
if [ "$sealed" != "true" ]; then
# There is nothing more to do: the vault is unsealed
# or the sealed status is unclear
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# The vault is sealed
#
# Check if there is a countdown in progress
#
# else -z old_rec: "the pod didn't have an IP address the last
# iteration, but now it does" - treat the same as "sealed
# without a countdown"
counter=""
if [ -n "$old_rec" ]; then
counter="$( echo "$old_rec" | awk -F/ '{print $5}' )"
fi
if [ -z "$counter" ]; then
# sealed without a countdown: start counting
log $DEBUG "Sealed vault $host: begin unseal delay:" \
"$( expr "$STATUS_RATE" \* "$STATEMACH_START" )s"
echo "${status_rec}${STATEMACH_START}" >> "$PODREC_TMP_F"
return
fi
# Check for end of period: 1 means "zero at this interval"
# "less than 1" for resilience
if [ "$counter" -le 1 -o "$STATEMACH_START" -eq 0 ]; then
# We've waited (STATUS_RATE * STATEMACH_START) seconds
# Or, STATEMACH_START == 0 means do not delay
log $INFO "Unsealing $dns_name"
unsealVault
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# finally, continue to countdown
counter="$( expr "$counter" - 1 )"
echo "${status_rec}${counter}" >> "$PODREC_TMP_F"
}
function vaultInitialized {
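# Returns 1 if the first vault server reports it is not initialized;
# returns 0 otherwise, including when the status could not be read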
row=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
vaultServerStatus > $WORKDIR/healthcheck.txt
IS_VAULT_INITIALIZED=$(cat $WORKDIR/healthcheck.txt | jq -r .initialized)
text="$( grep $row $WORKDIR/pods.txt )"
if [ $? -eq 0 ]; then
log $DEBUG "$text"
log $DEBUG "Initialized status is $IS_VAULT_INITIALIZED"
fi
# The empty check is here as an extra safety net; an investigation into
# the exact conditions under which IS_VAULT_INITIALIZED could be empty
# would be helpful.
if [ ! -z $IS_VAULT_INITIALIZED ] && [ $IS_VAULT_INITIALIZED = false ]; then
return 1
else
return 0
fi
}
function set_secret {
local secret="$1"
local contentf="$2"
local output
local result
output="$( kubectl create secret generic -n "$VAULT_NS" \
"$secret" "--from-file=strdata=$contentf" 2>&1 )"
result=$?
if [ "$result" -ne 0 ]; then
log $ERROR "Failed to create secret $secret"
log $DEBUG "Output: [$output]"
fi
return $result
}
function get_secret {
local secret="$1"
kubectl get secrets -n "$VAULT_NS" "$secret" \
-o jsonpath='{.data.strdata}' \
| base64 -d
}
# When vault-manager is run in "MOUNT_HELPER" mode, this function
# will not return. Instead the function will exit_on_trap or exit
# when it times-out.
#
# Basically: this function doesn't do anything except wait to be
# terminated.
#
# Vault-manager in MOUNT_HELPER has PVC mounted, allowing the real
# vault-manager to read secrets from cluster_keys.json
function mountHelper {
local count
# skip this function if this pod is not the mount helper
if [ -z "$MANAGER_MODE" -o "$MANAGER_MODE" != "MOUNT_HELPER" ]; then
log $INFO "Mode is VAULT_MANAGER"
return
fi
# When vault-manager is running in this mode, it should be
# deleted by vault-manager running in the default mode, which
# is using this pod to read secrets from mounted PVC
log $INFO "Mode is $MANAGER_MODE"
# start with some debug/error logs
if [ -f "$PVC_DIR/cluster_keys.json" ]; then
log $DEBUG "Successfully mounted secrets file"
else
log $WARNING "Secrets file not found"
fi
# sleep for MOUNT_HELPER_MAX_TIME, expecting SIGTERM signal
log $INFO "Waiting for termination request via SIGTERM"
count=0
while [ "$count" -lt "$MOUNT_HELPER_MAX_TIME" ]; do
exit_on_trap
count=$((count+1))
sleep 1
done
# Normally should exit by exit_on_trap, but here we timeout
# waiting for the real vault-manager to delete this job/pod.
log $INFO "Exiting without receiving SIGTERM request"
exit 0
}
# Check if a secret exists
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the secret
function secretExists {
local name="$1"
kubectl get secrets -n vault "$name" \
-o jsonpath='{.metadata.name}' 2>/dev/null \
| grep "$name"
}
# Check if the PVC resource exists
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the PVC resource
function pvcExists {
local text
local jqscript
jqscript='.items
| map(select(.metadata.name | test("^manager-pvc")))
| .[0].metadata.name'
# using jq since kubectl does not support regex matching
# the grep makes sure the result contains the 'manager-pvc'
# string (as opposed to 'null' for example)
text="$(
kubectl get persistentvolumeclaims -n vault -o json \
| jq -r "$jqscript" 2>/dev/null \
| grep manager-pvc )"
result=$?
if [ -n "$text" ]; then
echo "$text"
fi
return $result
}
# Check if the PVC is mounted to any pod in vault namespace
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the PVC resource
function testPVCMount {
local result
local cspec
local vspec
cspec=".items[*].spec.containers[*]"
vspec="volumeMounts[?(@.name=='manager-pvc')].name"
# this kubectl query returns zero whether manager-pvc is
# found or not
# result variable is either empty or 'manager-pvc'
result="$( kubectl get pods -n vault \
-o jsonpath="{${cspec}.${vspec}}" )"
if [ -n "$result" ]; then
return 0
fi
return 1 # assertion 'fails'
}
# This function prints a DEBUG log of kubectl delete
function deleteMountHelper {
local text
local result
log $DEBUG "Waiting for delete of mount-helper job"
text="$( kubectl delete --ignore-not-found=true --wait=true \
-f /opt/yaml/pvc-attach.yaml 2>&1 )"
result=$?
log $DEBUG "Output of deleting mount-helper: [$text]"
return $result
}
# Run shred on the file content of PVC
#
# All files are shredded, and the result is an error if
# - command return code is non-zero
# - file comparison shows unchanged file(s)
#
# A warning is issued if the shred/kubectl command produces any
# stdout or stderr
#
# Returns the normal linux success=0, failure!=0
function securelyWipePVC {
local helper="$1"
if [ -z "$helper" ]; then
log $ERROR "No pod specified for shredding"
return 1
fi
# get profile of the files before shredding
kubectl exec -n vault "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_before.txt 2>&1
log $DEBUG "Original files: [$( cat /tmp/shred_before.txt )]"
# run the shred command
#
# Shred all the files in mounted /mnt/data/
#
# The shred by default has three randomized passes, and with -z
# option will finalize with zeros. -f prompts shred to work
# around any unexpected file permissions
text="$( kubectl exec -n vault "$helper" -- \
bash -c '\
result=0; \
while read fname; do \
shred -f -z "$fname"; \
[ $? -ne 0 ] && result=1; \
done <<<"$(find /mnt/data -type f )"; \
exit $result' 2>&1 )"
result=$?
# get profile of the files after shredding
kubectl exec -n vault "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_after.txt 2>&1
log $DEBUG "Shredded files: [$( cat /tmp/shred_after.txt )]"
# compare the profiles for error reporting
#
# If the file lists, pushed through wc, have files with the same
# character, word, and line counts then report an error: a file
# has not been shredded
#
# Ignore files that were empty
difftext="$( diff -wuU100000 /tmp/shred_before.txt \
/tmp/shred_after.txt )"
unchanged="$( echo "$difftext" | grep "^ " \
| grep -v "^\([ ]\{1,\}0\)\{3\} /" )"
# Report the errors/success
if [ "$result" -ne 0 ]; then
log $ERROR "Error on shred: [$text]"
if [ -n "$unchanged" ]; then
log $ERROR "Unchanged: [$unchanged]"
fi
return 1
fi
if [ -n "$text" ]; then
log $WARNING "Output of shred is not empty: [$text]"
fi
if [ -n "$unchanged" ]; then
log $ERROR "Shred did not shred some files"
log $ERROR "Unchanged: [$unchanged]"
return 1
fi
log $INFO "Shredding of PVC data verified"
return 0
}
# Delete the PVC resource
#
# The delete will succeed even if attached to a pod, such as a
# terminating vault-manager or mount-helper - the PVC remains
# in terminating status until the pod is also terminated.
function deletePVC {
local text
local name
name="$( pvcExists )"
if [ $? -eq 0 ] && [[ "$name" =~ ^manager-pvc ]]; then
text="$( kubectl delete persistentvolumeclaims -n vault \
"$name" 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting PVC: [$text]"
else
log $INFO "$text"
fi
else
log $WARNING "Request to delete PVC but PVC not found"
fi
}
# Delete the bootstrap secret
function deleteBootstrap {
local text
text="$( kubectl delete secrets -n vault \
cluster-key-bootstrap 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting bootstrap secret: [$text]"
else
log $INFO "$text"
fi
}
# Run a job/pod to mount the PVC resource and retrieve the secrets
# from the PVC.
#
# See also the function mountHelper and the ConfigMap named:
# {{ include "vault.name" . }}-mount-helper
#
# This function does not support overwriting an existing
# cluster-key-* secret, but it does support validating those secrets
# if they exist
function convertPVC {
local output
local pod
local count
local text
local PVCtext
local result
if testPVCMount; then
log $ERROR "Cannot mount PVC already mounted"
return 1
fi
# run the pod
output="$( kubectl apply -f /opt/yaml/pvc-attach.yaml 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Failed to apply mount-helper"
log $DEBUG "Output: [$output]"
deleteMountHelper
return 1
fi
# wait for pod
pod=''
count=0
log $INFO "Waiting for mount-helper pod to run"
while [ -z "$pod" -a "$count" -le "$MAX_POD_RUN_TRIES" ]; do
count=$((count+1))
text="$( kubectl get pods -n vault | grep "mount-helper" )"
pod="$( echo "$text" | grep "Running" | awk '{print $1}' )"
if [ -z "$pod" ]; then
sleep 1
fi
done
if [ -z "$pod" ]; then
log $ERROR "Failed to run mount-helper pod"
log $DEBUG "Pod state: [$( echo $text )]"
deleteMountHelper
return 1
fi
# get the pvc data
PVCtext="$( kubectl exec -n vault "$pod" \
-- cat /mnt/data/cluster_keys.json )"
if [ $? -ne 0 -o -z "$PVCtext" ]; then
log $ERROR "Failed to read cluster_keys.json"
deleteMountHelper
return 1
fi
log $INFO "Data retrieved from PVC"
# if the Root secret is pre-existing, compare the existing
# shard secrets and root secret before deleting the PVC
kubectl get secrets -n vault cluster-key-root >/dev/null 2>&1
if [ $? -eq 0 ]; then
log $INFO "Cluster secrets exist:" \
"validating"
else
# create a secret from the data
storeVaultInitSecrets "$PVCtext"
fi
# verify the data stored versus text from PVC
echo "$PVCtext" | validateSecrets
result=$?
if [ "$result" -eq 0 ]; then
securelyWipePVC "$pod"
# if the wipe fails, do not delete the PVC, so that it can be
# analysed and shredded manually
if [ $? -eq 0 ]; then
deletePVC
fi
fi
# clean up but do not care about the result
deleteMountHelper
return $result
}
function convertBootstrapSecrets {
local text
local count
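# the bootstrap secret holds the cluster_keys.json content copied
# from the PVC by the lifecycle code during application-update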
text="$( get_secret cluster-key-bootstrap )"
storeVaultInitSecrets "$text"
# verify the split secrets versus the bootstrap text
echo "$text" | validateSecrets
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
deleteBootstrap
# Also validate and delete the PVC resource
# This procedure depends on waiting for the old version
# of vault-manager pod to exit
count="$TERMINATE_TRIES_MAX"
log $INFO "Waiting for vault-manager pod to exit"
while testPVCMount && [ "$count" -gt 0 ]; do
sleep "$TERMINATE_TRIES_SLEEP"
count=$((count-1))
done
convertPVC
}
function runConversion {
if [ -n "$K8S_SECRETS_PREEXIST" ]; then
log $INFO "Cluster secrets exist"
return
elif [ -n "$BOOTSTRAP_PREEXISTS" ]; then
# this is the normal application update procedure; the
# lifecycle code retrieved the secrets from the previous
# version of the application.
log $INFO "Using secrets provided in $BOOTSTRAP_PREEXISTS"
convertBootstrapSecrets
return
elif [ -z "$PVC_PREEXISTS" ]; then
log $INFO "No pre-existing secrets exist"
return
fi
# Finally, read the pre-existing PVC.  This occurs if the
# application is updated outside of application-update, for
# example if the old application is removed and deleted, and the
# new application is uploaded and applied.
convertPVC
}
#
# LOGIC
#
exit_on_trap 1
# check if this pod is helping to convert storage from pvc to k8s
# secrets
mountHelper
exit_on_trap 15
# check whether key shard secrets, the bootstrap secret, or the
# PVC resource pre-exist
K8S_SECRETS_PREEXIST="$( secretExists cluster-key-root )"
exit_on_trap 16
BOOTSTRAP_PREEXISTS="$( secretExists cluster-key-bootstrap )"
exit_on_trap 17
PVC_PREEXISTS="$( pvcExists )"
exit_on_trap 18
runConversion
exit_on_trap 19
# Waiting for at least one vault server, to check initialization
waitForPods 1
exit_on_trap 2
log $DEBUG "Putting a list of vault pods and ip in $WORKDIR/pods.txt"
getVaultPods
exit_on_trap 3
vaultInitialized
IS_VAULT_INITIALIZED=$?
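# a return of 1 means the vault is not initialized; run the full
# initialization sequence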
if [ $IS_VAULT_INITIALIZED -eq 1 ]; then
exit_on_trap 4
desired_pods=$HA_REPLICAS
# Waiting for vault servers to come up
waitForPods $desired_pods
exit_on_trap 5
log $INFO "Putting a list of vault pods and IPs in $WORKDIR/pods.txt"
getVaultPods
exit_on_trap 6
log $DEBUG "Initializing the vault on vault-0 and" \
"storing keys in k8s secrets"
initVault
# Some sleep is required to allow convergence
sleep "$INIT_CONVERGE_TIME"
log $DEBUG "Unsealing vault-0 using the init shards"
for row in $(awk 'NR==1{print $2}' $WORKDIR/pods.txt); do
VAULT=$row
unsealVault
done
log $DEBUG "Joining other vault servers to the HA Raft cluster"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
joinRaft
sleep "$JOIN_RATE"
done
exit_on_trap 7
log $INFO "Unsealing the remaining vaults"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
VAULT=$row
unsealVault
sleep "$UNSEAL_RATE"
exit_on_trap 8
done
else
log $INFO "Vault is initialized"
fi
exit_on_trap 9
# initialize the state machine - vault server status records
echo "" > "$PODREC_F"
while read host dns_name; do
if [ -z "$host" ]; then
continue
fi
status_rec="/$host/$dns_name//"
echo "$status_rec" >> "$PODREC_F"
done <$WORKDIR/pods.txt
# Loop forever to check the seal status of vaults and
# unseal if required
log $INFO "Checking vault pods seal status in perpetuity..."
while true; do
exit_on_trap 10
sleep "$STATUS_RATE"
rm $WORKDIR/pods.txt
echo "" > "$PODREC_TMP_F"
exit_on_trap 11
getVaultPods
exit_on_trap 12
while read host row; do
if [ -z "$row" ]; then
# probably a recovering pod waiting for an IP address
log $DEBUG "pod list has empty data: [$host] [$row]"
continue
fi
vaultServerStatus $QUERY_TMOUT > $WORKDIR/healthcheck.txt
TEMP=$(cat $WORKDIR/healthcheck.txt | jq -r .sealed)
exit_on_trap 13
# Decide when to unseal the vault server; includes
# adding records to new_pods_status.txt
runStateMachine "$host" "$row" "$TEMP"
exit_on_trap 14
done <$WORKDIR/pods.txt
mv "$PODREC_TMP_F" "$PODREC_F"
done
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:init.sh: {}
manager: vault-init-unseal
name: vault-init-unseal-2
namespace: {{ .Release.Namespace }}
---
apiVersion: v1
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:pvc-attach.yaml: {}
manager: {{ include "vault.name" . }}-mount-helper
name: {{ include "vault.name" . }}-mount-helper
namespace: {{ .Release.Namespace }}
data:
pvc-attach.yaml: |
---
apiVersion: batch/v1
kind: Job
metadata:
name: {{ template "vault.fullname" . }}-mount-helper
namespace: vault
spec:
activeDeadlineSeconds: 600
completions: 1
parallelism: 1
ttlSecondsAfterFinished: 0
template:
spec:
restartPolicy: Never
serviceAccountName: "{{ template "vault.fullname" . }}-vault-manager"
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.global.imagePullSecrets | nindent 12 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 12 }}
{{- end }}
containers:
- name: mount
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.injector.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: MANAGER_MODE
value: MOUNT_HELPER
- name: PVC_DIR
value: /mnt/data
volumeMounts:
- name: mount-helper
mountPath: /opt/script
readOnly: true
- name: manager-pvc
mountPath: /mnt/data
readOnly: false
volumes:
- name: mount-helper
configMap:
name: vault-init-unseal-2
- name: manager-pvc
persistentVolumeClaim:
claimName: manager-pvc-sva-vault-manager-0
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: {{ .Release.Namespace }}
name: {{ template "vault.fullname" . }}-vault-manager
rules:
- apiGroups: [""] # "" indicates the core API group
resources: ["pods"]
verbs: ["get", "watch", "list"]
- apiGroups: [""] # "" indicates the core API group
resources: ["pods/exec"]
verbs: ["create"]
- apiGroups: [""] # "" indicates the core API group
resources: ["secrets"]
verbs: ["get", "create", "delete"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["get", "create", "delete"]
- apiGroups: [""] # "" indicates the core API group
resources: ["persistentvolumeclaims"]
verbs: ["list", "delete"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ template "vault.fullname" . }}-vault-manager
namespace: {{ .Release.Namespace }}
labels:
helm.sh/chart: {{ include "vault.chart" . }}
app.kubernetes.io/name: {{ include "vault.name" . }}-vault-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ template "vault.fullname" . }}-vault-manager
namespace: {{ .Release.Namespace }}
subjects:
- kind: ServiceAccount
name: {{ template "vault.fullname" . }}-vault-manager
roleRef:
kind: Role
name: {{ template "vault.fullname" . }}-vault-manager
apiGroup: rbac.authorization.k8s.io
---
{{- if and (eq (.Values.injector.enabled | toString) "true" ) (eq (.Values.global.enabled | toString) "true") }}
# StatefulSet for the unsealer
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ template "vault.fullname" . }}-manager2
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ include "vault.name" . }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
component: webhook
spec:
serviceName: {{ template "vault.fullname" . }}
replicas: 1
selector:
matchLabels:
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
template:
metadata:
labels:
app.kubernetes.io/name: {{ template "vault.name" . }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
{{- if .Values.manager.extraLabels }}
{{- toYaml .Values.manager.extraLabels | nindent 8 -}}
{{- end }}
spec:
serviceAccountName: "{{ template "vault.fullname" . }}-vault-manager"
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.global.imagePullSecrets | nindent 8 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 8 }}
{{- end }}
containers:
- name: manager
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.injector.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: CA_CERT
value: /mnt/data/ca/tls.crt
volumeMounts:
- name: vault-init-unseal-2
mountPath: /opt/script
readOnly: false
- name: mount-helper-yaml
mountPath: /opt/yaml
readOnly: true
- name: vault-ca
mountPath: /mnt/data/ca
readOnly: true
volumes:
- name: vault-init-unseal-2
configMap:
name: vault-init-unseal-2
- name: mount-helper-yaml
configMap:
name: {{ include "vault.name" . }}-mount-helper
- name: vault-ca
secret:
secretName: vault-ca
{{ end }}