From a3f8b7adda2bcedf7db6220864b062f1d076becb Mon Sep 17 00:00:00 2001 From: Salman Rana Date: Mon, 10 Jun 2024 22:57:54 -0400 Subject: [PATCH] Enable enroll-init: post factory-install reconfiguration These changes ensure that a standalone node is in the correct state for reconfiguration (for DC enrollment) after a successful factory-install. This is achieved by ensuring: - Factory install services are cleaned up. - Cloud-init services remain enabled and active. To facilitate enrollment initialization, two new scripts are being introduced in the platform-util package: 1. enroll-init-cleanup: Removes the cloud-init preset set by factory-install and sets service disabled flag. 2. enroll-init-reconfigure: A newly introduced script that allows OAM and password reconfiguration, ensuring: - The password change is done after system services are active. - OAM reconfiguration is done via system commands. This is required as outlined in https://review.opendev.org/c/starlingx/distcloud/+/921719 These scripts must be independent of the factory-install services and available on the platform for DCManager operations. Hence, they are not part of factory-install services but are included in the platform-util package. Additionally, a minor restructuring of factory-install services is done as part of these changes: - Moved the host config folder to the parent folder. This is more appropriate as the top-level folder already holds the cloud-init configurable files. The factory-install folder is meant for the static service files. - Introduced a utils folder/scripts for the factory-install services. 
Test plan: - PASS: Validate factory-install services: - config files copied correctly to home dir - factory install utils copied to /var/lib/factory-install dir - cloud-init preset after successful install - factory install services cleaned up - PASS: Validate full factory install - PASS: Build iso and install load to ensure platform-util package is installed with enroll-init-cleanup and enroll-init-reconfigure scripts in /usr/local/bin - PASS: Validate enroll-init-cleanup and enroll-init-reconfigure scripts: - Password persisted and OAM reconfiguration verified with system oam-show + endpoints updated - cloud-init services disabled (not restarted on reboot) Story: 2011100 Task: 50164 Change-Id: I9a99c53c6fe6590716ad3d5d59663c8e6c475db5 Signed-off-by: Salman Rana --- .../config/localhost.yml | 0 .../factory-install/scripts/10-init-setup | 9 +- .../factory-install/scripts/20-hardware-check | 4 +- .../factory-install/scripts/90-init-final | 4 +- .../systemd/factory-install-tests.service | 3 +- .../systemd/utils/20-cloud-init.preset | 6 + .../{ => utils}/20-factory-install.preset | 0 .../systemd/utils/disable-factory-install | 14 ++ .../factory-install/tests/10-system-health | 12 +- .../debian/deb_folder/controller.install | 2 + .../platform-util-controller.install | 2 + .../platform-util/debian/deb_folder/rules | 4 +- .../platform-util/scripts/enroll-init-cleanup | 14 ++ .../scripts/enroll-init-reconfigure | 171 ++++++++++++++++++ 14 files changed, 232 insertions(+), 13 deletions(-) rename tools/nocloud-factory-install/{factory-install => }/config/localhost.yml (100%) create mode 100644 tools/nocloud-factory-install/factory-install/systemd/utils/20-cloud-init.preset rename tools/nocloud-factory-install/factory-install/systemd/{ => utils}/20-factory-install.preset (100%) create mode 100644 tools/nocloud-factory-install/factory-install/systemd/utils/disable-factory-install create mode 100644 utilities/platform-util/scripts/enroll-init-cleanup create mode 100755 
utilities/platform-util/scripts/enroll-init-reconfigure diff --git a/tools/nocloud-factory-install/factory-install/config/localhost.yml b/tools/nocloud-factory-install/config/localhost.yml similarity index 100% rename from tools/nocloud-factory-install/factory-install/config/localhost.yml rename to tools/nocloud-factory-install/config/localhost.yml diff --git a/tools/nocloud-factory-install/factory-install/scripts/10-init-setup b/tools/nocloud-factory-install/factory-install/scripts/10-init-setup index 21dd349b..1d90e0d6 100755 --- a/tools/nocloud-factory-install/factory-install/scripts/10-init-setup +++ b/tools/nocloud-factory-install/factory-install/scripts/10-init-setup @@ -35,13 +35,15 @@ check_rc_die $? "mkdir failed" cp -r "${NOCLOUD}"/factory-install/scripts "${FACTORY_INSTALL}"/scripts && \ cp -r "${NOCLOUD}"/factory-install/setup "${FACTORY_INSTALL}"/setup && \ cp -r "${NOCLOUD}"/factory-install/tests "${FACTORY_INSTALL}"/tests && \ - cp -r "${NOCLOUD}"/factory-install/config "${FACTORY_INSTALL}"/config + cp -r "${NOCLOUD}"/factory-install/systemd/utils "${FACTORY_INSTALL}"/utils && \ + cp -r "${NOCLOUD}"/config "${FACTORY_INSTALL}"/config check_rc_die $? "copy failed" # Ensure files are executable for run-parts chmod a+x "${FACTORY_INSTALL}"/scripts/* && \ chmod a+x "${FACTORY_INSTALL}"/setup/* && \ - chmod a+x "${FACTORY_INSTALL}"/tests/* + chmod a+x "${FACTORY_INSTALL}"/tests/* && \ + chmod a+x "${FACTORY_INSTALL}"/utils/* check_rc_die $? "chmod failed" # Copy configuration files required for running bootstrap and deployment configuration services @@ -60,8 +62,7 @@ check_rc_die $? "mkdir failed (factory-install.target.wants)" cp "${NOCLOUD}"/factory-install/systemd/*.{path,service,target} /etc/systemd/system/ check_rc_die $? 
"Copy failed (systemd path,service,target)" -cp "${NOCLOUD}"/factory-install/systemd/20-factory-install.preset /etc/systemd/system-preset/ && \ - chmod a+x /etc/systemd/system-preset/20-factory-install.preset +cp "${FACTORY_INSTALL}"/utils/*.preset /etc/systemd/system-preset/ check_rc_die $? "Copy failed (systemd preset)" echo "Factory Install Setup - Complete" diff --git a/tools/nocloud-factory-install/factory-install/scripts/20-hardware-check b/tools/nocloud-factory-install/factory-install/scripts/20-hardware-check index d85c4bc8..d2ac7c54 100755 --- a/tools/nocloud-factory-install/factory-install/scripts/20-hardware-check +++ b/tools/nocloud-factory-install/factory-install/scripts/20-hardware-check @@ -5,7 +5,9 @@ # SPDX-License-Identifier: Apache-2.0 # # cloud-init script to Perform hardware and firmware checks -# TODO: Sample only. Replace with real hardware checks +# +# SAMPLE ONLY - REPLACE WITH REAL HARDWARE CHECKS +# echo "Hardware Check - Start" diff --git a/tools/nocloud-factory-install/factory-install/scripts/90-init-final b/tools/nocloud-factory-install/factory-install/scripts/90-init-final index bcdd2edc..4831d64e 100755 --- a/tools/nocloud-factory-install/factory-install/scripts/90-init-final +++ b/tools/nocloud-factory-install/factory-install/scripts/90-init-final @@ -4,8 +4,8 @@ # # SPDX-License-Identifier: Apache-2.0 # -# cloud-init script to Perform hardware and firmware checks -# TODO: Sample only. 
Replace with real hardware checks +# cloud-init script to finish the factory install setup and +# trigger the first stage (bootstrap) # FACTORY_INSTALL=/var/lib/factory-install diff --git a/tools/nocloud-factory-install/factory-install/systemd/factory-install-tests.service b/tools/nocloud-factory-install/factory-install/systemd/factory-install-tests.service index ed3849fb..21b5607f 100644 --- a/tools/nocloud-factory-install/factory-install/systemd/factory-install-tests.service +++ b/tools/nocloud-factory-install/factory-install/systemd/factory-install-tests.service @@ -11,7 +11,8 @@ User=sysadmin ExecStart=/usr/bin/run-parts --verbose --exit-on-error /var/lib/factory-install/tests ExecStartPost=+/usr/bin/touch /var/lib/factory-install/state/tests ExecStartPost=+/usr/bin/touch /var/lib/factory-install/stage/final -ExecStartPost=+/usr/bin/rm /var/lib/factory-install/enabled +ExecStartPost=+/var/lib/factory-install/utils/disable-factory-install + StandardOutput=append:/var/log/factory-install.log RemainAfterExit=yes Restart=no diff --git a/tools/nocloud-factory-install/factory-install/systemd/utils/20-cloud-init.preset b/tools/nocloud-factory-install/factory-install/systemd/utils/20-cloud-init.preset new file mode 100644 index 00000000..c769cde3 --- /dev/null +++ b/tools/nocloud-factory-install/factory-install/systemd/utils/20-cloud-init.preset @@ -0,0 +1,6 @@ +enable cloud-init.target + +enable cloud-init-local.service +enable cloud-init.service +enable cloud-config.service +enable cloud-final.service diff --git a/tools/nocloud-factory-install/factory-install/systemd/20-factory-install.preset b/tools/nocloud-factory-install/factory-install/systemd/utils/20-factory-install.preset similarity index 100% rename from tools/nocloud-factory-install/factory-install/systemd/20-factory-install.preset rename to tools/nocloud-factory-install/factory-install/systemd/utils/20-factory-install.preset diff --git 
a/tools/nocloud-factory-install/factory-install/systemd/utils/disable-factory-install b/tools/nocloud-factory-install/factory-install/systemd/utils/disable-factory-install new file mode 100644 index 00000000..f9d0b225 --- /dev/null +++ b/tools/nocloud-factory-install/factory-install/systemd/utils/disable-factory-install @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# script to disable the factory install services after the installation is complete +# + +rm -f /var/lib/factory-install/enabled +rm -f /etc/systemd/system-preset/20-factory-install.preset + +systemctl daemon-reload +systemctl preset-all diff --git a/tools/nocloud-factory-install/factory-install/tests/10-system-health b/tools/nocloud-factory-install/factory-install/tests/10-system-health index 0f669146..a44b2300 100755 --- a/tools/nocloud-factory-install/factory-install/tests/10-system-health +++ b/tools/nocloud-factory-install/factory-install/tests/10-system-health @@ -6,20 +6,24 @@ # # Factory install system health checks triggered during the tests stage # +# SAMPLE ONLY - REPLACE WITH REAL SYSTEM HEALTH CHECKS +# echo "System Health Checks - Start" -fail () { - echo "FAIL: $1" && exit 1 +log_failure () { + echo "FAIL: $1" + exit ${2} } # check for service impacting alarms -# TODO update alarm check - currently checks for any alarms source /etc/platform/openrc fm --timeout 10 alarm-list --nowrap|grep -e "major\|minor\|warning\|critical" if [ $? == 0 ]; then - fail "service impacting alarms present" + # Log the health check failure and exit 0 to allow factory-install to finish up. + # Modify to exit 1 if factory-install should retry check until success. 
+ log_failure "service impacting alarms present" 0 fi echo "System Health Checks - Complete" diff --git a/utilities/platform-util/debian/deb_folder/controller.install b/utilities/platform-util/debian/deb_folder/controller.install index 043b2c5c..b167502c 100644 --- a/utilities/platform-util/debian/deb_folder/controller.install +++ b/utilities/platform-util/debian/deb_folder/controller.install @@ -10,3 +10,5 @@ scripts/update_docker_registry_auth.sh usr/local/bin scripts/change_system_private_registry.sh usr/local/bin scripts/local_starlingxrc usr/local/bin scripts/kubeconfig-setup usr/local/bin +scripts/enroll-init-cleanup usr/local/bin +scripts/enroll-init-reconfigure usr/local/bin diff --git a/utilities/platform-util/debian/deb_folder/platform-util-controller.install b/utilities/platform-util/debian/deb_folder/platform-util-controller.install index 4ecf9b65..4974d686 100644 --- a/utilities/platform-util/debian/deb_folder/platform-util-controller.install +++ b/utilities/platform-util/debian/deb_folder/platform-util-controller.install @@ -10,3 +10,5 @@ /usr/local/bin/change_system_private_registry.sh /usr/local/bin/local_starlingxrc /usr/local/bin/kubeconfig-setup +/usr/local/bin/enroll-init-cleanup +/usr/local/bin/enroll-init-reconfigure diff --git a/utilities/platform-util/debian/deb_folder/rules b/utilities/platform-util/debian/deb_folder/rules index 1e7c8c1d..8de06ef2 100755 --- a/utilities/platform-util/debian/deb_folder/rules +++ b/utilities/platform-util/debian/deb_folder/rules @@ -44,6 +44,8 @@ override_dh_auto_install: install -m 555 scripts/kubeconfig-setup $(DEBIAN_BUILDDIR)/usr/local/bin/ install -m 755 scripts/connectivity_test $(DEBIAN_BUILDDIR)/usr/local/bin/ install -m 750 scripts/set_keystone_user_option.sh $(DEBIAN_BUILDDIR)/usr/local/bin/ + install -m 750 scripts/enroll-init-cleanup $(DEBIAN_BUILDDIR)/usr/local/bin/ + install -m 750 scripts/enroll-init-reconfigure $(DEBIAN_BUILDDIR)/usr/local/bin/ install -d $(DEBIAN_BUILDDIR)/usr/local/sbin/ 
install -m 700 -p -D scripts/patch-restart-mtce $(DEBIAN_BUILDDIR)/usr/local/sbin/ @@ -56,4 +58,4 @@ override_dh_fixperms: dh_fixperms -Xupdate-iso.sh -Xpatch-dm.sh -Xgen-bootloader-iso.sh -Xstx-iso-utils.sh \ -Xshow-certs.sh -Xupdate_docker_registry_auth.sh -Xchange_system_private_registry.sh \ -Xis-rootdisk-device.sh -Xlocal_starlingxrc -Xkubeconfig-setup -Xpatch-restart-* \ - -Xconnectivity_test -Xset_keystone_user_option.sh + -Xconnectivity_test -Xset_keystone_user_option.sh -Xenroll-init-* diff --git a/utilities/platform-util/scripts/enroll-init-cleanup b/utilities/platform-util/scripts/enroll-init-cleanup new file mode 100644 index 00000000..99625fb3 --- /dev/null +++ b/utilities/platform-util/scripts/enroll-init-cleanup @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# script to cleanup the cloud-init services +# + +touch /etc/cloud/cloud-init.disabled +rm -f /etc/systemd/system-preset/20-cloud-init.preset + +systemctl daemon-reload +systemctl preset-all diff --git a/utilities/platform-util/scripts/enroll-init-reconfigure b/utilities/platform-util/scripts/enroll-init-reconfigure new file mode 100755 index 00000000..ff604b12 --- /dev/null +++ b/utilities/platform-util/scripts/enroll-init-reconfigure @@ -0,0 +1,171 @@ +#!/bin/bash +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Utility to reconfigure OAM and update sysadmin password +# by first ensuring sys-inv and mtc services are active +# and ready to accept the password change. 
#

# Retry policy shared by the readiness checks (credentials and services).
# Prefixed names are used so that sourcing /etc/platform/openrc is unlikely
# to collide with them.
readonly ENROLL_MAX_RETRIES=10
readonly ENROLL_RETRY_INTERVAL_SECS=30

# Abort the script through log_fatal when a return code is non-zero.
# Arguments: $1 - return code to check
#            $2 - message to log when the code is non-zero
function check_rc_die {
    local -i rc=${1}
    local msg=${2}
    if (( rc != 0 )); then
        log_fatal "${msg} [rc=${rc}]"
    fi
}

# Log a timestamped FATAL message to stderr (red when tput can colour)
# and terminate the script with exit status 1.
function log_fatal {
    # tput's own stderr is discarded: without a usable TERM it prints an
    # error instead of an escape sequence, which would pollute the log.
    echo "$(tput setaf 1 2>/dev/null)$(date "+%F %H:%M:%S") FATAL: ${*}$(tput sgr0 2>/dev/null)" >&2
    exit 1
}

# Log a timestamped WARN message to stderr (yellow when tput can colour).
function log_warn {
    echo "$(tput setaf 3 2>/dev/null)$(date "+%F %H:%M:%S"): WARN: ${*}$(tput sgr0 2>/dev/null)" >&2
}

# Log a timestamped INFO message to stderr.
function log_info {
    echo "$(date "+%F %H:%M:%S"): INFO: $*" >&2
}

# Print the command line help to stdout.
function usage {
    cat <<ENDUSAGE
Utility to reconfigure OAM and update sysadmin password.

Usage:
    ${0##*/} --oam_subnet <oam_subnet>
             --oam_gateway_ip <oam_gateway_ip>
             --oam_ip <oam_ip>
             --new_password <new_password>

    --oam_subnet <oam_subnet>: Specify OAM subnet
    --oam_gateway_ip <oam_gateway_ip>: Specify OAM gateway IP
    --oam_ip <oam_ip>: Specify OAM IP
    --new_password <new_password>: Specify new password for sysadmin user
ENDUSAGE
}

# Abort unless the option currently being parsed is followed by a value.
# Without this guard a trailing value-less option makes "shift 2" fail
# without shifting anything, and the parsing loop would never terminate.
# Arguments: $1 - option name (used in the error message)
#            $2 - number of positional parameters still to be parsed
function require_option_value {
    local option=${1}
    local -i remaining=${2}
    if (( remaining < 2 )); then
        log_fatal "Missing value for option: ${option}"
    fi
}

# Abort unless the factory-install final stage marker file exists.
function verify_factory_install {
    log_info "Checking factory-install..."

    if [[ ! -f /var/lib/factory-install/stage/final ]]; then
        log_fatal "/var/lib/factory-install/stage/final does not exist. Ensure factory-install was successful."
    fi

    log_info "factory-install check successfull."
}

# Poll sm-dump until both the sysinv-inv and mtc-agent entries report
# "enabled-active" in their second column. Returns 0 on success and aborts
# the script after ENROLL_MAX_RETRIES unsuccessful attempts.
function check_services_status {
    log_info "Checking services status..."
    local -i attempt
    local sm_output sysinv_status mtc_status

    for (( attempt = 1; attempt <= ENROLL_MAX_RETRIES; attempt++ )); do
        sm_output=$(sm-dump)

        sysinv_status=$(awk '/sysinv-inv/ {print $2}' <<<"${sm_output}")
        mtc_status=$(awk '/mtc-agent/ {print $2}' <<<"${sm_output}")

        if [[ "${sysinv_status}" == "enabled-active" && "${mtc_status}" == "enabled-active" ]]; then
            log_info "Required services are now enabled-active."
            return 0
        fi

        log_warn "Required services are not yet enabled-active. Retrying... "

        # Only wait when another attempt will actually follow.
        if (( attempt < ENROLL_MAX_RETRIES )); then
            sleep "${ENROLL_RETRY_INTERVAL_SECS}"
        fi
    done

    log_fatal "Required services are not enabled-active after ${ENROLL_MAX_RETRIES} attempts."
}

# Source /etc/platform/openrc into the current shell, retrying while it
# reports failure. Aborts the script if the file is missing or if it still
# cannot be sourced successfully after ENROLL_MAX_RETRIES attempts.
function load_credentials {
    log_info "Loading credentials..."
    local -i attempt

    if [[ ! -f /etc/platform/openrc ]]; then
        log_fatal "/etc/platform/openrc does not exist."
    fi

    for (( attempt = 1; attempt <= ENROLL_MAX_RETRIES; attempt++ )); do
        # shellcheck disable=SC1091 # file only exists on the target host
        if source /etc/platform/openrc; then
            log_info "Credentials loaded successfully."
            return 0
        fi

        log_warn "Failed to load credentials (waiting for the host to become active). Retrying..."

        # Only wait when another attempt will actually follow.
        if (( attempt < ENROLL_MAX_RETRIES )); then
            sleep "${ENROLL_RETRY_INTERVAL_SECS}"
        fi
    done

    log_fatal "Failed to load credentials."
}

# Apply the requested OAM subnet, gateway and address with "system oam-modify".
# Globals: OAM_SUBNET, OAM_GATEWAY_IP, OAM_IP (read)
function reconfigure_OAM {
    log_info "Reconfiguring OAM with subnet: $OAM_SUBNET, gateway IP: $OAM_GATEWAY_IP, OAM IP: $OAM_IP..."
    system oam-modify oam_subnet="$OAM_SUBNET" oam_gateway_ip="$OAM_GATEWAY_IP" oam_ip="$OAM_IP"
    check_rc_die $? "system oam-modify failed"
}

# Set the sysadmin password through chpasswd.
# Globals: NEW_PASSWORD (read)
function reconfigure_password {
    log_info "Reconfiguring sysadmin password..."
    # chpasswd -e takes the password in already-encrypted (hashed) form.
    # printf is used so the value is passed through verbatim; $? below is
    # the status of chpasswd, the last command of the pipeline.
    printf '%s\n' "sysadmin:${NEW_PASSWORD}" | sudo chpasswd -e
    check_rc_die $? "chpasswd failed"
}

# Declare required variables
OAM_SUBNET=""
OAM_GATEWAY_IP=""
OAM_IP=""
NEW_PASSWORD=""

log_info "Starting enroll-init reconfiguration..."

# Parse command line arguments
while [[ "$#" -gt 0 ]]; do
    case "$1" in
        --help)
            usage
            exit 0
            ;;
        --oam_subnet)
            require_option_value "$1" "$#"
            OAM_SUBNET="$2"
            shift 2
            ;;
        --oam_gateway_ip)
            require_option_value "$1" "$#"
            OAM_GATEWAY_IP="$2"
            shift 2
            ;;
        --oam_ip)
            require_option_value "$1" "$#"
            OAM_IP="$2"
            shift 2
            ;;
        --new_password)
            require_option_value "$1" "$#"
            NEW_PASSWORD="$2"
            shift 2
            ;;
        *)
            log_fatal "Unexpected option: $1"
            ;;
    esac
done

# Ensure all required arguments are provided
if [[ -z "$OAM_SUBNET" || -z "$OAM_GATEWAY_IP" || -z "$OAM_IP" || -z "$NEW_PASSWORD" ]]; then
    usage
    log_fatal "Missing required arguments"
fi

# Main execution flow
verify_factory_install
load_credentials
check_services_status
reconfigure_OAM
reconfigure_password

log_info "Successfully reconfigured OAM network and system password."