From 3c5e05a8a4dbc2b7ee1b61ba61393d82dd7917e5 Mon Sep 17 00:00:00 2001
From: Bifrost <vilouskubajj@gmail.com>
Date: Tue, 8 Aug 2023 19:38:12 +0100
Subject: [PATCH] Introduce default kernel/ramdisks by arch

Introduce config to allow setting default ramdisks per-architecture.
The hierarchy of the parameters is:
Node config > config by architecture > general config

Change-Id: I95dfece3e8f7bcd3121ac808985cb61997877a51
---
 .../install/configure-glance-images.rst       |  24 ++-
 ironic/conf/conductor.py                      |  52 ++++++-
 ironic/drivers/utils.py                       |  19 ++-
 ironic/tests/unit/common/test_pxe_utils.py    |  33 ++++-
 .../unit/drivers/modules/ilo/test_boot.py     |  22 +++
 .../unit/drivers/modules/redfish/test_boot.py | 140 ++++++++++++++++--
 ...rnel-ramdisk-by-arch-c35cd2fe09f6ee98.yaml |  13 ++
 7 files changed, 273 insertions(+), 30 deletions(-)
 create mode 100644 releasenotes/notes/kernel-ramdisk-by-arch-c35cd2fe09f6ee98.yaml

diff --git a/doc/source/install/configure-glance-images.rst b/doc/source/install/configure-glance-images.rst
index 4b8300678f..62d1bd8e57 100644
--- a/doc/source/install/configure-glance-images.rst
+++ b/doc/source/install/configure-glance-images.rst
@@ -83,7 +83,8 @@ Deploy ramdisk images
         --disk-format aki --container-format aki \
         --file ironic-python-agent.vmlinuz
 
-   Store the image UUID obtained from the above step as ``DEPLOY_VMLINUZ_UUID``.
+   Store the image UUID obtained from the above step as ``DEPLOY_VMLINUZ_UUID``
+   (or a different name when using the parameter specified by node architecture).
 
    .. code-block:: console
 
@@ -91,14 +92,29 @@ Deploy ramdisk images
         --disk-format ari --container-format ari \
         --file ironic-python-agent.initramfs
 
-   Store the image UUID obtained from the above step as ``DEPLOY_INITRD_UUID``.
+   Store the image UUID obtained from the above step as ``DEPLOY_INITRD_UUID``
+   (or a different name when using the parameter specified by node architecture).
 
 #. Configure the Bare Metal service to use the produced images. It can be done
-   per node as described in :doc:`enrollment` or globally in the configuration
-   file:
+   per node as described in :doc:`enrollment` or in the configuration
+   file either using a dictionary to specify them by architecture as follows:
+
+   .. code-block:: ini
+
+    [conductor]
+    deploy_kernel_by_arch = {'x86_64': <insert DEPLOY_VMLINUZ_X86_64_UUID>,
+                             'aarch64': <insert DEPLOY_VMLINUZ_AARCH64_UUID>}
+    deploy_ramdisk_by_arch = {'x86_64': <insert DEPLOY_INITRD_X86_64_UUID>,
+                              'aarch64': <insert DEPLOY_INITRD_AARCH64_UUID>}
+
+   or globally using the general configuration parameters:
 
    .. code-block:: ini
 
     [conductor]
     deploy_kernel = <insert DEPLOY_VMLINUZ_UUID>
     deploy_ramdisk = <insert DEPLOY_INITRD_UUID>
+
+   In the case when both general parameters and parameters specified by
+   architecture are defined, the parameters specified by architecture take
+   priority.
diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py
index 47e98551b0..77693779b9 100644
--- a/ironic/conf/conductor.py
+++ b/ironic/conf/conductor.py
@@ -205,20 +205,56 @@ opts = [
                        'endpoint via multicast DNS.')),
     cfg.StrOpt('deploy_kernel',
                mutable=True,
-               help=_('Glance ID, http:// or file:// URL of the kernel of '
-                      'the default deploy image.')),
+               deprecated_for_removal=True,
+               deprecated_reason=_('Replaced by deploy_kernel_by_arch which '
+                                   'provides more configuration options.'),
+               help=_('DEPRECATED: Glance ID, http:// or file:// URL of the '
+                      'kernel of the default deploy image.')),
     cfg.StrOpt('deploy_ramdisk',
                mutable=True,
-               help=_('Glance ID, http:// or file:// URL of the initramfs of '
-                      'the default deploy image.')),
+               deprecated_for_removal=True,
+               deprecated_reason=_('Replaced by deploy_ramdisk_by_arch which '
+                                   'provides more configuration options.'),
+               help=_('DEPRECATED: Glance ID, http:// or file:// URL of the '
+                      'initramfs of the default deploy image.')),
+    cfg.DictOpt('deploy_kernel_by_arch',
+                default={},
+                mutable=True,
+                help=_('A dictionary of key-value pairs of each architecture '
+                       'with the Glance ID, http:// or file:// URL of the '
+                       'kernel of the default deploy image.')),
+    cfg.DictOpt('deploy_ramdisk_by_arch',
+                default={},
+                mutable=True,
+                help=_('A dictionary of key-value pairs of each architecture '
+                       'with the Glance ID, http:// or file:// URL of the '
+                       'initramfs of the default deploy image.')),
     cfg.StrOpt('rescue_kernel',
                mutable=True,
-               help=_('Glance ID, http:// or file:// URL of the kernel of '
-                      'the default rescue image.')),
+               deprecated_for_removal=True,
+               deprecated_reason=_('Replaced by rescue_kernel_by_arch which '
+                                   'provides more configuration options.'),
+               help=_('DEPRECATED: Glance ID, http:// or file:// URL of the '
+                      'kernel of the default rescue image.')),
     cfg.StrOpt('rescue_ramdisk',
                mutable=True,
-               help=_('Glance ID, http:// or file:// URL of the initramfs of '
-                      'the default rescue image.')),
+               deprecated_for_removal=True,
+               deprecated_reason=_('Replaced by rescue_ramdisk_by_arch which '
+                                   'provides more configuration options.'),
+               help=_('DEPRECATED: Glance ID, http:// or file:// URL of the '
+                      'initramfs of the default rescue image.')),
+    cfg.DictOpt('rescue_kernel_by_arch',
+                default={},
+                mutable=True,
+                help=_('A dictionary of key-value pairs of each architecture '
+                       'with the Glance ID, http:// or file:// URL of the '
+                       'kernel of the default rescue image.')),
+    cfg.DictOpt('rescue_ramdisk_by_arch',
+                default={},
+                mutable=True,
+                help=_('A dictionary of key-value pairs of each architecture '
+                       'with the Glance ID, http:// or file:// URL of the '
+                       'initramfs of the default rescue image.')),
     cfg.StrOpt('rescue_password_hash_algorithm',
                default='sha256',
                choices=['sha256', 'sha512'],
diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py
index fc5cdcf0d9..1bb0c49a23 100644
--- a/ironic/drivers/utils.py
+++ b/ironic/drivers/utils.py
@@ -447,9 +447,24 @@ def get_agent_kernel_ramdisk(node, mode='deploy', deprecated_prefix=None):
     # from driver_info but deploy_ramdisk comes from configuration,
     # since it's a sign of a potential operator's mistake.
     if not kernel or not ramdisk:
+        # NOTE(kubajj): If kernel and/or ramdisk are specified by architecture,
+        # prioritise them, otherwise use the default.
+        kernel_dict_param_name = f'{mode}_kernel_by_arch'
+        ramdisk_dict_param_name = f'{mode}_ramdisk_by_arch'
+        kernel_dict = getattr(CONF.conductor, kernel_dict_param_name)
+        ramdisk_dict = getattr(CONF.conductor, ramdisk_dict_param_name)
+        cpu_arch = node.properties.get('cpu_arch')
+        kernel_for_this_arch = kernel_dict.get(cpu_arch)
+        ramdisk_for_this_arch = ramdisk_dict.get(cpu_arch)
+        if kernel_for_this_arch and ramdisk_for_this_arch:
+            kernel = kernel_for_this_arch
+            ramdisk = ramdisk_for_this_arch
+        else:
+            kernel = getattr(CONF.conductor, kernel_name)
+            ramdisk = getattr(CONF.conductor, ramdisk_name)
         return {
-            kernel_name: getattr(CONF.conductor, kernel_name),
-            ramdisk_name: getattr(CONF.conductor, ramdisk_name),
+            kernel_name: kernel,
+            ramdisk_name: ramdisk,
         }
     else:
         return {
diff --git a/ironic/tests/unit/common/test_pxe_utils.py b/ironic/tests/unit/common/test_pxe_utils.py
index b775c68a1f..190bdfb811 100644
--- a/ironic/tests/unit/common/test_pxe_utils.py
+++ b/ironic/tests/unit/common/test_pxe_utils.py
@@ -1197,12 +1197,21 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
     def test_parse_driver_info_rescue(self):
         self._test_parse_driver_info(mode='rescue')
 
-    def _test_parse_driver_info_from_conf(self, mode='deploy'):
-        del self.node.driver_info['%s_kernel' % mode]
-        del self.node.driver_info['%s_ramdisk' % mode]
-        exp_info = {'%s_ramdisk' % mode: 'glance://%s_ramdisk_uuid' % mode,
-                    '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode}
-        self.config(group='conductor', **exp_info)
+    def _test_parse_driver_info_from_conf(self, mode='deploy', by_arch=False):
+        ramdisk = 'glance://%s_ramdisk_uuid' % mode
+        kernel = 'glance://%s_kernel_uuid' % mode
+        if by_arch:
+            exp_info = {'%s_ramdisk' % mode: ramdisk,
+                        '%s_kernel' % mode: kernel}
+            config = {'%s_ramdisk_by_arch' % mode: ramdisk,
+                      '%s_kernel_by_arch' % mode: kernel}
+        else:
+            del self.node.driver_info['%s_kernel' % mode]
+            del self.node.driver_info['%s_ramdisk' % mode]
+            exp_info = {'%s_ramdisk' % mode: ramdisk,
+                        '%s_kernel' % mode: kernel}
+            config = exp_info
+        self.config(group='conductor', **config)
         image_info = pxe_utils.parse_driver_info(self.node, mode=mode)
         self.assertEqual(exp_info, image_info)
 
@@ -1212,12 +1221,24 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
     def test_parse_driver_info_from_conf_rescue(self):
         self._test_parse_driver_info_from_conf(mode='rescue')
 
+    def test_parse_driver_info_from_conf_deploy_by_arch(self):
+        self._test_parse_driver_info_from_conf(by_arch=True)
+
+    def test_parse_driver_info_from_conf_rescue_by_arch(self):
+        self._test_parse_driver_info_from_conf(mode='rescue', by_arch=True)
+
     def test_parse_driver_info_mixed_source_deploy(self):
         self.config(deploy_kernel='file:///image',
                     deploy_ramdisk='file:///image',
                     group='conductor')
         self._test_parse_driver_info_missing_ramdisk()
 
+    def test_parse_driver_info_mixed_source_deploy_by_arch(self):
+        self.config(deploy_kernel_by_arch={'x86_64': 'file:///image'},
+                    deploy_ramdisk_by_arch={'x86_64': 'file:///image'},
+                    group='conductor')
+        self._test_parse_driver_info_missing_ramdisk()
+
     def test_parse_driver_info_mixed_source_rescue(self):
         self.config(rescue_kernel='file:///image',
                     rescue_ramdisk='file:///image',
diff --git a/ironic/tests/unit/drivers/modules/ilo/test_boot.py b/ironic/tests/unit/drivers/modules/ilo/test_boot.py
index 8ebaa14fa8..efce438489 100644
--- a/ironic/tests/unit/drivers/modules/ilo/test_boot.py
+++ b/ironic/tests/unit/drivers/modules/ilo/test_boot.py
@@ -127,6 +127,20 @@ class IloBootCommonMethodsTestCase(test_common.BaseIloTest):
         actual_driver_info = ilo_boot.parse_driver_info(self.node)
         self.assertEqual(expected_driver_info, actual_driver_info)
 
+    def test_parse_driver_info_deploy_config_by_arch(self):
+        CONF.set_override('deploy_kernel_by_arch', {'x86_64': 'kernel'},
+                          'conductor')
+        CONF.set_override('deploy_ramdisk_by_arch', {'x86_64': 'ramdisk'},
+                          'conductor')
+        CONF.set_override('bootloader', 'bootloader', 'conductor')
+        expected_driver_info = {'deploy_kernel': 'kernel',
+                                'deploy_ramdisk': 'ramdisk',
+                                'bootloader': 'bootloader',
+                                'kernel_append_params': None}
+
+        actual_driver_info = ilo_boot.parse_driver_info(self.node)
+        self.assertEqual(expected_driver_info, actual_driver_info)
+
     def test_parse_driver_info_rescue_config(self):
         CONF.set_override('rescue_kernel', 'kernel', 'conductor')
         CONF.set_override('rescue_ramdisk', 'ramdisk', 'conductor')
@@ -145,6 +159,14 @@ class IloBootCommonMethodsTestCase(test_common.BaseIloTest):
         self.assertRaisesRegex(exception.MissingParameterValue, 'bootloader',
                                ilo_boot.parse_driver_info, self.node)
 
+    def test_parse_driver_info_bootloader_none_by_arch(self):
+        CONF.set_override('deploy_kernel_by_arch', {'x86_64': 'kernel'},
+                          'conductor')
+        CONF.set_override('deploy_ramdisk_by_arch', {'x86_64': 'ramdisk'},
+                          'conductor')
+        self.assertRaisesRegex(exception.MissingParameterValue, 'bootloader',
+                               ilo_boot.parse_driver_info, self.node)
+
     def test_parse_driver_info_exc(self):
         self.assertRaises(exception.MissingParameterValue,
                           ilo_boot.parse_driver_info, self.node)
diff --git a/ironic/tests/unit/drivers/modules/redfish/test_boot.py b/ironic/tests/unit/drivers/modules/redfish/test_boot.py
index fe2def6f70..9e4b9c8b72 100644
--- a/ironic/tests/unit/drivers/modules/redfish/test_boot.py
+++ b/ironic/tests/unit/drivers/modules/redfish/test_boot.py
@@ -158,18 +158,32 @@ class RedfishVirtualMediaBootTestCase(db_base.DbTestCase):
                               redfish_boot._parse_driver_info,
                               task.node)
 
-    def _test_parse_driver_info_from_conf(self, mode='deploy'):
+    def _test_parse_driver_info_from_conf(self, mode='deploy', by_arch=False):
         with task_manager.acquire(self.context, self.node.uuid,
                                   shared=True) as task:
             if mode == 'rescue':
                 task.node.provision_state = states.RESCUING
 
-            expected = {
-                '%s_ramdisk' % mode: 'glance://%s_ramdisk_uuid' % mode,
-                '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode
-            }
+            if by_arch:
+                ramdisk = 'glance://%s_ramdisk_uuid' % mode
+                kernel = 'glance://%s_kernel_uuid' % mode
 
-            self.config(group='conductor', **expected)
+                config = {
+                    '%s_ramdisk_by_arch' % mode: {'x86_64': ramdisk},
+                    '%s_kernel_by_arch' % mode: {'x86_64': kernel}
+                }
+                expected = {
+                    '%s_ramdisk' % mode: ramdisk,
+                    '%s_kernel' % mode: kernel
+                }
+            else:
+                expected = {
+                    '%s_ramdisk' % mode: 'glance://%s_ramdisk_uuid' % mode,
+                    '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode
+                }
+                config = expected
+
+            self.config(group='conductor', **config)
 
             image_info = redfish_boot._parse_driver_info(task.node)
 
@@ -182,15 +196,29 @@ class RedfishVirtualMediaBootTestCase(db_base.DbTestCase):
     def test_parse_driver_info_from_conf_rescue(self):
         self._test_parse_driver_info_from_conf(mode='rescue')
 
-    def _test_parse_driver_info_mixed_source(self, mode='deploy'):
+    def test_parse_driver_info_from_conf_deploy_by_arch(self):
+        self._test_parse_driver_info_from_conf(by_arch=True)
+
+    def test_parse_driver_info_from_conf_rescue_by_arch(self):
+        self._test_parse_driver_info_from_conf(mode='rescue', by_arch=True)
+
+    def _test_parse_driver_info_mixed_source(self, mode='deploy',
+                                             by_arch=False):
         with task_manager.acquire(self.context, self.node.uuid,
                                   shared=True) as task:
             if mode == 'rescue':
                 task.node.provision_state = states.RESCUING
 
-            kernel_config = {
-                '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode
-            }
+            if by_arch:
+                kernel_config = {
+                    '%s_kernel_by_arch' % mode: {
+                        'x86': 'glance://%s_kernel_uuid' % mode
+                    }
+                }
+            else:
+                kernel_config = {
+                    '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode
+                }
 
             ramdisk_config = {
                 '%s_ramdisk' % mode: 'glance://%s_ramdisk_uuid' % mode,
@@ -209,6 +237,98 @@ class RedfishVirtualMediaBootTestCase(db_base.DbTestCase):
     def test_parse_driver_info_mixed_source_rescue(self):
         self._test_parse_driver_info_mixed_source(mode='rescue')
 
+    def test_parse_driver_info_mixed_source_deploy_by_arch(self):
+        self._test_parse_driver_info_mixed_source(by_arch=True)
+
+    def test_parse_driver_info_mixed_source_rescue_by_arch(self):
+        self._test_parse_driver_info_mixed_source(mode='rescue', by_arch=True)
+
+    def _test_parse_driver_info_choose_by_arch(self, mode='deploy'):
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            if mode == 'rescue':
+                task.node.provision_state = states.RESCUING
+            task.node.properties['cpu_arch'] = 'aarch64'
+            wrong_ramdisk = 'glance://wrong_%s_ramdisk_uuid' % mode
+            wrong_kernel = 'glance://wrong_%s_kernel_uuid' % mode
+            ramdisk = 'glance://%s_ramdisk_uuid' % mode
+            kernel = 'glance://%s_kernel_uuid' % mode
+
+            config = {
+                '%s_ramdisk_by_arch' % mode: {
+                    'x86_64': wrong_ramdisk, 'aarch64': ramdisk},
+                '%s_kernel_by_arch' % mode: {
+                    'x86_64': wrong_kernel, 'aarch64': kernel}
+            }
+            expected = {
+                '%s_ramdisk' % mode: ramdisk,
+                '%s_kernel' % mode: kernel
+            }
+
+            self.config(group='conductor', **config)
+
+            image_info = redfish_boot._parse_driver_info(task.node)
+
+            for key, value in expected.items():
+                self.assertEqual(value, image_info[key])
+
+    def test_parse_driver_info_choose_by_arch_deploy(self):
+        self._test_parse_driver_info_choose_by_arch()
+
+    def test_parse_driver_info_choose_by_arch_rescue(self):
+        self._test_parse_driver_info_choose_by_arch(mode='rescue')
+
+    def _test_parse_driver_info_choose_by_hierarchy(self, mode='deploy',
+                                                    ramdisk_missing=False):
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            if mode == 'rescue':
+                task.node.provision_state = states.RESCUING
+
+            ramdisk = 'glance://def_%s_ramdisk_uuid' % mode
+            kernel = 'glance://def_%s_kernel_uuid' % mode
+            ramdisk_by_arch = 'glance://%s_ramdisk_by_arch_uuid' % mode
+            kernel_by_arch = 'glance://%s_kernel_by_arch_uuid' % mode
+
+            config = {
+                '%s_kernel_by_arch' % mode: {
+                    'x86_64': kernel_by_arch},
+                '%s_ramdisk' % mode: ramdisk,
+                '%s_kernel' % mode: kernel
+            }
+            if not ramdisk_missing:
+                config['%s_ramdisk_by_arch' % mode] = {
+                    'x86_64': ramdisk_by_arch}
+                expected = {
+                    '%s_ramdisk' % mode: ramdisk_by_arch,
+                    '%s_kernel' % mode: kernel_by_arch
+                }
+            else:
+                expected = {
+                    '%s_ramdisk' % mode: ramdisk,
+                    '%s_kernel' % mode: kernel
+                }
+
+            self.config(group='conductor', **config)
+
+            image_info = redfish_boot._parse_driver_info(task.node)
+
+            for key, value in expected.items():
+                self.assertEqual(value, image_info[key])
+
+    def test_parse_driver_info_choose_by_hierarchy_deploy(self):
+        self._test_parse_driver_info_choose_by_hierarchy()
+
+    def test_parse_driver_info_choose_by_hierarchy_rescue(self):
+        self._test_parse_driver_info_choose_by_hierarchy(mode='rescue')
+
+    def test_parse_driver_info_choose_by_hierarchy_missing_param_deploy(self):
+        self._test_parse_driver_info_choose_by_hierarchy(ramdisk_missing=True)
+
+    def test_parse_driver_info_choose_by_hierarchy_missing_param_rescue(self):
+        self._test_parse_driver_info_choose_by_hierarchy(
+            mode='rescue', ramdisk_missing=True)
+
     def test_parse_deploy_info(self):
         with task_manager.acquire(self.context, self.node.uuid,
                                   shared=True) as task:
diff --git a/releasenotes/notes/kernel-ramdisk-by-arch-c35cd2fe09f6ee98.yaml b/releasenotes/notes/kernel-ramdisk-by-arch-c35cd2fe09f6ee98.yaml
new file mode 100644
index 0000000000..05541eee46
--- /dev/null
+++ b/releasenotes/notes/kernel-ramdisk-by-arch-c35cd2fe09f6ee98.yaml
@@ -0,0 +1,13 @@
+---
+features:
+  - |
+    Introduce new config parameters in the conductor group.
+    The `deploy_kernel_by_arch`, `deploy_ramdisk_by_arch`,
+    `rescue_kernel_by_arch`, and `rescue_ramdisk_by_arch` are dictionaries
+    allowing operators to specify parameters of kernel and ramdisk by the
+    architecture of the node.
+deprecations:
+  - |
+    The `deploy_kernel`, `deploy_ramdisk`, `rescue_kernel`, and
+    `rescue_ramdisk` parameters have been marked as deprecated as the new
+    parameters allow more configuration options.