ovn: Add support for OVN SB Relay

Add support for deploying OVN SB relays [1].
This is enabled by default to improve scalability in large deployments.

The default mechanism deploys one relay group per 50 ovn-controller hosts
and uses a random but idempotent method to choose a relay group for a
given host.
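
For example, with the defaults, 120 hosts in the ovn-controller group yield
ceil(120 / 50) = 3 relay groups, and each host deterministically picks one of
them via the following group_vars expressions (see the diff below):

  # Relay group count and per-host relay selection (group_vars/all.yml)
  ovn_sb_db_relay_count: "{{ ((groups['ovn-controller'] | length) / ovn_sb_db_relay_compute_per_relay | int) | round(0, 'ceil') | int }}"
  ovn_sb_db_relay_client_group_id: "{{ range(1, ovn_sb_db_relay_count | int + 1) | random(seed=inventory_hostname) }}"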

neutron-server and Octavia connect directly to the SB cluster, not via
the relays.

The kolla-ansible check subcommand does not support "iterated" containers
for now - support will be introduced in a subsequent patch.

[1]: https://docs.ovn.org/en/latest/tutorials/ovn-ovsdb-relay.html

Co-Authored-By: Krzysztof Tomaszewski <ktomaszewski@cloudferro.com>

Depends-On: https://review.opendev.org/c/openstack/kolla/+/928898

Change-Id: Id7d6973668d8b2b194913b533f8cb756c6708536
Michal Nasiadka 2025-01-15 11:20:00 +00:00
parent 88e94f29bb
commit 8670c3f9d3
21 changed files with 272 additions and 4 deletions

@ -623,8 +623,16 @@ opensearch_dashboards_listen_port: "{{ opensearch_dashboards_port }}"
ovn_nb_db_port: "6641"
ovn_sb_db_port: "6642"
# OVN SB Relay related variables
ovn_sb_db_relay_count: "{{ ((groups['ovn-controller'] | length) / ovn_sb_db_relay_compute_per_relay | int) | round(0, 'ceil') | int }}"
ovn_sb_db_relay_compute_per_relay: "50"
ovn_sb_db_relay_port_prefix: "1664"
ovn_sb_db_relay_port: "{{ ovn_sb_db_relay_port_prefix ~ ovn_sb_db_relay_client_group_id }}"
ovn_sb_db_relay_client_group_id: "{{ range(1, ovn_sb_db_relay_count | int + 1) | random(seed=inventory_hostname) }}"
ovn_nb_connection: "{% for host in groups['ovn-nb-db'] %}tcp:{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ ovn_nb_db_port }}{% if not loop.last %},{% endif %}{% endfor %}"
ovn_sb_connection: "{% for host in groups['ovn-sb-db'] %}tcp:{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ ovn_sb_db_port }}{% if not loop.last %},{% endif %}{% endfor %}"
ovn_sb_connection: "{{ ovn_sb_connection_relay if enable_ovn_sb_db_relay | bool else ovn_sb_connection_no_relay }}"
ovn_sb_connection_no_relay: "{% for host in groups['ovn-sb-db'] %}tcp:{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ ovn_sb_db_port }}{% if not loop.last %},{% endif %}{% endfor %}"
ovn_sb_connection_relay: "{% for host in groups['ovn-sb-db-relay'] %}tcp:{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ ovn_sb_db_relay_port }}{% if not loop.last %},{% endif %}{% endfor %}"
ovsdb_port: "6640"
@ -931,6 +939,7 @@ enable_octavia_driver_agent: "{{ enable_octavia | bool and neutron_plugin_agent
enable_octavia_jobboard: "{{ enable_octavia | bool and 'amphora' in octavia_provider_drivers }}"
enable_openvswitch: "{{ enable_neutron | bool and neutron_plugin_agent != 'linuxbridge' }}"
enable_ovn: "{{ enable_neutron | bool and neutron_plugin_agent == 'ovn' }}"
enable_ovn_sb_db_relay: "{{ enable_ovn | bool }}"
enable_ovs_dpdk: "no"
enable_osprofiler: "no"
enable_placement: "{{ enable_nova | bool or enable_zun | bool }}"

@ -592,6 +592,9 @@ ovn-database
[ovn-sb-db:children]
ovn-database
[ovn-sb-db-relay:children]
ovn-database
[venus-api:children]
venus

@ -611,6 +611,9 @@ ovn-database
[ovn-sb-db:children]
ovn-database
[ovn-sb-db-relay:children]
ovn-database
[venus-api:children]
venus

@ -37,7 +37,7 @@ max_header_size = 38
[ovn]
ovn_nb_connection = {{ ovn_nb_connection }}
ovn_sb_connection = {{ ovn_sb_connection }}
ovn_sb_connection = {{ ovn_sb_connection_no_relay }}
ovn_metadata_enabled = true
enable_distributed_floating_ip = {{ neutron_ovn_distributed_fip | bool }}
ovn_emit_need_to_frag = true

@ -29,7 +29,7 @@ enabled_provider_agents = {{ octavia_provider_agents }}
{% if neutron_plugin_agent == 'ovn' %}
[ovn]
ovn_nb_connection = {{ ovn_nb_connection }}
ovn_sb_connection = {{ ovn_sb_connection }}
ovn_sb_connection = {{ ovn_sb_connection_no_relay }}
{% endif %}
[haproxy_amphora]

@ -21,6 +21,15 @@ ovn_db_services:
image: "{{ ovn_sb_db_image_full }}"
volumes: "{{ ovn_sb_db_default_volumes + ovn_sb_db_extra_volumes }}"
dimensions: "{{ ovn_sb_db_dimensions }}"
ovn-sb-db-relay:
container_name: ovn_sb_db_relay
group: ovn-sb-db-relay
enabled: "{{ enable_ovn_sb_db_relay | bool }}"
image: "{{ ovn_sb_db_relay_image_full }}"
iterate: true
iterate_var: "{{ ovn_sb_db_relay_count | int }}"
volumes: "{{ ovn_sb_db_relay_default_volumes + ovn_sb_db_relay_extra_volumes }}"
dimensions: "{{ ovn_sb_db_relay_dimensions }}"
####################
@ -40,9 +49,14 @@ ovn_sb_db_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docke
ovn_sb_db_tag: "{{ ovn_tag }}"
ovn_sb_db_image_full: "{{ ovn_sb_db_image }}:{{ ovn_sb_db_tag }}"
ovn_sb_db_relay_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/{{ docker_image_name_prefix }}ovn-sb-db-relay"
ovn_sb_db_relay_tag: "{{ ovn_tag }}"
ovn_sb_db_relay_image_full: "{{ ovn_sb_db_relay_image }}:{{ ovn_sb_db_relay_tag }}"
ovn_northd_dimensions: "{{ default_container_dimensions }}"
ovn_nb_db_dimensions: "{{ default_container_dimensions }}"
ovn_sb_db_dimensions: "{{ default_container_dimensions }}"
ovn_sb_db_relay_dimensions: "{{ default_container_dimensions }}"
ovn_northd_default_volumes:
- "{{ node_config_directory }}/ovn-northd/:{{ container_config_directory }}/:ro"
@ -58,11 +72,16 @@ ovn_sb_db_default_volumes:
- "/etc/localtime:/etc/localtime:ro"
- "ovn_sb_db:/var/lib/openvswitch/ovn-sb/"
- "kolla_logs:/var/log/kolla/"
ovn_sb_db_relay_default_volumes:
- "{{ node_config_directory }}/ovn-sb-db-relay{{ ('-' + item | string) if item is defined }}/:{{ container_config_directory }}/:ro"
- "/etc/localtime:/etc/localtime:ro"
- "kolla_logs:/var/log/kolla/"
ovn_db_extra_volumes: "{{ default_extra_volumes }}"
ovn_northd_extra_volumes: "{{ ovn_db_extra_volumes }}"
ovn_nb_db_extra_volumes: "{{ ovn_db_extra_volumes }}"
ovn_sb_db_extra_volumes: "{{ ovn_db_extra_volumes }}"
ovn_sb_db_relay_extra_volumes: "{{ ovn_db_extra_volumes }}"
#####
# OVN
@ -75,6 +94,19 @@ ovn_openflow_probe_interval: "60"
ovn_db_inactivity_probe: "60000"
ovn_sb_db_inactivity_probe: "{{ ovn_db_inactivity_probe }}"
ovn_nb_db_inactivity_probe: "{{ ovn_db_inactivity_probe }}"
ovn_sb_db_relay_active_inactivity_probe: "{{ ovn_db_inactivity_probe | int * 2}}"
ovn_sb_db_relay_passive_inactivity_probe: "{{ ovn_db_inactivity_probe }}"
ovn_sb_db_relay_max_backoff: "{{ ovn_db_inactivity_probe }}"
# TODO(mnnasiadka): remove that once Debian gets OVS 3.3+
ovn_sb_db_relay_config_file_args: >-
{% if kolla_base_distro == 'debian' %}
--db-sb-relay-remote={{ ovn_sb_connection_no_relay }}
-- --remote=ptcp:{{ ovn_sb_db_relay_port_prefix ~ ovn_sb_db_relay_group_id }}:{{ api_interface_address | put_address_in_context('url') }}
{% else %}
--db-sb-relay-config-file=/etc/ovn/ovsdb-relay.json
{% endif %}
# OVN startup commands
ovn_nb_command: >-
/usr/share/ovn/scripts/ovn-ctl run_nb_ovsdb
@ -94,5 +126,13 @@ ovn_sb_command: >-
--db-sb-pidfile=/run/ovn/ovnsb_db.pid
--db-sb-file=/var/lib/openvswitch/ovn-sb/ovnsb.db
--ovn-sb-logfile=/var/log/kolla/openvswitch/ovn-sb-db.log
ovn_sb_relay_command: >-
/usr/share/ovn/scripts/ovn-ctl run_sb_relay_ovsdb
--db-sb-relay-ctrl-sock=/run/ovn/ovnsb_db_relay_{{ ovn_sb_db_relay_group_id }}.ctl
--db-sb-relay-pidfile=/run/ovn/ovnsb_db_relay_{{ ovn_sb_db_relay_group_id }}.pid
--db-sb-relay-sock=/run/ovn/ovnsb_db_{{ ovn_sb_db_relay_group_id }}.sock
--db-sb-relay-use-remote-in-db=no
--ovn-sb-relay-logfile=/var/log/kolla/openvswitch/ovn-sb-relay-{{ ovn_sb_db_relay_group_id }}.log
{{ ovn_sb_db_relay_config_file_args }}
# Workaround: pause after restarting containers to allow for leader election.
ovn_leader_election_pause: 5

@ -25,6 +25,20 @@
volumes: "{{ service.volumes | reject('equalto', '') | list }}"
dimensions: "{{ service.dimensions }}"
- name: Restart ovn-sb-db-relay container
vars:
service_name: "ovn-sb-db-relay"
service: "{{ ovn_db_services[service_name] }}"
become: true
kolla_container:
action: "recreate_or_restart_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}_{{ item }}"
image: "{{ service.image }}"
volumes: "{{ service.volumes | reject('equalto', '') | list }}"
dimensions: "{{ service.dimensions }}"
loop: "{{ range(1, (ovn_sb_db_relay_count | int) + 1) | list }}"
- name: Restart ovn-northd container
vars:
service_name: "ovn-northd"

@ -56,3 +56,20 @@
until: check_ovn_sb_db_port is success
retries: 10
delay: 6
- name: Wait for ovn-sb-db-relay
vars:
ovn_sb_db_relay_group_id: "{{ item }}"
ovn_sb_db_relay_port: "{{ ovn_sb_db_relay_port_prefix ~ ovn_sb_db_relay_group_id }}"
wait_for:
host: "{{ api_interface_address }}"
port: "{{ ovn_sb_db_relay_port }}"
connect_timeout: 1
timeout: 60
register: check_ovn_sb_db_relay_port
until: check_ovn_sb_db_relay_port is success
retries: 10
delay: 6
when:
- enable_ovn_sb_db_relay | bool
loop: "{{ range(1, (ovn_sb_db_relay_count | int) + 1) | list }}"

@ -0,0 +1,35 @@
---
- name: Ensuring config directories exist for OVN relay containers
vars:
ovn_sb_db_relay_group_id: "{{ item }}"
file:
path: "{{ node_config_directory }}/ovn-sb-db-relay-{{ item }}"
state: "directory"
owner: "{{ config_owner_user }}"
group: "{{ config_owner_group }}"
mode: "0770"
become: true
- name: Copying over config.json files for OVN relay services
vars:
# NOTE(mnasiadka): Used in the ovn_sb_relay_command template in defaults/main.yml
ovn_sb_db_relay_group_id: "{{ item }}"
template:
src: "ovn-sb-db-relay.json.j2"
dest: "{{ node_config_directory }}/ovn-sb-db-relay-{{ item }}/config.json"
mode: "0660"
become: true
notify:
- Restart ovn-sb-db-relay container
- name: Generate config files for OVN relay services
vars:
# NOTE(mnasiadka): Used in the ovn_sb_relay_command template in defaults/main.yml
ovn_sb_db_relay_group_id: "{{ item }}"
template:
src: "ovsdb-relay.json.j2"
dest: "{{ node_config_directory }}/ovn-sb-db-relay-{{ item }}/ovsdb-relay.json"
mode: "0660"
become: true
notify:
- Restart ovn-sb-db-relay container

@ -8,6 +8,7 @@
mode: "0770"
become: true
with_dict: "{{ ovn_db_services | select_services_enabled_and_mapped_to_host }}"
when: item.key != 'ovn-sb-db-relay'
- name: Copying over config.json files for services
template:
@ -16,3 +17,9 @@
mode: "0660"
become: true
with_dict: "{{ ovn_db_services | select_services_enabled_and_mapped_to_host }}"
when: item.key != 'ovn-sb-db-relay'
- name: Ensure configuration for relays exists
include_tasks: config-relay.yml
loop: "{{ range(1, ovn_sb_db_relay_count | int + 1) | list }}"
when: enable_ovn_sb_db_relay | bool

@ -1,5 +1,5 @@
{
"command": "/usr/bin/ovn-northd -vconsole:emer -vsyslog:err -vfile:info --ovnnb-db={{ ovn_nb_connection }} --ovnsb-db={{ ovn_sb_connection }} --log-file=/var/log/kolla/openvswitch/ovn-northd.log --pidfile=/run/ovn/ovn-northd.pid --unixctl=/run/ovn/ovn-northd.ctl",
"command": "/usr/bin/ovn-northd -vconsole:emer -vsyslog:err -vfile:info --ovnnb-db={{ ovn_nb_connection }} --ovnsb-db={{ ovn_sb_connection_no_relay }} --log-file=/var/log/kolla/openvswitch/ovn-northd.log --pidfile=/run/ovn/ovn-northd.pid --unixctl=/run/ovn/ovn-northd.ctl",
"permissions": [
{
"path": "/var/log/kolla/openvswitch",

@ -0,0 +1,18 @@
{
"command": "{{ ovn_sb_relay_command }}",
"config_files": [
{
"source": "{{ container_config_directory }}/ovsdb-relay.json",
"dest": "/etc/ovn/ovsdb-relay.json",
"owner": "openvswitch",
"perm": "0600"
}
],
"permissions": [
{
"path": "/var/log/kolla/openvswitch",
"owner": "root:root",
"recurse": true
}
]
}

@ -0,0 +1,20 @@
{
"remotes": {
"ptcp:{{ ovn_sb_db_relay_port_prefix ~ ovn_sb_db_relay_group_id }}:{{ api_interface_address | put_address_in_context('url') }}": {
"inactivity-probe": {{ ovn_sb_db_relay_passive_inactivity_probe }}
}
},
"databases": {
"OVN_Southbound": {
"service-model": "relay",
"source": {
"{{ ovn_sb_connection_no_relay }}": {
"inactivity-probe": {{ ovn_sb_db_relay_active_inactivity_probe }},
"max-backoff": {{ ovn_sb_db_relay_max_backoff }}
}
}
}
}
}

@ -0,0 +1,36 @@
---
- name: "{{ kolla_role_name | default(project_name) }} | Check containers with iteration"
become: true
vars:
service: "{{ outer_item.value }}"
kolla_container:
action: "compare_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}"
image: "{{ service.image | default(omit) }}"
volumes: "{{ service.volumes | default(omit) }}"
dimensions: "{{ service.dimensions | default(omit) }}"
tmpfs: "{{ service.tmpfs | default(omit) }}"
volumes_from: "{{ service.volumes_from | default(omit) }}"
privileged: "{{ service.privileged | default(omit) }}"
cap_add: "{{ service.cap_add | default(omit) }}"
environment: "{{ service.environment | default(omit) }}"
healthcheck: "{{ service.healthcheck | default(omit) }}"
ipc_mode: "{{ service.ipc_mode | default(omit) }}"
pid_mode: "{{ service.pid_mode | default(omit) }}"
security_opt: "{{ service.security_opt | default(omit) }}"
labels: "{{ service.labels | default(omit) }}"
command: "{{ service.command | default(omit) }}"
cgroupns_mode: "{{ service.cgroupns_mode | default(omit) }}"
loop:
- "{{ range(1,(service.iterate_var | int) + 1) | list }}"
register: container_check
# NOTE(yoctozepto): Must be a separate task because one cannot see the whole
# result in the previous task and Ansible has a quirk regarding notifiers.
# For details see https://github.com/ansible/ansible/issues/22579
- name: "{{ kolla_role_name | default(project_name) }} | Notify handlers to restart containers"
debug:
msg: Notifying handlers
changed_when: container_check is changed
notify: "Restart {{ outer_item.key }} container"

@ -25,13 +25,26 @@
command: "{{ service.command | default(omit) }}"
cgroupns_mode: "{{ service.cgroupns_mode | default(omit) }}"
with_dict: "{{ lookup('vars', (kolla_role_name | default(project_name)) + '_services') | select_services_enabled_and_mapped_to_host }}"
when: not (service.iterate | default(False)) | bool
register: container_check
# NOTE(yoctozepto): Must be a separate task because one cannot see the whole
# result in the previous task and Ansible has a quirk regarding notifiers.
# For details see https://github.com/ansible/ansible/issues/22579
- name: "{{ kolla_role_name | default(project_name) }} | Notify handlers to restart containers"
vars:
service: "{{ item.value }}"
debug:
msg: Notifying handlers
changed_when: container_check is changed
when: not (service.iterate | default(False)) | bool
notify: "{{ container_check.results | select('changed') | map(attribute='item.key') | map('regex_replace', '^(.*)$', 'Restart \\1 container') | list }}"
- name: Include tasks
vars:
service: "{{ outer_item.value }}"
include_tasks: iterated.yml
loop: "{{ lookup('vars', (kolla_role_name | default(project_name)) + '_services') | select_services_enabled_and_mapped_to_host | dict2items }}"
loop_control:
loop_var: outer_item
when: (service.iterate | default(False)) | bool

@ -1,9 +1,11 @@
---
# List of names of containers to check that are enabled and mapped to this
# host.
# TODO(mnasiadka): Add support for iterated containers (e.g. ovn_sb_db_relay)
service_check_enabled_container_names: >-
{{ lookup('vars', (kolla_role_name | default(project_name)) + '_services') |
select_services_enabled_and_mapped_to_host |
dict2items |
rejectattr('value.iterate', 'defined') |
map(attribute='value.container_name') |
list }}

@ -231,6 +231,33 @@ To change this behaviour you need to set the following:
neutron_ovn_distributed_fip: "yes"
By default, the number of relay groups (``ovn_sb_db_relay_count``) is computed
by dividing the total number of ``ovn-controller`` hosts by the value in
``ovn_sb_db_relay_compute_per_relay`` (which defaults to 50), and rounding up.
For instance, if you have 120 hosts in the ``ovn-controller`` group, you would
get ``ceil(120 / 50) = 3`` relay groups.

You can override ``ovn_sb_db_relay_compute_per_relay`` to scale how many hosts
each relay group handles, for example:

.. code-block:: yaml

   ovn_sb_db_relay_compute_per_relay: 25

You can also bypass the automatic calculation and manually set a fixed number
of relay groups with ``ovn_sb_db_relay_count``:

.. code-block:: yaml

   ovn_sb_db_relay_count: 10

.. note::

   If you set ``ovn_sb_db_relay_count`` explicitly, it effectively overrides
   the calculated count based on ``ovn_sb_db_relay_compute_per_relay``.

It is also possible to set a static mapping between an ``ovn-controller`` host
(network node or hypervisor) and a particular OVN relay group by setting the
Ansible host_var ``ovn_sb_db_relay_client_group_id``.
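
For instance, to pin a host to relay group ``2`` (a minimal host_vars sketch;
the file name is illustrative):

.. code-block:: yaml

   # inventory/host_vars/compute01.yml
   # Must be an integer between 1 and ovn_sb_db_relay_count.
   ovn_sb_db_relay_client_group_id: 2
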
Similarly - in order to have Neutron DHCP agents deployed in OVN networking
scenario, use:

@ -0,0 +1,15 @@
---
features:
- |
Adds support for deploying ``ovn-sb-db-relay``. By default, Kolla
automatically calculates the number of relay groups based on the size of
the ``ovn-controller`` group and the value of
``ovn_sb_db_relay_compute_per_relay`` (50 by default), rounding up. If you
do not want to use relays, set ``enable_ovn_sb_db_relay`` to ``no`` in
``globals.yml``.
upgrade:
- |
When upgrading, the new ``ovn-sb-db-relay`` containers are automatically
deployed unless you explicitly set ``enable_ovn_sb_db_relay`` to ``no``.
No data migration or other manual steps are required to enable
``ovn-sb-db-relay``; it coexists alongside the existing OVN SB DB cluster.

@ -672,6 +672,9 @@ ovn-database
[ovn-sb-db:children]
ovn-database
[ovn-sb-db-relay:children]
ovn-database
[venus-api:children]
venus

@ -34,6 +34,9 @@ function test_ovn {
echo "Output: ${OVNSB_STATUS}"
exit 1
fi
echo "OVS entries"
sudo ${container_engine} exec openvswitch_vswitchd ovs-vsctl list open
}
function test_octavia {

@ -127,6 +127,9 @@ def check_json_j2():
'api_interface_address': '',
'kolla_internal_fqdn': '',
'octavia_provider_drivers': '',
'ovn_sb_db_relay_active_inactivity_probe': 120000,
'ovn_sb_db_relay_passive_inactivity_probe': 60000,
'ovn_sb_db_relay_max_backoff': 60000,
'rabbitmq_ha_replica_count': 2,
'rabbitmq_message_ttl_ms': 600000,
'rabbitmq_queue_expiry_ms': 3600000,