From 0bd9a3dc9a24eb141346e28ace770f5369f7b83d Mon Sep 17 00:00:00 2001 From: Alan Bounds Date: Fri, 19 Jun 2026 09:25:44 -0500 Subject: [PATCH] Add CUSTOM_NETGROUP_* traits and Nova network-group-affinity patch Extends the LLDP inspection hook to add CUSTOM_NETGROUP_* traits to Ironic nodes for each '-network' VLAN group they are connected to. These traits are consumed by the new NetworkGroupAffinityFilter and NetworkGroupAntiAffinityFilter in Nova to constrain scheduling to (affinity) or away from (anti-affinity) nodes within a specific cabinet switch pair. Changes: - inspect_hook_update_baremetal_ports.py: adds _network_group_trait_name() function and includes CUSTOM_NETGROUP_* traits in _set_node_traits() - _is_our_trait() updated to manage both CUSTOM_*_SWITCH and CUSTOM_NETGROUP_* patterns - Nova patch (0002_network_group_affinity_policy.patch) added to containers/nova/patches/ for quilt application during image build - Tests updated and new test classes added for the trait functions and for node trait assignment --- .../0002_network_group_affinity_policy.patch | 267 ++++++++++++++++++ containers/nova/patches/series | 1 + .../inspect_hook_update_baremetal_ports.py | 35 ++- ...est_inspect_hook_update_baremetal_ports.py | 106 ++++++- 4 files changed, 405 insertions(+), 4 deletions(-) create mode 100644 containers/nova/patches/0002_network_group_affinity_policy.patch diff --git a/containers/nova/patches/0002_network_group_affinity_policy.patch b/containers/nova/patches/0002_network_group_affinity_policy.patch new file mode 100644 index 000000000..db6d66120 --- /dev/null +++ b/containers/nova/patches/0002_network_group_affinity_policy.patch @@ -0,0 +1,267 @@ +Add network-group-affinity and network-group-anti-affinity server group +policies for constraining instance placement to specific physical network +groups (VLAN groups / cabinet switch pairs). 
+ +diff --git a/nova/api/openstack/compute/schemas/server_groups.py b/nova/api/openstack/compute/schemas/server_groups.py +index 48f3a11705..d78f9589c1 100644 +--- a/nova/api/openstack/compute/schemas/server_groups.py ++++ b/nova/api/openstack/compute/schemas/server_groups.py +@@ -64,7 +64,8 @@ create_v264['properties']['server_group']['required'].append('policy') + create_v264['properties']['server_group']['properties']['policy'] = { + 'type': 'string', + 'enum': ['anti-affinity', 'affinity', +- 'soft-anti-affinity', 'soft-affinity'], ++ 'soft-anti-affinity', 'soft-affinity', ++ 'network-group-affinity', 'network-group-anti-affinity'], + } + + create_v264['properties']['server_group']['properties']['rules'] = { +@@ -72,6 +73,11 @@ create_v264['properties']['server_group']['properties']['rules'] = { + 'properties': { + 'max_server_per_host': + parameter_types.positive_integer, ++ 'network_group': { ++ 'type': 'string', ++ 'minLength': 1, ++ 'maxLength': 255, ++ }, + }, + 'additionalProperties': False, + } +@@ -157,12 +163,15 @@ _server_group_response_v264['properties'].update({ + 'anti-affinity', + 'soft-affinity', + 'soft-anti-affinity', ++ 'network-group-affinity', ++ 'network-group-anti-affinity', + ], + }, + 'rules': { + 'type': 'object', + 'properties': { + 'max_server_per_host': {'type': 'integer'}, ++ 'network_group': {'type': 'string'}, + }, + 'required': [], + 'additionalProperties': False, +diff --git a/nova/api/openstack/compute/server_groups.py b/nova/api/openstack/compute/server_groups.py +index fc65caa8c6..6838fd8387 100644 +--- a/nova/api/openstack/compute/server_groups.py ++++ b/nova/api/openstack/compute/server_groups.py +@@ -215,14 +215,28 @@ class ServerGroupController(wsgi.Controller): + if api_version_request.is_supported(req, "2.64"): + policy = vals['policy'] + rules = vals.get('rules', {}) +- if policy != 'anti-affinity' and rules: +- msg = _("Only anti-affinity policy supports rules.") ++ if policy == 'anti-affinity': ++ # NOTE(yikun): This 
should be removed in Stein version. ++ if not _should_enable_custom_max_server_rules(context, rules): ++ msg = _("Creating an anti-affinity group with rule " ++ "max_server_per_host > 1 is not yet supported.") ++ raise exc.HTTPConflict(explanation=msg) ++ elif policy in ('network-group-affinity', ++ 'network-group-anti-affinity'): ++ if 'max_server_per_host' in rules: ++ msg = _("network-group-affinity and " ++ "network-group-anti-affinity policies do not " ++ "support the max_server_per_host rule.") ++ raise exc.HTTPBadRequest(explanation=msg) ++ if 'network_group' not in rules: ++ msg = _("network-group-affinity and " ++ "network-group-anti-affinity policies require " ++ "a network_group rule.") ++ raise exc.HTTPBadRequest(explanation=msg) ++ elif rules: ++ msg = _("Only anti-affinity, network-group-affinity, and " ++ "network-group-anti-affinity policies support rules.") + raise exc.HTTPBadRequest(explanation=msg) +- # NOTE(yikun): This should be removed in Stein version. +- if not _should_enable_custom_max_server_rules(context, rules): +- msg = _("Creating an anti-affinity group with rule " +- "max_server_per_host > 1 is not yet supported.") +- raise exc.HTTPConflict(explanation=msg) + sg = objects.InstanceGroup(context, policy=policy, + rules=rules) + else: +diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py +index f936e8f97b..f9276c1259 100644 +--- a/nova/conf/scheduler.py ++++ b/nova/conf/scheduler.py +@@ -330,6 +330,8 @@ Related options: + "ImagePropertiesFilter", + "ServerGroupAntiAffinityFilter", + "ServerGroupAffinityFilter", ++ "NetworkGroupAffinityFilter", ++ "NetworkGroupAntiAffinityFilter", + ], + help=""" + Filters that the scheduler will use. 
+diff --git a/nova/objects/instance_group.py b/nova/objects/instance_group.py +index 8a12a87693..ead359cfff 100644 +--- a/nova/objects/instance_group.py ++++ b/nova/objects/instance_group.py +@@ -152,6 +152,8 @@ class InstanceGroup(base.NovaPersistentObject, base.NovaObject, + if 'max_server_per_host' in self._rules: + rules['max_server_per_host'] = \ + int(self._rules['max_server_per_host']) ++ if 'network_group' in self._rules: ++ rules['network_group'] = self._rules['network_group'] + return rules + + def obj_make_compatible(self, primitive, target_version): +diff --git a/nova/scheduler/filters/network_group_filter.py b/nova/scheduler/filters/network_group_filter.py +new file mode 100644 +index 0000000000..e717b06e03 +--- /dev/null ++++ b/nova/scheduler/filters/network_group_filter.py +@@ -0,0 +1,135 @@ ++# Copyright 2025 Rackspace Technology, Inc. ++# All Rights Reserved. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); you may ++# not use this file except in compliance with the License. You may obtain ++# a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT ++# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the ++# License for the specific language governing permissions and limitations ++# under the License. ++ ++"""Scheduler filters for network group affinity and anti-affinity. ++ ++These filters constrain instance placement based on the physical network ++group (VLAN group / cabinet switch pair) that an Ironic node belongs to. ++ ++The network group is specified in a server group's ``rules`` field at ++creation time and is matched against ``CUSTOM_NETGROUP_*`` traits reported ++by Ironic nodes via the Placement service. 
++""" ++ ++from oslo_log import log as logging ++ ++from nova.scheduler import filters ++ ++LOG = logging.getLogger(__name__) ++ ++# Prefix used when converting a network group name to a trait. ++# Example: "a1-1-network" -> "CUSTOM_NETGROUP_A1_1_NETWORK" ++_TRAIT_PREFIX = "CUSTOM_NETGROUP_" ++ ++ ++def _network_group_to_trait(network_group): ++ """Convert a network group name to its corresponding Placement trait. ++ ++ :param network_group: The network group name (e.g. "a1-1-network") ++ :returns: The trait string (e.g. "CUSTOM_NETGROUP_A1_1_NETWORK") ++ """ ++ normalised = network_group.upper().replace("-", "_").replace("/", "_") ++ return _TRAIT_PREFIX + normalised ++ ++ ++class NetworkGroupAffinityFilter(filters.BaseHostFilter): ++ """Schedule instances onto hosts within a specific network group. ++ ++ When a server group has the ``network-group-affinity`` policy and a ++ ``network_group`` rule, this filter only passes hosts whose reported ++ traits include the matching ``CUSTOM_NETGROUP_*`` trait. ++ ++ Hosts without the required trait are rejected. ++ """ ++ ++ # The trait set of a host does not change within a single scheduling ++ # request. ++ run_filter_once_per_request = True ++ ++ RUN_ON_REBUILD = False ++ ++ def host_passes(self, host_state, spec_obj): ++ instance_group = spec_obj.instance_group ++ if not instance_group: ++ return True ++ ++ policy = instance_group.policy if instance_group else None ++ if policy != 'network-group-affinity': ++ return True ++ ++ rules = instance_group.rules ++ network_group = rules.get('network_group') if rules else None ++ if not network_group: ++ return True ++ ++ required_trait = _network_group_to_trait(network_group) ++ ++ host_traits = set() ++ if hasattr(host_state, 'traits'): ++ host_traits = host_state.traits ++ ++ passes = required_trait in host_traits ++ if not passes: ++ LOG.debug( ++ "NetworkGroupAffinityFilter: host %(host)s rejected. 
" ++ "Required trait %(trait)s not found in host traits.", ++ {'host': host_state.host, 'trait': required_trait}) ++ return passes ++ ++ ++class NetworkGroupAntiAffinityFilter(filters.BaseHostFilter): ++ """Schedule instances onto hosts NOT within a specific network group. ++ ++ When a server group has the ``network-group-anti-affinity`` policy and ++ a ``network_group`` rule, this filter rejects hosts whose reported ++ traits include the matching ``CUSTOM_NETGROUP_*`` trait. ++ ++ This is useful for spreading workloads across cabinets or ensuring ++ instances avoid a particular switch pair. ++ """ ++ ++ # The trait set of a host does not change within a single scheduling ++ # request. ++ run_filter_once_per_request = True ++ ++ RUN_ON_REBUILD = False ++ ++ def host_passes(self, host_state, spec_obj): ++ instance_group = spec_obj.instance_group ++ if not instance_group: ++ return True ++ ++ policy = instance_group.policy if instance_group else None ++ if policy != 'network-group-anti-affinity': ++ return True ++ ++ rules = instance_group.rules ++ network_group = rules.get('network_group') if rules else None ++ if not network_group: ++ return True ++ ++ excluded_trait = _network_group_to_trait(network_group) ++ ++ host_traits = set() ++ if hasattr(host_state, 'traits'): ++ host_traits = host_state.traits ++ ++ passes = excluded_trait not in host_traits ++ if not passes: ++ LOG.debug( ++ "NetworkGroupAntiAffinityFilter: host %(host)s rejected. 
" ++ "Excluded trait %(trait)s found in host traits.", ++ {'host': host_state.host, 'trait': excluded_trait}) ++ return passes +diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py +index 58a52ab02d..3d127f5c3a 100644 +--- a/nova/scheduler/utils.py ++++ b/nova/scheduler/utils.py +@@ -1170,7 +1170,8 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None): + return + + policies = set(('anti-affinity', 'affinity', 'soft-affinity', +- 'soft-anti-affinity')) ++ 'soft-anti-affinity', 'network-group-affinity', ++ 'network-group-anti-affinity')) + if group.policy in policies: + if not _SUPPORTS_AFFINITY and 'affinity' == group.policy: + msg = _("ServerGroupAffinityFilter not configured") diff --git a/containers/nova/patches/series b/containers/nova/patches/series index 2d6b2a32b..2aeb04ff0 100644 --- a/containers/nova/patches/series +++ b/containers/nova/patches/series @@ -1,2 +1,3 @@ 0001_trunk_details_metadata.patch ironic-attach-debug.patch +0002_network_group_affinity_policy.patch diff --git a/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py b/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py index c88e67deb..977310daa 100644 --- a/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py +++ b/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py @@ -236,16 +236,28 @@ def _set_node_traits(task, vlan_groups: set[str]): For example, a connection to VLAN Group whose name ends in "-storage" will result in a trait being added to the node called "CUSTOM_STORAGE_SWITCH". + We also add a CUSTOM_NETGROUP_ trait for each "-network" VLAN group + the node is connected to. This trait is used by the Nova scheduler's + NetworkGroupAffinityFilter and NetworkGroupAntiAffinityFilter to constrain + instance placement to specific cabinet switch pairs. + We remove pre-existing traits if the node does not have the required connections. 
- Traits other than CUSTOM_*_SWITCH are left alone. + Traits other than CUSTOM_*_SWITCH and CUSTOM_NETGROUP_* are left alone. """ node = task.node existing_traits = set(node.traits.get_trait_names()) vlan_group_traits = {_trait_name(x) for x in vlan_groups if x} + network_group_traits = { + _network_group_trait_name(x) + for x in vlan_groups + if x and x.endswith("-network") + } irrelevant_existing_traits = {x for x in existing_traits if not _is_our_trait(x)} - required_traits = irrelevant_existing_traits.union(vlan_group_traits) + required_traits = irrelevant_existing_traits.union(vlan_group_traits).union( + network_group_traits + ) if existing_traits == required_traits: LOG.debug( @@ -269,5 +281,22 @@ def _trait_name(vlan_group_name: str) -> str: return f"CUSTOM_{suffix}_SWITCH" +def _network_group_trait_name(vlan_group_name: str) -> str: + """Convert a VLAN group name to a CUSTOM_NETGROUP_* trait. + + This trait is consumed by Nova's NetworkGroupAffinityFilter and + NetworkGroupAntiAffinityFilter to constrain scheduling to nodes + within a specific cabinet / switch pair. 
+ + Example: "a1-1-network" -> "CUSTOM_NETGROUP_A1_1_NETWORK" + Example: "a11-12/a11-13-network" -> "CUSTOM_NETGROUP_A11_12_A11_13_NETWORK" + """ + normalised = vlan_group_name.upper().replace("-", "_").replace("/", "_") + return f"CUSTOM_NETGROUP_{normalised}" + + def _is_our_trait(name: str) -> bool: - return bool(re.match(r"^CUSTOM_[A-Z0-9]+_SWITCH$", name)) + return bool( + re.match(r"^CUSTOM_[A-Z0-9]+_SWITCH$", name) + or re.match(r"^CUSTOM_NETGROUP_[A-Z0-9_]+$", name) + ) diff --git a/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py b/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py index 8d3ab3e7b..bb2a57cfa 100644 --- a/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py +++ b/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py @@ -6,6 +6,11 @@ from ironic_understack.inspect_hook_update_baremetal_ports import ( InspectHookUpdateBaremetalPorts, ) +from ironic_understack.inspect_hook_update_baremetal_ports import _is_our_trait +from ironic_understack.inspect_hook_update_baremetal_ports import ( + _network_group_trait_name, +) +from ironic_understack.inspect_hook_update_baremetal_ports import _trait_name # load some metaprgramming normally taken care of during Ironic initialization: ironic.objects.register_all() @@ -221,5 +226,104 @@ def test_node_traits_updated(mocker, caplog): mock_node.save.assert_called_once() trait_create.assert_called_once_with( - mock_context, 1234, {"CUSTOM_STORAGE_SWITCH", "CUSTOM_NETWORK_SWITCH", "bar"} + mock_context, + 1234, + { + "CUSTOM_STORAGE_SWITCH", + "CUSTOM_NETWORK_SWITCH", + "CUSTOM_NETGROUP_F20_3_NETWORK", + "bar", + }, ) + + +class TestTraitNames: + def test_trait_name_network(self): + assert _trait_name("f20-3-network") == "CUSTOM_NETWORK_SWITCH" + + def test_trait_name_storage(self): + assert _trait_name("f20-3-storage") == "CUSTOM_STORAGE_SWITCH" + + 
def test_network_group_trait_simple(self): + assert _network_group_trait_name("a1-1-network") == ( + "CUSTOM_NETGROUP_A1_1_NETWORK" + ) + + def test_network_group_trait_with_datacenter_prefix(self): + assert _network_group_trait_name("f20-3-network") == ( + "CUSTOM_NETGROUP_F20_3_NETWORK" + ) + + def test_network_group_trait_cross_rack(self): + """Cross-rack VLAN groups use slash separator.""" + assert _network_group_trait_name("a11-12/a11-13-network") == ( + "CUSTOM_NETGROUP_A11_12_A11_13_NETWORK" + ) + + def test_is_our_trait_switch_pattern(self): + assert _is_our_trait("CUSTOM_NETWORK_SWITCH") is True + assert _is_our_trait("CUSTOM_STORAGE_SWITCH") is True + assert _is_our_trait("CUSTOM_BMC_SWITCH") is True + + def test_is_our_trait_netgroup_pattern(self): + assert _is_our_trait("CUSTOM_NETGROUP_A1_1_NETWORK") is True + assert _is_our_trait("CUSTOM_NETGROUP_F20_3_NETWORK") is True + assert _is_our_trait("CUSTOM_NETGROUP_A11_12_A11_13_NETWORK") is True + + def test_is_our_trait_unrelated(self): + """Traits we don't manage should not match.""" + assert _is_our_trait("CUSTOM_HW_SOMETHING") is False + assert _is_our_trait("bar") is False + assert _is_our_trait("CUSTOM_NETGROUP") is False + + +class TestNetgroupTraitIncludedInNodeTraits: + """Verify that network group traits are added alongside switch traits.""" + + def test_traits_include_netgroup(self, mocker, caplog): + import logging + + caplog.set_level(logging.DEBUG) + + mock_traits = mocker.Mock() + mock_context = mocker.Mock() + mock_node = mocker.Mock(id=5678, traits=mock_traits) + mock_task = mocker.Mock(node=mock_node, context=mock_context) + + mocker.patch( + "ironic_understack.inspect_hook_update_baremetal_ports." + "ironic_ports_for_node", + return_value=[], + ) + mocker.patch( + "ironic_understack.inspect_hook_update_baremetal_ports." + "CONF.ironic_understack.switch_name_vlan_group_mapping", + MAPPING, + ) + trait_create = mocker.patch( + "ironic_understack.inspect_hook_update_baremetal_ports." 
+ "objects.TraitList.create" + ) + + # Existing traits include one we manage and one we don't + mock_traits.get_trait_names.return_value = [ + "CUSTOM_NETWORK_SWITCH", + "CUSTOM_UNRELATED_THING", + ] + + InspectHookUpdateBaremetalPorts().__call__(mock_task, _INVENTORY, _PLUGIN_DATA) + + mock_node.save.assert_called_once() + created_traits = trait_create.call_args[0][2] + + # Should include both switch-type traits and netgroup trait + assert "CUSTOM_NETWORK_SWITCH" in created_traits + assert "CUSTOM_STORAGE_SWITCH" in created_traits + assert "CUSTOM_NETGROUP_F20_3_NETWORK" in created_traits + # Unrelated trait should be preserved + assert "CUSTOM_UNRELATED_THING" in created_traits + # Old managed traits from different groups should be removed + # (none in this case, but let's verify no spurious ones) + for trait in created_traits: + if trait.startswith("CUSTOM_NETGROUP_"): + assert trait == "CUSTOM_NETGROUP_F20_3_NETWORK"