From 0bd9a3dc9a24eb141346e28ace770f5369f7b83d Mon Sep 17 00:00:00 2001
From: Alan Bounds <abounds@rackspace.com>
Date: Fri, 19 Jun 2026 09:25:44 -0500
Subject: [PATCH] Add CUSTOM_NETGROUP_* traits and Nova network-group-affinity
 patch

Extends the LLDP inspection hook to add a CUSTOM_NETGROUP_<name> trait
to each Ironic node for every '-network' VLAN group it is connected to.
These traits are consumed by the new NetworkGroupAffinityFilter and
NetworkGroupAntiAffinityFilter in Nova to constrain scheduling to (or,
for anti-affinity, away from) nodes within a specific cabinet switch pair.

Changes:
- inspect_hook_update_baremetal_ports.py: adds _network_group_trait_name()
  function and includes CUSTOM_NETGROUP_* traits in _set_node_traits()
- _is_our_trait() updated to recognise both the CUSTOM_*_SWITCH and
  CUSTOM_NETGROUP_* patterns as traits managed by the hook
- Nova patch (0002_network_group_affinity_policy.patch) added to
  containers/nova/patches/ for quilt application during image build
- Tests updated and new test classes added for the trait helper functions
  and for the traits set on the node
---
 .../0002_network_group_affinity_policy.patch  | 267 ++++++++++++++++++
 containers/nova/patches/series                |   1 +
 .../inspect_hook_update_baremetal_ports.py    |  35 ++-
 ...est_inspect_hook_update_baremetal_ports.py | 106 ++++++-
 4 files changed, 405 insertions(+), 4 deletions(-)
 create mode 100644 containers/nova/patches/0002_network_group_affinity_policy.patch
diff --git a/containers/nova/patches/0002_network_group_affinity_policy.patch b/containers/nova/patches/0002_network_group_affinity_policy.patch
new file mode 100644
index 000000000..db6d66120
--- /dev/null
+++ b/containers/nova/patches/0002_network_group_affinity_policy.patch
@@ -0,0 +1,267 @@
+Add network-group-affinity and network-group-anti-affinity server group
+policies for constraining instance placement to specific physical network
+groups (VLAN groups / cabinet switch pairs).
+
+diff --git a/nova/api/openstack/compute/schemas/server_groups.py b/nova/api/openstack/compute/schemas/server_groups.py
+index 48f3a11705..d78f9589c1 100644
+--- a/nova/api/openstack/compute/schemas/server_groups.py
++++ b/nova/api/openstack/compute/schemas/server_groups.py
+@@ -64,7 +64,8 @@ create_v264['properties']['server_group']['required'].append('policy')
+ create_v264['properties']['server_group']['properties']['policy'] = {
+     'type': 'string',
+     'enum': ['anti-affinity', 'affinity',
+-             'soft-anti-affinity', 'soft-affinity'],
++             'soft-anti-affinity', 'soft-affinity',
++             'network-group-affinity', 'network-group-anti-affinity'],
+ }
+ 
+ create_v264['properties']['server_group']['properties']['rules'] = {
+@@ -72,6 +73,11 @@ create_v264['properties']['server_group']['properties']['rules'] = {
+     'properties': {
+         'max_server_per_host':
+             parameter_types.positive_integer,
++        'network_group': {
++            'type': 'string',
++            'minLength': 1,
++            'maxLength': 255,
++        },
+     },
+     'additionalProperties': False,
+ }
+@@ -157,12 +163,15 @@ _server_group_response_v264['properties'].update({
+             'anti-affinity',
+             'soft-affinity',
+             'soft-anti-affinity',
++            'network-group-affinity',
++            'network-group-anti-affinity',
+         ],
+     },
+     'rules': {
+         'type': 'object',
+         'properties': {
+             'max_server_per_host': {'type': 'integer'},
++            'network_group': {'type': 'string'},
+         },
+         'required': [],
+         'additionalProperties': False,
+diff --git a/nova/api/openstack/compute/server_groups.py b/nova/api/openstack/compute/server_groups.py
+index fc65caa8c6..6838fd8387 100644
+--- a/nova/api/openstack/compute/server_groups.py
++++ b/nova/api/openstack/compute/server_groups.py
+@@ -215,14 +215,28 @@ class ServerGroupController(wsgi.Controller):
+         if api_version_request.is_supported(req, "2.64"):
+             policy = vals['policy']
+             rules = vals.get('rules', {})
+-            if policy != 'anti-affinity' and rules:
+-                msg = _("Only anti-affinity policy supports rules.")
++            if policy == 'anti-affinity':
++                # NOTE(yikun): This should be removed in Stein version.
++                if not _should_enable_custom_max_server_rules(context, rules):
++                    msg = _("Creating an anti-affinity group with rule "
++                            "max_server_per_host > 1 is not yet supported.")
++                    raise exc.HTTPConflict(explanation=msg)
++            elif policy in ('network-group-affinity',
++                            'network-group-anti-affinity'):
++                if 'max_server_per_host' in rules:
++                    msg = _("network-group-affinity and "
++                            "network-group-anti-affinity policies do not "
++                            "support the max_server_per_host rule.")
++                    raise exc.HTTPBadRequest(explanation=msg)
++                if 'network_group' not in rules:
++                    msg = _("network-group-affinity and "
++                            "network-group-anti-affinity policies require "
++                            "a network_group rule.")
++                    raise exc.HTTPBadRequest(explanation=msg)
++            elif rules:
++                msg = _("Only anti-affinity, network-group-affinity, and "
++                        "network-group-anti-affinity policies support rules.")
+                 raise exc.HTTPBadRequest(explanation=msg)
+-            # NOTE(yikun): This should be removed in Stein version.
+-            if not _should_enable_custom_max_server_rules(context, rules):
+-                msg = _("Creating an anti-affinity group with rule "
+-                        "max_server_per_host > 1 is not yet supported.")
+-                raise exc.HTTPConflict(explanation=msg)
+             sg = objects.InstanceGroup(context, policy=policy,
+                                        rules=rules)
+         else:
+diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py
+index f936e8f97b..f9276c1259 100644
+--- a/nova/conf/scheduler.py
++++ b/nova/conf/scheduler.py
+@@ -330,6 +330,8 @@ Related options:
+             "ImagePropertiesFilter",
+             "ServerGroupAntiAffinityFilter",
+             "ServerGroupAffinityFilter",
++            "NetworkGroupAffinityFilter",
++            "NetworkGroupAntiAffinityFilter",
+         ],
+         help="""
+ Filters that the scheduler will use.
+diff --git a/nova/objects/instance_group.py b/nova/objects/instance_group.py
+index 8a12a87693..ead359cfff 100644
+--- a/nova/objects/instance_group.py
++++ b/nova/objects/instance_group.py
+@@ -152,6 +152,8 @@ class InstanceGroup(base.NovaPersistentObject, base.NovaObject,
+         if 'max_server_per_host' in self._rules:
+             rules['max_server_per_host'] = \
+                     int(self._rules['max_server_per_host'])
++        if 'network_group' in self._rules:
++            rules['network_group'] = self._rules['network_group']
+         return rules
+ 
+     def obj_make_compatible(self, primitive, target_version):
+diff --git a/nova/scheduler/filters/network_group_filter.py b/nova/scheduler/filters/network_group_filter.py
+new file mode 100644
+index 0000000000..e717b06e03
+--- /dev/null
++++ b/nova/scheduler/filters/network_group_filter.py
+@@ -0,0 +1,135 @@
++# Copyright 2025 Rackspace Technology, Inc.
++# All Rights Reserved.
++#
++# Licensed under the Apache License, Version 2.0 (the "License"); you may
++# not use this file except in compliance with the License. You may obtain
++# a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
++# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
++# License for the specific language governing permissions and limitations
++# under the License.
++
++"""Scheduler filters for network group affinity and anti-affinity.
++
++These filters constrain instance placement based on the physical network
++group (VLAN group / cabinet switch pair) that an Ironic node belongs to.
++
++The network group is specified in a server group's ``rules`` field at
++creation time and is matched against ``CUSTOM_NETGROUP_*`` traits reported
++by Ironic nodes via the Placement service.
++"""
++
++from oslo_log import log as logging
++
++from nova.scheduler import filters
++
++LOG = logging.getLogger(__name__)
++
++# Prefix used when converting a network group name to a trait.
++# Example: "a1-1-network" -> "CUSTOM_NETGROUP_A1_1_NETWORK"
++_TRAIT_PREFIX = "CUSTOM_NETGROUP_"
++
++
++def _network_group_to_trait(network_group):
++    """Convert a network group name to its corresponding Placement trait.
++
++    :param network_group: The network group name (e.g. "a1-1-network")
++    :returns: The trait string (e.g. "CUSTOM_NETGROUP_A1_1_NETWORK")
++    """
++    normalised = network_group.upper().replace("-", "_").replace("/", "_")
++    return _TRAIT_PREFIX + normalised
++
++
++class NetworkGroupAffinityFilter(filters.BaseHostFilter):
++    """Schedule instances onto hosts within a specific network group.
++
++    When a server group has the ``network-group-affinity`` policy and a
++    ``network_group`` rule, this filter only passes hosts whose reported
++    traits include the matching ``CUSTOM_NETGROUP_*`` trait.
++
++    Hosts without the required trait are rejected.
++    """
++
++    # The trait set of a host does not change within a single scheduling
++    # request.
++    run_filter_once_per_request = True
++
++    RUN_ON_REBUILD = False
++
++    def host_passes(self, host_state, spec_obj):
++        instance_group = spec_obj.instance_group
++        if not instance_group:
++            return True
++
++        policy = instance_group.policy if instance_group else None
++        if policy != 'network-group-affinity':
++            return True
++
++        rules = instance_group.rules
++        network_group = rules.get('network_group') if rules else None
++        if not network_group:
++            return True
++
++        required_trait = _network_group_to_trait(network_group)
++
++        host_traits = set()
++        if hasattr(host_state, 'traits'):
++            host_traits = host_state.traits
++
++        passes = required_trait in host_traits
++        if not passes:
++            LOG.debug(
++                "NetworkGroupAffinityFilter: host %(host)s rejected. "
++                "Required trait %(trait)s not found in host traits.",
++                {'host': host_state.host, 'trait': required_trait})
++        return passes
++
++
++class NetworkGroupAntiAffinityFilter(filters.BaseHostFilter):
++    """Schedule instances onto hosts NOT within a specific network group.
++
++    When a server group has the ``network-group-anti-affinity`` policy and
++    a ``network_group`` rule, this filter rejects hosts whose reported
++    traits include the matching ``CUSTOM_NETGROUP_*`` trait.
++
++    This is useful for spreading workloads across cabinets or ensuring
++    instances avoid a particular switch pair.
++    """
++
++    # The trait set of a host does not change within a single scheduling
++    # request.
++    run_filter_once_per_request = True
++
++    RUN_ON_REBUILD = False
++
++    def host_passes(self, host_state, spec_obj):
++        instance_group = spec_obj.instance_group
++        if not instance_group:
++            return True
++
++        policy = instance_group.policy if instance_group else None
++        if policy != 'network-group-anti-affinity':
++            return True
++
++        rules = instance_group.rules
++        network_group = rules.get('network_group') if rules else None
++        if not network_group:
++            return True
++
++        excluded_trait = _network_group_to_trait(network_group)
++
++        host_traits = set()
++        if hasattr(host_state, 'traits'):
++            host_traits = host_state.traits
++
++        passes = excluded_trait not in host_traits
++        if not passes:
++            LOG.debug(
++                "NetworkGroupAntiAffinityFilter: host %(host)s rejected. "
++                "Excluded trait %(trait)s found in host traits.",
++                {'host': host_state.host, 'trait': excluded_trait})
++        return passes
+diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py
+index 58a52ab02d..3d127f5c3a 100644
+--- a/nova/scheduler/utils.py
++++ b/nova/scheduler/utils.py
+@@ -1170,7 +1170,8 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None):
+         return
+ 
+     policies = set(('anti-affinity', 'affinity', 'soft-affinity',
+-                    'soft-anti-affinity'))
++                    'soft-anti-affinity', 'network-group-affinity',
++                    'network-group-anti-affinity'))
+     if group.policy in policies:
+         if not _SUPPORTS_AFFINITY and 'affinity' == group.policy:
+             msg = _("ServerGroupAffinityFilter not configured")
diff --git a/containers/nova/patches/series b/containers/nova/patches/series
index 2d6b2a32b..2aeb04ff0 100644
--- a/containers/nova/patches/series
+++ b/containers/nova/patches/series
@@ -1,2 +1,3 @@
 0001_trunk_details_metadata.patch
 ironic-attach-debug.patch
+0002_network_group_affinity_policy.patch
diff --git a/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py b/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py
index c88e67deb..977310daa 100644
--- a/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py
+++ b/python/ironic-understack/ironic_understack/inspect_hook_update_baremetal_ports.py
@@ -236,16 +236,28 @@ def _set_node_traits(task, vlan_groups: set[str]):
     For example, a connection to VLAN Group whose name ends in "-storage" will
     result in a trait being added to the node called "CUSTOM_STORAGE_SWITCH".
 
+    We also add a CUSTOM_NETGROUP_<name> trait for each "-network" VLAN group
+    the node is connected to. This trait is used by the Nova scheduler's
+    NetworkGroupAffinityFilter and NetworkGroupAntiAffinityFilter to constrain
+    instance placement to specific cabinet switch pairs.
+
     We remove pre-existing traits if the node does not have the required
     connections.
 
-    Traits other than CUSTOM_*_SWITCH are left alone.
+    Traits other than CUSTOM_*_SWITCH and CUSTOM_NETGROUP_* are left alone.
     """
     node = task.node
     existing_traits = set(node.traits.get_trait_names())
     vlan_group_traits = {_trait_name(x) for x in vlan_groups if x}
+    network_group_traits = {
+        _network_group_trait_name(x)
+        for x in vlan_groups
+        if x and x.endswith("-network")
+    }
     irrelevant_existing_traits = {x for x in existing_traits if not _is_our_trait(x)}
-    required_traits = irrelevant_existing_traits.union(vlan_group_traits)
+    required_traits = irrelevant_existing_traits.union(vlan_group_traits).union(
+        network_group_traits
+    )
 
     if existing_traits == required_traits:
         LOG.debug(
@@ -269,5 +281,22 @@ def _trait_name(vlan_group_name: str) -> str:
     return f"CUSTOM_{suffix}_SWITCH"
 
 
+def _network_group_trait_name(vlan_group_name: str) -> str:
+    """Convert a VLAN group name to a CUSTOM_NETGROUP_* trait.
+
+    This trait is consumed by Nova's NetworkGroupAffinityFilter and
+    NetworkGroupAntiAffinityFilter to constrain scheduling to nodes
+    within a specific cabinet / switch pair.
+
+    Example: "a1-1-network" -> "CUSTOM_NETGROUP_A1_1_NETWORK"
+    Example: "a11-12/a11-13-network" -> "CUSTOM_NETGROUP_A11_12_A11_13_NETWORK"
+    """
+    normalised = vlan_group_name.upper().replace("-", "_").replace("/", "_")
+    return f"CUSTOM_NETGROUP_{normalised}"
+
+
 def _is_our_trait(name: str) -> bool:
-    return bool(re.match(r"^CUSTOM_[A-Z0-9]+_SWITCH$", name))
+    return bool(
+        re.match(r"^CUSTOM_[A-Z0-9]+_SWITCH$", name)
+        or re.match(r"^CUSTOM_NETGROUP_[A-Z0-9_]+$", name)
+    )
diff --git a/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py b/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py
index 8d3ab3e7b..bb2a57cfa 100644
--- a/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py
+++ b/python/ironic-understack/ironic_understack/tests/test_inspect_hook_update_baremetal_ports.py
@@ -6,6 +6,11 @@
 from ironic_understack.inspect_hook_update_baremetal_ports import (
     InspectHookUpdateBaremetalPorts,
 )
+from ironic_understack.inspect_hook_update_baremetal_ports import _is_our_trait
+from ironic_understack.inspect_hook_update_baremetal_ports import (
+    _network_group_trait_name,
+)
+from ironic_understack.inspect_hook_update_baremetal_ports import _trait_name
 
 # load some metaprgramming normally taken care of during Ironic initialization:
 ironic.objects.register_all()
@@ -221,5 +226,104 @@ def test_node_traits_updated(mocker, caplog):
 
     mock_node.save.assert_called_once()
     trait_create.assert_called_once_with(
-        mock_context, 1234, {"CUSTOM_STORAGE_SWITCH", "CUSTOM_NETWORK_SWITCH", "bar"}
+        mock_context,
+        1234,
+        {
+            "CUSTOM_STORAGE_SWITCH",
+            "CUSTOM_NETWORK_SWITCH",
+            "CUSTOM_NETGROUP_F20_3_NETWORK",
+            "bar",
+        },
     )
+
+
+class TestTraitNames:
+    def test_trait_name_network(self):
+        assert _trait_name("f20-3-network") == "CUSTOM_NETWORK_SWITCH"
+
+    def test_trait_name_storage(self):
+        assert _trait_name("f20-3-storage") == "CUSTOM_STORAGE_SWITCH"
+
+    def test_network_group_trait_simple(self):
+        assert _network_group_trait_name("a1-1-network") == (
+            "CUSTOM_NETGROUP_A1_1_NETWORK"
+        )
+
+    def test_network_group_trait_with_datacenter_prefix(self):
+        assert _network_group_trait_name("f20-3-network") == (
+            "CUSTOM_NETGROUP_F20_3_NETWORK"
+        )
+
+    def test_network_group_trait_cross_rack(self):
+        """Cross-rack VLAN groups use slash separator."""
+        assert _network_group_trait_name("a11-12/a11-13-network") == (
+            "CUSTOM_NETGROUP_A11_12_A11_13_NETWORK"
+        )
+
+    def test_is_our_trait_switch_pattern(self):
+        assert _is_our_trait("CUSTOM_NETWORK_SWITCH") is True
+        assert _is_our_trait("CUSTOM_STORAGE_SWITCH") is True
+        assert _is_our_trait("CUSTOM_BMC_SWITCH") is True
+
+    def test_is_our_trait_netgroup_pattern(self):
+        assert _is_our_trait("CUSTOM_NETGROUP_A1_1_NETWORK") is True
+        assert _is_our_trait("CUSTOM_NETGROUP_F20_3_NETWORK") is True
+        assert _is_our_trait("CUSTOM_NETGROUP_A11_12_A11_13_NETWORK") is True
+
+    def test_is_our_trait_unrelated(self):
+        """Traits we don't manage should not match."""
+        assert _is_our_trait("CUSTOM_HW_SOMETHING") is False
+        assert _is_our_trait("bar") is False
+        assert _is_our_trait("CUSTOM_NETGROUP") is False
+
+
+class TestNetgroupTraitIncludedInNodeTraits:
+    """Verify that network group traits are added alongside switch traits."""
+
+    def test_traits_include_netgroup(self, mocker, caplog):
+        import logging
+
+        caplog.set_level(logging.DEBUG)
+
+        mock_traits = mocker.Mock()
+        mock_context = mocker.Mock()
+        mock_node = mocker.Mock(id=5678, traits=mock_traits)
+        mock_task = mocker.Mock(node=mock_node, context=mock_context)
+
+        mocker.patch(
+            "ironic_understack.inspect_hook_update_baremetal_ports."
+            "ironic_ports_for_node",
+            return_value=[],
+        )
+        mocker.patch(
+            "ironic_understack.inspect_hook_update_baremetal_ports."
+            "CONF.ironic_understack.switch_name_vlan_group_mapping",
+            MAPPING,
+        )
+        trait_create = mocker.patch(
+            "ironic_understack.inspect_hook_update_baremetal_ports."
+            "objects.TraitList.create"
+        )
+
+        # Existing traits include one we manage and one we don't
+        mock_traits.get_trait_names.return_value = [
+            "CUSTOM_NETWORK_SWITCH",
+            "CUSTOM_UNRELATED_THING",
+        ]
+
+        InspectHookUpdateBaremetalPorts().__call__(mock_task, _INVENTORY, _PLUGIN_DATA)
+
+        mock_node.save.assert_called_once()
+        created_traits = trait_create.call_args[0][2]
+
+        # Should include both switch-type traits and netgroup trait
+        assert "CUSTOM_NETWORK_SWITCH" in created_traits
+        assert "CUSTOM_STORAGE_SWITCH" in created_traits
+        assert "CUSTOM_NETGROUP_F20_3_NETWORK" in created_traits
+        # Unrelated trait should be preserved
+        assert "CUSTOM_UNRELATED_THING" in created_traits
+        # Only the expected netgroup trait may be present; any other
+        # CUSTOM_NETGROUP_* trait would be spurious.
+        for trait in created_traits:
+            if trait.startswith("CUSTOM_NETGROUP_"):
+                assert trait == "CUSTOM_NETGROUP_F20_3_NETWORK"