diff --git a/components/images-openstack.yaml b/components/images-openstack.yaml index c6e33d2a8..38f05a57a 100644 --- a/components/images-openstack.yaml +++ b/components/images-openstack.yaml @@ -42,8 +42,8 @@ images: neutron_metadata: "ghcr.io/rackerlabs/understack/neutron:2026.1" neutron_ovn_metadata: "ghcr.io/rackerlabs/understack/neutron:2026.1" neutron_openvswitch_agent: "ghcr.io/rackerlabs/understack/neutron:2026.1" - neutron_server: "ghcr.io/rackerlabs/understack/neutron:2026.1" - neutron_rpc_server: "ghcr.io/rackerlabs/understack/neutron:2026.1" + neutron_server: "ghcr.io/rackerlabs/understack/neutron:pr-2075" + neutron_rpc_server: "ghcr.io/rackerlabs/understack/neutron:pr-2075" neutron_bagpipe_bgp: "ghcr.io/rackerlabs/understack/neutron:2026.1" neutron_netns_cleanup_cron: "ghcr.io/rackerlabs/understack/neutron:2026.1" diff --git a/python/neutron-understack/neutron_understack/l3_router/svi.py b/python/neutron-understack/neutron_understack/l3_router/svi.py index 619a427af..86cddb0db 100644 --- a/python/neutron-understack/neutron_understack/l3_router/svi.py +++ b/python/neutron-understack/neutron_understack/l3_router/svi.py @@ -1,5 +1,337 @@ -from neutron.services.ovn_l3.service_providers.user_defined import UserDefined +import logging +from neutron.services.l3_router.service_providers import base +from neutron_lib import constants as const +from neutron_lib import exceptions as n_exc +from neutron_lib.callbacks import events +from neutron_lib.callbacks import registry +from neutron_lib.callbacks import resources +from neutron_lib.plugins import constants as plugin_constants +from neutron_lib.plugins import directory -class Svi(UserDefined): - pass +LOG = logging.getLogger(__name__) + +# Full dotted path as stored in the flavor profile's service_providers table. +# Must match the value in understack/components/neutron/values.yaml. +SVI_DRIVER = "neutron_understack.l3_router.svi.Svi" + + +def _is_svi_router(context, router): + flavor_id = router.get("flavor_id") + if not flavor_id or flavor_id is const.ATTR_NOT_SPECIFIED: + LOG.debug("SVI check: router %s has no flavor, skipping", router.get("id")) + return False + flavor_plugin = directory.get_plugin(plugin_constants.FLAVORS) + flavor = flavor_plugin.get_flavor(context, flavor_id) + provider = flavor_plugin.get_flavor_next_provider(context, flavor["id"])[0] + driver = provider["driver"] + is_svi = driver == SVI_DRIVER + LOG.debug( + "SVI check: router %s flavor %s driver %s is_svi=%s", + router.get("id"), + flavor_id, + driver, + is_svi, + ) + return is_svi + + +def _get_subnet_address_scope(context, subnet_id): + """Return (ip_version, address_scope_id). scope is None if not set.""" + core_plugin = directory.get_plugin() + subnet = core_plugin.get_subnet(context, subnet_id) + ip_version = subnet.get("ip_version") + subnetpool_id = subnet.get("subnetpool_id") + if not subnetpool_id: + LOG.debug("Subnet %s has no subnetpool, no address scope", subnet_id) + return ip_version, None + subnetpool = core_plugin.get_subnetpool(context, subnetpool_id) + scope_id = subnetpool.get("address_scope_id") + LOG.debug( + "Subnet %s subnetpool %s address_scope %s (IPv%s)", + subnet_id, + subnetpool_id, + scope_id, + ip_version, + ) + return ip_version, scope_id + + +def _get_existing_router_subnet_ids(context, router_id): + """Return subnet IDs of all internal interfaces already on the router.""" + core_plugin = directory.get_plugin() + ports = core_plugin.get_ports( + context, + filters={ + "device_id": [router_id], + "device_owner": [const.DEVICE_OWNER_ROUTER_INTF], + }, + ) + subnet_ids = [ + ip["subnet_id"] + for port in ports + for ip in port.get("fixed_ips", []) + if ip.get("subnet_id") + ] + LOG.debug( + "Router %s already has %d subnet(s): %s", + router_id, + len(subnet_ids), + subnet_ids, + ) + return subnet_ids + + +def _validate_address_scope_rules(context, router_id, new_subnet_ids): + """Validate address scope rules for subnets being attached to an SVI router. + + Raises BadRequest if any subnet has no scope or conflicts with existing ones. + Returns {ip_version: scope_id} for the new subnets. + """ + new_scopes: dict[int, str] = {} + LOG.debug( + "SVI scope check: router %s validating new subnet(s)=%s", + router_id, + new_subnet_ids, + ) + + # Rule 1: every new subnet must belong to an address scope + for subnet_id in new_subnet_ids: + ip_version, scope_id = _get_subnet_address_scope(context, subnet_id) + if not scope_id: + LOG.warning( + "SVI scope check FAILED: subnet %s has no address scope (router %s)", + subnet_id, + router_id, + ) + raise n_exc.BadRequest( + resource="router", + msg=( + f"Subnet {subnet_id} must belong to an address scope " + "to attach to an SVI router." + ), + ) + if ip_version in new_scopes and new_scopes[ip_version] != scope_id: + LOG.warning( + "SVI scope check FAILED: IPv%s conflict in new attach on router %s - " + "subnet %s scope=%s previous_scope=%s", + ip_version, + router_id, + subnet_id, + scope_id, + new_scopes[ip_version], + ) + raise n_exc.BadRequest( + resource="router", + msg=( + f"Cannot attach subnets {new_subnet_ids!r}: IPv{ip_version} " + f"address scope {scope_id!r} differs from scope " + f"{new_scopes[ip_version]!r} in the same request." + ), + ) + new_scopes[ip_version] = scope_id + LOG.debug( + "SVI scope check resolved: router %(router)s subnet %(subnet)s " + "IPv%(ip_version)s scope %(scope)s", + { + "router": router_id, + "subnet": subnet_id, + "ip_version": ip_version, + "scope": scope_id, + }, + ) + + LOG.debug( + "SVI scope check requested scopes: router %(router)s scopes=%(scopes)s", + {"router": router_id, "scopes": new_scopes}, + ) + + # Rule 2: must not conflict with existing interfaces (per IP version) + for existing_subnet_id in _get_existing_router_subnet_ids(context, router_id): + ip_version, existing_scope = _get_subnet_address_scope( + context, existing_subnet_id + ) + if ip_version not in new_scopes: + LOG.debug( + "SVI scope check compare skipped: router %(router)s existing " + "subnet %(subnet)s is IPv%(ip_version)s with no new subnet in " + "that IP family", + { + "router": router_id, + "subnet": existing_subnet_id, + "ip_version": ip_version, + }, + ) + continue + LOG.debug( + "SVI scope check compare: router %(router)s IPv%(ip_version)s " + "new_scope=%(new_scope)s existing_scope=%(existing_scope)s " + "existing_subnet=%(subnet)s", + { + "router": router_id, + "ip_version": ip_version, + "new_scope": new_scopes[ip_version], + "existing_scope": existing_scope, + "subnet": existing_subnet_id, + }, + ) + if not existing_scope: + LOG.warning( + "SVI scope check FAILED: existing IPv%s subnet %s on router %s " + "has no address scope", + ip_version, + existing_subnet_id, + router_id, + ) + raise n_exc.BadRequest( + resource="router", + msg=( + f"Existing subnet {existing_subnet_id} on router {router_id} " + "must belong to an address scope before attaching more " + "subnets to an SVI router." + ), + ) + if existing_scope != new_scopes[ip_version]: + LOG.warning( + "SVI scope check FAILED: IPv%s conflict on router %s - " + "new=%s existing=%s (from subnet %s)", + ip_version, + router_id, + new_scopes[ip_version], + existing_scope, + existing_subnet_id, + ) + raise n_exc.BadRequest( + resource="router", + msg=( + f"Cannot attach subnet {new_subnet_ids!r}: its IPv{ip_version} " + f"address scope {new_scopes[ip_version]!r} differs from " + f"scope {existing_scope!r} already in use on router {router_id}." + ), + ) + + return new_scopes + + +def validate_svi_router_port(plugin_context, port): + """Standalone SVI scope validator called from create_port_precommit. + + Fires before port is committed, so invalid subnets never reach the VLAN + allocation / trunk / Undersync steps in create_port_postcommit. + Raises BadRequest if validation fails. + Returns True when an SVI router interface was validated, otherwise False. + """ + device_owner = port.get("device_owner") + if device_owner != const.DEVICE_OWNER_ROUTER_INTF: + LOG.debug( + "precommit SVI scope check skipped: port %s owner %s is not an " + "internal router interface", + port.get("id"), + device_owner, + ) + return False + + router_id = port.get("device_id") + if not router_id: + return False + + try: + l3_plugin = directory.get_plugin(plugin_constants.L3) + router = l3_plugin.get_router(plugin_context, router_id) + except Exception: + LOG.exception( + "precommit SVI scope check failed to fetch router %s for port %s", + router_id, + port.get("id"), + ) + raise + + if not _is_svi_router(plugin_context, router): + LOG.debug( + "precommit SVI scope check skipped: router %(router)s is not SVI " + "(name=%(name)s flavor=%(flavor)s) for port %(port)s", + { + "router": router_id, + "name": router.get("name"), + "flavor": router.get("flavor_id"), + "port": port.get("id"), + }, + ) + return False + + new_subnet_ids = [ + ip["subnet_id"] for ip in port.get("fixed_ips", []) if ip.get("subnet_id") + ] + LOG.info( + "precommit SVI scope check: router %s (%s) port %s network %s subnet(s)=%s", + router_id, + router.get("name"), + port.get("id"), + port.get("network_id"), + new_subnet_ids, + ) + + new_scopes = _validate_address_scope_rules( + plugin_context, router_id, new_subnet_ids + ) + + LOG.info( + "precommit SVI scope check PASSED: router %s port %s subnet(s)=%s scopes=%s", + router_id, + port.get("id"), + new_subnet_ids, + new_scopes, + ) + return True + + +@registry.has_registry_receivers +class Svi(base.L3ServiceProvider): + ha_support = base.OPTIONAL + + def __init__(self, l3_plugin): + super().__init__(l3_plugin) + LOG.info("SVI service provider initialized: driver=%r", SVI_DRIVER) + + @registry.receives(resources.ROUTER_INTERFACE, [events.BEFORE_CREATE]) + def _validate_svi_router_interface(self, _resource, _event, _trigger, payload): + router = payload.states[0] + context = payload.context + router_id = payload.resource_id + + if not _is_svi_router(context, router): + LOG.debug( + "SVI callback validation skipped: router %(router)s is not SVI " + "(name=%(name)s flavor=%(flavor)s)", + { + "router": router_id, + "name": router.get("name"), + "flavor": router.get("flavor_id"), + }, + ) + return + + port = payload.metadata["port"] + new_subnet_ids = [ + ip["subnet_id"] for ip in port.get("fixed_ips", []) if ip.get("subnet_id") + ] + LOG.info( + "SVI callback validation: router %s (%s) port %s network %s " + "owner %s subnet(s)=%s", + router_id, + router.get("name"), + port.get("id"), + port.get("network_id"), + port.get("device_owner"), + new_subnet_ids, + ) + + new_scopes = _validate_address_scope_rules(context, router_id, new_subnet_ids) + + LOG.info( + "SVI callback validation PASSED: router %s port %s subnet(s)=%s scopes=%s", + router_id, + port.get("id"), + new_subnet_ids, + new_scopes, + ) diff --git a/python/neutron-understack/neutron_understack/neutron_understack_mech.py b/python/neutron-understack/neutron_understack/neutron_understack_mech.py index 850b56efd..9bdb6a9d9 100644 --- a/python/neutron-understack/neutron_understack/neutron_understack_mech.py +++ b/python/neutron-understack/neutron_understack/neutron_understack_mech.py @@ -17,6 +17,7 @@ from neutron_understack import routers from neutron_understack import utils from neutron_understack.ironic import IronicClient +from neutron_understack.l3_router import svi as svi_router from neutron_understack.trunk import UnderStackTrunkDriver from neutron_understack.undersync import Undersync @@ -140,7 +141,42 @@ def delete_subnet_postcommit(self, context): pass def create_port_precommit(self, context: PortContext): - pass + # Early SVI address scope check fires before port is committed and + # before create_port_postcommit, so invalid subnets never reach the + # VLAN allocation / trunk / Undersync steps. + # Neutron surfaces the BadRequest back through the router-interface API. + # The ROUTER_INTERFACE BEFORE_CREATE callback in svi.py acts as a + # second safety net if postcommit somehow ran first. + if utils.is_router_interface(context): + LOG.info( + "create_port_precommit: SVI scope check for router port %s " + "device_id=%s", + context.current["id"], + context.current.get("device_id"), + ) + checked = svi_router.validate_svi_router_port( + context.plugin_context, context.current + ) + if checked: + LOG.info( + "create_port_precommit: SVI scope check passed for port %(port)s " + "network %(network)s router %(router)s", + { + "port": context.current["id"], + "network": context.current.get("network_id"), + "router": context.current.get("device_id"), + }, + ) + else: + LOG.debug( + "create_port_precommit: SVI scope check not applicable for " + "port %(port)s owner %(owner)s router %(router)s", + { + "port": context.current["id"], + "owner": context.current.get("device_owner"), + "router": context.current.get("device_id"), + }, + ) def create_port_postcommit(self, context: PortContext) -> None: # Provide network node(s) with connectivity to the networks where this @@ -153,7 +189,34 @@ def create_port_postcommit(self, context: PortContext) -> None: ) if utils.is_router_interface(context): - routers.create_port_postcommit(context) + LOG.info( + "Router interface port %(port)s detected on network %(net)s " + "device_id=%(router)s owner=%(owner)s fixed_ips=%(fixed_ips)s " + "- handing off to routers.create_port_postcommit", + { + "port": port["id"], + "net": port["network_id"], + "router": port.get("device_id"), + "owner": port.get("device_owner"), + "fixed_ips": port.get("fixed_ips", []), + }, + ) + try: + routers.create_port_postcommit(context) + except Exception: + LOG.exception( + "Router interface postcommit failed for port %(port)s " + "network %(network)s router %(router)s owner %(owner)s " + "fixed_ips=%(fixed_ips)s", + { + "port": port["id"], + "network": port["network_id"], + "router": port.get("device_id"), + "owner": port.get("device_owner"), + "fixed_ips": port.get("fixed_ips", []), + }, + ) + raise def update_port_precommit(self, context): pass diff --git a/python/neutron-understack/neutron_understack/routers.py b/python/neutron-understack/neutron_understack/routers.py index 6aabb318e..d7c1a870e 100644 --- a/python/neutron-understack/neutron_understack/routers.py +++ b/python/neutron-understack/neutron_understack/routers.py @@ -44,26 +44,80 @@ def create_port_postcommit(context: PortContext) -> None: In situation 2, we don't have to do anything. """ network_id = context.current["network_id"] + port_id = context.current["id"] + router_id = context.current.get("device_id") if not is_only_router_port_on_network( network_id=network_id, transaction_context=context.plugin_context ): - LOG.debug( - "Creating only a router port %(port)s for a network %(network)s " - "as there are already other routers on the same network.", - {"port": context.current["id"], "network": network_id}, + LOG.info( + "Router port %(port)s on router %(router)s: network %(network)s " + "already has another router interface - VLAN infrastructure already " + "exists, nothing to do", + {"port": port_id, "router": router_id, "network": network_id}, ) return + LOG.info( + "Router port %(port)s on router %(router)s: first router on network " + "%(network)s - starting VLAN/trunk/OVN setup", + {"port": port_id, "router": router_id, "network": network_id}, + ) + + # Step 1: allocate dynamic VLAN segment for the network node physnet + LOG.debug( + "Step 1: allocating dynamic VLAN segment for network %(net)s", + {"net": network_id}, + ) segment = fetch_or_create_router_segment(context) + LOG.info( + "Step 1 done: dynamic segment %(seg)s VLAN %(vlan)s allocated for " + "network %(net)s", + { + "seg": segment["id"], + "vlan": segment.get("segmentation_id"), + "net": network_id, + }, + ) - # Trunk + # Step 2: create shared Neutron port on the network node for this VLAN + LOG.debug( + "Step 2: creating shared Neutron port for segment %(seg)s", + {"seg": segment["id"]}, + ) shared_port = utils.create_neutron_port_for_segment(segment, context) + LOG.info( + "Step 2 done: shared Neutron port %(port)s created on network %(network)s", + {"port": shared_port["id"], "network": network_id}, + ) + + # Step 3: add the shared port as a subport on the network node trunk + LOG.debug( + "Step 3: adding port %(port)s as subport (VLAN %(vlan)s) to network node trunk", + {"port": shared_port["id"], "vlan": segment.get("segmentation_id")}, + ) add_subport_to_trunk(shared_port, segment) + LOG.info("Step 3 done: subport added to trunk") - # OVN + # Step 4: create OVN localnet port to wire OVN to the physical VLAN segment_obj = utils.network_segment_by_id(segment["id"]) + LOG.debug( + "Step 4: creating OVN localnet port uplink-%(seg)s for network %(net)s", + {"seg": segment["id"], "net": network_id}, + ) create_uplink_port(segment_obj, network_id) + LOG.info( + "Step 4 done: OVN localnet port uplink-%(seg)s created for router " + "%(router)s - network=%(network)s shared_port=%(port)s vlan=%(vlan)s. " + "Undersync will push SVI config to physical switch", + { + "seg": segment["id"], + "router": router_id, + "network": network_id, + "port": shared_port["id"], + "vlan": segment.get("segmentation_id"), + }, + ) def is_only_router_port_on_network( @@ -78,7 +132,15 @@ def is_only_router_port_on_network( device_owner=ROUTER_INTERFACE_AND_GW, ) - LOG.debug("Router ports found: %(ports)s", {"ports": other_router_ports}) + router_port_ids = [getattr(port, "id", None) for port in other_router_ports] + LOG.info( + "Router-owned ports on network %(network)s: count=%(count)d ids=%(ports)s", + { + "network": network_id, + "count": len(other_router_ports), + "ports": router_port_ids, + }, + ) return not len(other_router_ports) > 1 @@ -96,13 +158,44 @@ def add_subport_to_trunk(shared_port: PortDict, segment: NetworkSegmentDict) -> }, ] } - trunk_id = utils.fetch_network_node_trunk_id() + try: + trunk_id = utils.fetch_network_node_trunk_id() + except Exception: + LOG.exception( + "add_subport_to_trunk: failed to find network node trunk for " + "shared port %(port)s VLAN %(vlan)s", + { + "port": shared_port["id"], + "vlan": segment["segmentation_id"], + }, + ) + raise - utils.fetch_trunk_plugin().add_subports( - context=n_context.get_admin_context(), - trunk_id=trunk_id, - subports=subports, + LOG.debug( + "add_subport_to_trunk: trunk %(trunk)s port %(port)s VLAN %(vlan)s", + { + "trunk": trunk_id, + "port": shared_port["id"], + "vlan": segment["segmentation_id"], + }, ) + try: + utils.fetch_trunk_plugin().add_subports( + context=n_context.get_admin_context(), + trunk_id=trunk_id, + subports=subports, + ) + except Exception: + LOG.exception( + "add_subport_to_trunk: failed adding shared port %(port)s as " + "VLAN %(vlan)s subport to trunk %(trunk)s", + { + "port": shared_port["id"], + "vlan": segment["segmentation_id"], + "trunk": trunk_id, + }, + ) + raise def fetch_or_create_router_segment(context: PortContext) -> NetworkSegmentDict: @@ -114,14 +207,29 @@ def fetch_or_create_router_segment(context: PortContext) -> NetworkSegmentDict: network_id = context.current["network_id"] physnet = cfg.CONF.ml2_understack.network_node_switchport_physnet if not physnet: + LOG.error( + "Router dynamic segment allocation failed for network %(network)s: " + "ml2_understack.network_node_switchport_physnet is not configured", + {"network": network_id}, + ) raise ValueError( "please configure ml2_understack.network_node_switchport_physnet" ) + LOG.info( + "Fetching or allocating router dynamic segment: network=%(network)s " + "physnet=%(physnet)s", + {"network": network_id, "physnet": physnet}, + ) segment = utils.allocate_dynamic_segment( network_id=network_id, physnet=physnet, ) if not segment: + LOG.error( + "Router dynamic segment allocation returned no segment: " + "network=%(network)s physnet=%(physnet)s", + {"network": network_id, "physnet": physnet}, + ) raise Exception( "failed allocating dynamic segment for" "network_id=%(network_id)s physnet=%(physnet)s", @@ -174,8 +282,21 @@ def create_uplink_port(segment: NetworkSegment, network_id: str, txn=None) -> No ovn_const.LSP_OPTIONS_MCAST_FLOOD: ovs_conf.get_igmp_flood(), ovn_const.LSP_OPTIONS_LOCALNET_LEARN_FDB: fdb_enabled, } - cmd = ovn_client()._nb_idl.create_lswitch_port( - lport_name=f"uplink-{segment['id']}", + port_name = f"uplink-{segment['id']}" + LOG.info( + "Creating OVN localnet port %(port)s: network=%(network)s " + "physnet=%(physnet)s vlan=%(vlan)s options=%(options)s", + { + "port": port_name, + "network": network_id, + "physnet": physnet, + "vlan": tag, + "options": options, + }, + ) + client = ovn_client() + cmd = client._nb_idl.create_lswitch_port( + lport_name=port_name, lswitch_name=ovn_utils.ovn_name(network_id), addresses=[ovn_const.UNKNOWN_ADDR], external_ids={}, @@ -183,7 +304,20 @@ def create_uplink_port(segment: NetworkSegment, network_id: str, txn=None) -> No tag=tag, options=options, ) - ovn_client()._transaction([cmd], txn=txn) + try: + client._transaction([cmd], txn=txn) + except Exception: + LOG.exception( + "Failed creating OVN localnet port %(port)s: network=%(network)s " + "physnet=%(physnet)s vlan=%(vlan)s", + { + "port": port_name, + "network": network_id, + "physnet": physnet, + "vlan": tag, + }, + ) + raise def link_vxlan_network_ha_chassis_group(_resource, _event, _trigger, payload) -> None: @@ -325,11 +459,27 @@ def handle_router_interface_removal(_resource, _event, trigger, payload) -> None if port.device_owner not in ROUTER_INTERFACE_AND_GW: return + LOG.info( + "Router interface removal: port %(port)s router %(router)s network " + "%(network)s owner %(owner)s", + { + "port": port.id, + "router": port.device_id, + "network": network_id, + "owner": port.device_owner, + }, + ) + if not is_only_router_port_on_network(network_id): - LOG.debug( - "Deleting only Router port %(port)s as there are other" - " router ports using the same network", - {"port": port}, + LOG.info( + "Router interface removal: port %(port)s router %(router)s on " + "network %(network)s is not the last router interface, skipping " + "shared infrastructure cleanup", + { + "port": port.id, + "router": port.device_id, + "network": network_id, + }, ) return @@ -337,6 +487,16 @@ def handle_router_interface_removal(_resource, _event, trigger, payload) -> None if not segment: return + LOG.info( + "Router interface removal: last router interface on network %(network)s " + "- cleaning shared infrastructure for segment %(segment)s vlan %(vlan)s", + { + "network": network_id, + "segment": segment["id"], + "vlan": segment.get("segmentation_id"), + }, + ) + shared_port = fetch_shared_router_port(segment) if not shared_port: return @@ -344,13 +504,25 @@ def handle_router_interface_removal(_resource, _event, trigger, payload) -> None handle_subport_removal(shared_port) delete_uplink_port(segment, network_id) shared_port.delete() + LOG.info( + "Router interface removal complete: network %(network)s segment " + "%(segment)s shared_port %(port)s cleaned up", + { + "network": network_id, + "segment": segment["id"], + "port": shared_port["id"], + }, + ) def handle_subport_removal(port: Port) -> None: """Removes router's subport from a network node trunk.""" trunk_id = utils.fetch_network_node_trunk_id() - LOG.debug("Router, Removing subport: %s(port)s", {"port": port}) port_id = port["id"] + LOG.info( + "Removing router shared subport %(port)s from trunk %(trunk)s", + {"port": port_id, "trunk": trunk_id}, + ) try: utils.remove_subport_from_trunk(trunk_id, port_id) except Exception as err: