Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/11494.fix.md
Comment thread
jopemachine marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Report `current_revision_id` correctly on deployment responses during rolling updates.
2 changes: 1 addition & 1 deletion src/ai/backend/manager/api/adapters/deployment/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2191,7 +2191,7 @@ def _deployment_data_to_dto(data: ModelDeploymentData) -> DeploymentNode:
created_user_id=data.created_user_id,
options=deployment_options_to_info(data.options),
scaling_state=data.scaling_state,
current_revision_id=data.revision.id if data.revision is not None else None,
current_revision_id=data.current_revision_id,
deploying_revision_id=data.deploying_revision_id,
policy=policy_info,
)
Expand Down
15 changes: 11 additions & 4 deletions src/ai/backend/manager/api/rest/service/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
DeploymentNetworkSpec,
ExecutionSpec,
ImageIdentifierDraft,
ModelRevisionSpec,
ModelRevisionSpecDraft,
MountMetadata,
ReplicaSpec,
Expand Down Expand Up @@ -167,6 +168,14 @@ def _serve_info_from_dto(dto: ServiceInfo, runtime_variant_name: RuntimeVariant)
)


def _resolve_target_revision_spec(info: DeploymentInfo) -> ModelRevisionSpec | None:
"""Resolve the target revision spec by id (current first, then deploying)."""
target_id = info.current_revision_id or info.deploying_revision_id
if target_id is None:
return None
return next((r for r in info.model_revisions if r.revision_id == target_id), None)


def _serve_info_from_deployment_info(
deployment_info: DeploymentInfo,
runtime_variant_name: RuntimeVariant,
Expand All @@ -177,7 +186,7 @@ def _serve_info_from_deployment_info(
active revision's ``runtime_variant_id`` (internal data types are
id-only; the legacy REST response still exposes the name string).
"""
model_revision = deployment_info.model_revisions[0] if deployment_info.model_revisions else None
model_revision = _resolve_target_revision_spec(deployment_info)

return ServeInfoModel(
endpoint_id=deployment_info.id,
Expand Down Expand Up @@ -378,9 +387,7 @@ async def create(self, body: BodyParam[NewServiceRequestModel], req: RequestCtx)
await self._deployment.create_legacy_deployment.wait_for_complete(deployment_action)
)
deployment_info = deployment_result.data
model_revision = (
deployment_info.model_revisions[0] if deployment_info.model_revisions else None
)
model_revision = _resolve_target_revision_spec(deployment_info)
if model_revision is None:
raise RuntimeVariantNotFound()
runtime_variant_name = await self._resolve_runtime_variant_name(
Expand Down
1 change: 1 addition & 0 deletions src/ai/backend/manager/data/deployment/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ class ModelDeploymentData:
metadata: ModelDeploymentMetadataInfo
network_access: DeploymentNetworkSpec
revision: ModelRevisionData | None
current_revision_id: DeploymentRevisionID | None
deploying_revision_id: DeploymentRevisionID | None
revision_history_ids: list[DeploymentRevisionID]
scaling_rule_ids: list[UUID]
Expand Down
1 change: 1 addition & 0 deletions src/ai/backend/manager/models/endpoint/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ class EndpointRow(Base): # type: ignore[misc]
"DeploymentRevisionRow",
back_populates="endpoint_row",
primaryjoin=_get_endpoint_revisions_join_condition,
order_by="DeploymentRevisionRow.revision_number.desc()",
Comment thread
jopemachine marked this conversation as resolved.
)

auto_scaling_policy: Mapped[DeploymentAutoScalingPolicyRow | None] = relationship(
Expand Down
20 changes: 17 additions & 3 deletions src/ai/backend/manager/services/deployment/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,23 @@ def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentDat

Note: Some fields are set to defaults as DeploymentInfo doesn't have all the data.
"""
# Map revision if available
revision: ModelRevisionData | None = None
if info.model_revisions:
rev = info.model_revisions[0]
rev: ModelRevisionSpec | None = None
if info.current_revision_id is not None:
rev = next(
(r for r in info.model_revisions if r.revision_id == info.current_revision_id),
None,
)
if rev is None:
log.error(
"Deployment {} has current_revision_id {} but no matching "
"ModelRevisionSpec was found in DeploymentInfo.model_revisions; "
"current_revision will be reported as null. This usually means "
"EndpointRow.revisions was not eagerly loaded by the caller.",
info.id,
info.current_revision_id,
)
if rev is not None:
if rev.revision_id is None:
raise ValueError(f"ModelRevisionSpec has no revision_id for deployment {info.id}")
revision = ModelRevisionData(
Expand Down Expand Up @@ -283,6 +296,7 @@ def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentDat
network_access=info.network,
revision_history_ids=[info.current_revision_id] if info.current_revision_id else [],
revision=revision,
current_revision_id=info.current_revision_id,
deploying_revision_id=info.deploying_revision_id,
scaling_rule_ids=[], # Not available in DeploymentInfo
replica_state=ReplicaStateData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from __future__ import annotations

import uuid
from collections.abc import Callable
from datetime import UTC, datetime
from typing import cast
from unittest.mock import AsyncMock, MagicMock
Expand All @@ -21,6 +22,7 @@
)
from ai.backend.common.dto.manager.v2.deployment.types import IntOrPercent
from ai.backend.common.identifier.deployment import DeploymentID
from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID
from ai.backend.common.identifier.image import ImageID
from ai.backend.common.identifier.runtime_variant import RuntimeVariantID
from ai.backend.common.identifier.vfolder import VFolderUUID
Expand Down Expand Up @@ -50,7 +52,9 @@
ExecutionSpec,
ModelMountConfigData,
ModelRevisionData,
ModelRevisionSpec,
ModelRuntimeConfigData,
MountMetadata,
ReplicaSpec,
ResourceConfigData,
ResourceSpec,
Expand All @@ -76,7 +80,10 @@
AddModelRevisionAction,
)
from ai.backend.manager.services.deployment.processors import DeploymentProcessors
from ai.backend.manager.services.deployment.service import DeploymentService
from ai.backend.manager.services.deployment.service import (
DeploymentService,
_convert_deployment_info_to_data,
)
from ai.backend.manager.sokovan.deployment import DeploymentController


Expand Down Expand Up @@ -634,3 +641,72 @@ async def test_persists_coordinator_jwt_instead_of_random(
creator = cast(RBACEntityCreator[object], repo_call.args[0])
spec = cast(EndpointTokenCreatorSpec, creator.spec)
assert spec.token == sample_coordinator_jwt


class TestConvertDeploymentInfoToData:
"""Regression test for ``_convert_deployment_info_to_data`` (BA-5963)."""

@pytest.fixture
def make_revision_spec(self) -> Callable[[], ModelRevisionSpec]:
def make() -> ModelRevisionSpec:
return ModelRevisionSpec(
revision_id=DeploymentRevisionID(uuid.uuid4()),
image_id=ImageID(uuid.uuid4()),
resource_spec=ResourceSpec(
cluster_mode=ClusterMode.SINGLE_NODE,
cluster_size=1,
resource_slots={"cpu": "1"},
),
mounts=MountMetadata(
model_vfolder_id=VFolderUUID(uuid.uuid4()),
model_definition_path="model-definition.yaml",
model_mount_destination="/models",
extra_mounts=[],
),
execution=ExecutionSpec(
runtime_variant_id=RuntimeVariantID(uuid.uuid4()),
),
)

return make

def test_current_revision_resolved_by_id_match_not_list_order(
self,
make_revision_spec: Callable[[], ModelRevisionSpec],
) -> None:
"""Pin: revision lookup must use explicit ``current_revision_id``, not list[0]."""
deploying_spec = make_revision_spec()
current_spec = make_revision_spec()

deployment_info = DeploymentInfo(
id=DeploymentID(uuid.uuid4()),
metadata=DeploymentMetadata(
name="ba5963-test",
Comment thread
jopemachine marked this conversation as resolved.
domain="default",
project=uuid.uuid4(),
resource_group="default",
created_user=uuid.uuid4(),
session_owner=uuid.uuid4(),
created_at=datetime(2024, 1, 1, tzinfo=UTC),
revision_history_limit=10,
),
state=DeploymentState(
lifecycle=EndpointLifecycle.DEPLOYING,
scaling_state=ScalingState.STABLE,
retry_count=0,
),
replica_spec=ReplicaSpec(replica_count=1),
network=DeploymentNetworkSpec(open_to_public=False),
model_revisions=[deploying_spec, current_spec],
options=DeploymentOptions(),
current_revision_id=current_spec.revision_id,
deploying_revision_id=deploying_spec.revision_id,
)

deployment_data = _convert_deployment_info_to_data(deployment_info)

assert deployment_data.current_revision_id == current_spec.revision_id
assert deployment_data.deploying_revision_id == deploying_spec.revision_id
assert deployment_data.current_revision_id != deployment_data.deploying_revision_id
assert deployment_data.revision is not None
assert deployment_data.revision.id == current_spec.revision_id
Loading