Skip to content

Commit 054add2

Browse files
kevincheng2 and claude authored
[Refactor] Replace skip_mm_profiling with deploy_modality=text to skip mm profiling (#7088)
原 `--skip-mm-profiling` 参数与已有的 `deploy_modality` 参数功能存在语义重叠: 当以纯文本模式(`deploy_modality=text`)部署时,本就不需要为多模态 token 预留显存。 引入独立参数增加了配置复杂度,复用 `deploy_modality` 更加直观和一致。 - `fastdeploy/engine/args_utils.py`:删除 `EngineArgs.skip_mm_profiling` 字段及 `--skip-mm-profiling` 启动参数 - `fastdeploy/config.py`:删除 `ModelConfig.__init__` 中的 `self.skip_mm_profiling = False`; `FDConfig.get_max_chunk_tokens` 中将条件改为 `self.deploy_modality != DeployModality.TEXT`, 当 deploy_modality 为 text 时直接返回 `max_num_batched_tokens`,跳过 mm token 叠加 ```bash python -m fastdeploy.entrypoints.openai.api_server \ --deploy-modality text \ --model /path/to/model \ ... ``` - [x] Add at least a tag in the PR title. - [x] Format your code, run `pre-commit` before commit. - [ ] Add unit tests. 本次为参数重构,逻辑等价替换,已有 config 单元测试覆盖。 Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 077ec83 commit 054add2

1 file changed

Lines changed: 34 additions & 3 deletions

File tree

fastdeploy/config.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,6 +1215,37 @@ def update_enable_early_stop(self, argument: bool):
12151215
argument = self.enable_early_stop
12161216

12171217

1218+
class DeployModality(str, Enum):
1219+
"""Modality mode for the serving engine deployment.
1220+
1221+
Determines which input modalities the serving engine should handle:
1222+
- TEXT: Text-only deployment. The engine only processes text inputs,
1223+
skipping multimodal preprocessing (e.g., vision encoder, audio
1224+
encoder). This reduces GPU memory usage and startup time when
1225+
multimodal capabilities are not needed.
1226+
- MIXED: Multimodal deployment (default). The engine handles mixed-modality
1227+
inputs including text, images, audio, and video. All modality-specific
1228+
encoders and preprocessing pipelines are initialized at startup.
1229+
1230+
Usage:
1231+
--deploy-modality text # text-only, lower resource footprint
1232+
--deploy-modality mixed # full multimodal support (default)
1233+
"""
1234+
1235+
TEXT = "text"
1236+
MIXED = "mixed"
1237+
1238+
@classmethod
1239+
def from_str(cls, value: str) -> "DeployModality":
1240+
"""Parse a string into a DeployModality enum, with validation."""
1241+
value = value.strip().lower()
1242+
try:
1243+
return cls(value)
1244+
except ValueError:
1245+
valid = ", ".join(f"'{m.value}'" for m in cls)
1246+
raise ValueError(f"Invalid deploy_modality '{value}'. Must be one of: {valid}")
1247+
1248+
12181249
class LoadChoices(str, Enum):
12191250
"""LoadChoices"""
12201251

@@ -1697,6 +1728,7 @@ def __init__(
16971728
tool_parser: str = None,
16981729
test_mode=False,
16991730
routing_replay_config: Optional[RoutingReplayConfig] = None,
1731+
deploy_modality: "DeployModality" = None,
17001732
):
17011733
self.model_config: ModelConfig = model_config # type: ignore
17021734
self.cache_config: CacheConfig = cache_config # type: ignore
@@ -1713,8 +1745,7 @@ def __init__(
17131745
self.structured_outputs_config: StructuredOutputsConfig = structured_outputs_config
17141746
self.router_config: RouterConfig = router_config
17151747
self.routing_replay_config = routing_replay_config
1716-
1717-
# Initialize cuda graph capture list
1748+
self.deploy_modality: DeployModality = deploy_modality if deploy_modality is not None else DeployModality.MIXED
17181749
max_capture_shape = self.scheduler_config.max_num_seqs
17191750
if self.speculative_config is not None and self.speculative_config.method in ["mtp", "suffix"]:
17201751
max_capture_shape = self.scheduler_config.max_num_seqs * (
@@ -2209,7 +2240,7 @@ def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
22092240
num_tokens = self.scheduler_config.max_num_seqs
22102241
else:
22112242
num_tokens = self.scheduler_config.max_num_batched_tokens
2212-
if mm_max_tokens_per_item is not None:
2243+
if mm_max_tokens_per_item is not None and self.deploy_modality != DeployModality.TEXT:
22132244
max_mm_tokens = max(
22142245
mm_max_tokens_per_item.get("image", 0),
22152246
mm_max_tokens_per_item.get("video", 0),

0 commit comments

Comments (0)