[Qwen3VL] Add clear_grpah_opt_backend method to Qwen3VLForConditionalGeneration (#7110)

Guo-Yilong · web-flow · commit c55f2e00da19 · 2026-03-31T20:37:04.000+08:00
Add a clear_grpah_opt_backend method that delegates to the underlying model
to clear the CUDA graph optimization backend.
diff --git a/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py b/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py
@@ -381,6 +381,10 @@ def forward(
 
         return hidden_states
 
+    def clear_grpah_opt_backend(self):
+        """Clear the graph optimization backend via self.model; the captured CUDA graph will be cleaned."""
+        self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
+
 
 class Qwen3VLPretrainedModel(PretrainedModel):
     """Utilities for tensor-parallel weight splitting."""