Skip to content

Commit b2bd536

Browse files
committed
fix: add compat shim for Nemotron's is_flash_attn_greater_or_equal_2_10
The function was renamed to is_flash_attn_greater_or_equal in transformers 5.x, but Nemotron's HF Hub remote code still imports the old name. Inject a compatibility wrapper before model loading.
1 parent d745e9f commit b2bd536

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

src/instructlab/training/model.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,17 @@ def __init__(
8484
if self.is_granitemoehybrid or self.is_nemotronh:
8585
self._use_local_mamba_kernels()
8686

87+
# Compatibility shim for Nemotron's HF Hub remote code which imports
88+
# is_flash_attn_greater_or_equal_2_10, renamed in transformers 5.x.
89+
if self.is_nemotronh:
90+
# Third Party
91+
from transformers.utils import import_utils as _iu
92+
93+
if not hasattr(_iu, "is_flash_attn_greater_or_equal_2_10"):
94+
_iu.is_flash_attn_greater_or_equal_2_10 = lambda: (
95+
_iu.is_flash_attn_greater_or_equal("2.10")
96+
)
97+
8798
if self.is_gpt_oss:
8899
# Third Party
89100
quant_config = Mxfp4Config(dequantize=True)

0 commit comments

Comments
 (0)