@@ -767,6 +767,47 @@ def pytest_runtest_setup(item):
767767 except ImportError :
768768 pass
769769
# Warm up Ollama models when entering the Ollama test group, so the first
# test in the group does not pay the cold model-load cost. keep_alive=-1
# asks Ollama to keep each model resident in VRAM until explicitly evicted.
if current_group == "ollama" and prev_group != "ollama":
    logger = FancyLogger.get_logger()
    host_str = os.environ.get("OLLAMA_HOST", "127.0.0.1")
    port = os.environ.get("OLLAMA_PORT", "11434")
    logger.info(
        "Warming up ollama models before ollama group (keep_alive=-1)..."
    )
    for model in ["granite4:micro", "granite4:micro-h", "granite3.2-vision"]:
        try:
            resp = requests.post(
                f"http://{host_str}:{port}/api/generate",
                json={
                    "model": model,
                    "prompt": "hi",
                    "stream": False,
                    # -1 = keep the model loaded indefinitely.
                    "keep_alive": -1,
                },
                # Generous timeout: a cold load + short generation can be slow.
                timeout=120,
            )
            # Fix: previously a non-2xx response (e.g. unknown model name,
            # server error) fell through and logged success. Surface it as
            # a warmup failure via the existing warning path instead.
            resp.raise_for_status()
            logger.info(" Warmed up and pinned: %s", model)
        except Exception as e:
            # Best-effort: tests can still run (slower) if warmup fails.
            logger.warning(" Warmup failed for %s: %s", model, e)
# Evict Ollama models when leaving the Ollama test group so their VRAM is
# freed for whatever backend runs next. A generate request with no prompt
# and keep_alive=0 is the documented way to ask Ollama to unload a model
# immediately.
if prev_group == "ollama" and current_group != "ollama":
    logger = FancyLogger.get_logger()
    host_str = os.environ.get("OLLAMA_HOST", "127.0.0.1")
    port = os.environ.get("OLLAMA_PORT", "11434")
    logger.info("Evicting ollama models from VRAM after ollama group...")
    for model in ["granite4:micro", "granite4:micro-h", "granite3.2-vision"]:
        try:
            resp = requests.post(
                f"http://{host_str}:{port}/api/generate",
                json={"model": model, "keep_alive": 0},
                # Unload should be quick; don't stall teardown on a hung server.
                timeout=10,
            )
            # Fix: previously a non-2xx response still logged "Evicted";
            # check the status so failures hit the warning path below.
            resp.raise_for_status()
            logger.info(" Evicted: %s", model)
        except Exception as e:
            # Best-effort: eviction failure only costs VRAM, never fails tests.
            logger.warning(" Eviction failed for %s: %s", model, e)
# Remember which backend group this test belonged to, stored on the hook
# function itself, so the next setup call can detect group transitions
# (warmup on entering a group, eviction on leaving it).
setattr(pytest_runtest_setup, "_last_backend_group", current_group)
771812
772813 # Check for override flags from CLI
0 commit comments