Commit 8860f64

fix: sofai graph coloring example — broken model and incorrect problem (#806) (#807)
- Replace `phi:2.7b` (crashes with a GGUF sampler assertion) with `granite4:micro`.
- Replace `qwen3-4b-thinking` with `granite4:latest` (already used elsewhere).
- Fix the graph/description mismatch: the graph dict was a path but the description claimed a triangle, making the problem unsolvable with 2 colors.
- Use an odd 5-cycle (A-B-C-D-E-A) with 3 colors, which is non-trivial enough that `granite4:micro` consistently fails, properly exercising the SOFAI retry loop and S1→S2 escalation.

Closes #806
1 parent 417b7c8 commit 8860f64
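The rationale for the new graph can be checked directly. The following brute-force sketch (plain Python, independent of the example code; `colorable` and `is_valid` are illustrative helper names) confirms that the old path graph was trivially 2-colorable, while the new odd 5-cycle genuinely needs a third color:

```python
from itertools import product

def is_valid(graph, assignment):
    # A coloring is valid when no edge joins two same-colored nodes.
    return all(assignment[u] != assignment[v] for u in graph for v in graph[u])

def colorable(graph, k):
    # Brute force: try every assignment of k colors to the nodes.
    nodes = list(graph)
    return any(
        is_valid(graph, dict(zip(nodes, combo)))
        for combo in product(range(k), repeat=len(nodes))
    )

# Old graph: a path A-B-C (the description wrongly claimed a triangle).
path = {"A": ["B"], "B": ["A", "C"], "C": ["B"]}
# New graph: an odd 5-cycle, which is not bipartite.
cycle5 = {"A": ["B", "E"], "B": ["A", "C"], "C": ["B", "D"],
          "D": ["C", "E"], "E": ["D", "A"]}

print(colorable(path, 2))    # True: the old problem was trivially solvable
print(colorable(cycle5, 2))  # False: an odd cycle needs a third color
print(colorable(cycle5, 3))  # True
```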

3 files changed

Lines changed: 26 additions & 24 deletions

CONTRIBUTING.md

Lines changed: 1 addition & 3 deletions
```
@@ -372,8 +372,6 @@ models must be pulled locally before running the tests that need them.
 - `granite4:latest` — melp examples
 - `llama3.2` — repair-with-guardian example
 - `llama3.2:3b` — tutorial / mify examples (via `META_LLAMA_3_2_3B`)
-- `phi:2.7b` — SOFAI graph-colouring example
-- `pielee/qwen3-4b-thinking-2507_q8:latest` — SOFAI S2 solver
 - `qwen2.5vl:7b` — vision (OpenAI-via-Ollama) example

 **Additional test models (`test/`):**
@@ -390,7 +388,7 @@ Pull everything:
 for m in granite4:micro granite4:micro-h deepseek-r1:8b \
   granite3-guardian:2b granite3.2-vision granite3.3:8b granite4:latest \
-  llama3.2 llama3.2:3b phi:2.7b pielee/qwen3-4b-thinking-2507_q8:latest \
+  llama3.2 llama3.2:3b \
   qwen2.5vl:7b granite4:small-h llama3.2:1b llama3:8b llava mistral:7b \
   smollm2:1.7b; do ollama pull "$m"; done
```

docs/examples/sofai/README.md

Lines changed: 7 additions & 7 deletions
```
@@ -51,8 +51,8 @@ from mellea.stdlib.sampling import SOFAISamplingStrategy
 from mellea.stdlib.requirements import req

 # Create fast and slow backends
-s1_backend = OllamaModelBackend(model_id="phi:2.7b")
-s2_backend = OllamaModelBackend(model_id="qwen3-4b-thinking")
+s1_backend = OllamaModelBackend(model_id="granite4:micro")
+s2_backend = OllamaModelBackend(model_id="granite4:latest")

 # Create SOFAI strategy
 strategy = SOFAISamplingStrategy(
@@ -99,16 +99,16 @@ SOFAISamplingStrategy(
 ## Model Selection

 ### Fast Models (S1)
-- phi:2.7b
-- llama2:7b
+- granite4:micro
+- llama3.2:3b
 - mistral:7b
-- granite-3.2-8b-instruct

 ### Slow Models (S2)
-- qwen3-4b-thinking
+- granite4:latest
 - llama3:70b
 - mixtral:8x7b
-- granite-3.3-8b-instruct

 ## Performance Tips
```

docs/examples/sofai/sofai_graph_coloring.py

Lines changed: 18 additions & 14 deletions
```
@@ -7,8 +7,8 @@

 In this example, we use the SOFAI sampling strategy. Because we wrote this
 example to run on consumer grade hardware, each model is still relatively small:
-1. S1 Solver (phi:2.7b) - Fast model with iterative feedback loop
-2. S2 Solver (qwen3-4b-thinking) - Slow model, called once on escalation
+1. S1 Solver (granite4:micro) - Fast model with iterative feedback loop
+2. S2 Solver (granite4:latest) - Slow model, called once on escalation
 3. Custom validator - Provides detailed feedback for constraint violations

 Note: This example uses a custom validator (check_graph_coloring). To use the
```
```
@@ -29,15 +29,21 @@
 from mellea.stdlib.requirements import ValidationResult, req
 from mellea.stdlib.sampling import SOFAISamplingStrategy

-# Define the graph coloring problem
-graph = {"A": ["B"], "B": ["A", "C"], "C": ["B"]}
-colors = ["Red", "Blue"]
+# Define the graph coloring problem — an odd 5-cycle (needs 3 colors;
+# small models often fail on the first attempt, exercising the SOFAI loop).
+graph = {
+    "A": ["B", "E"],
+    "B": ["A", "C"],
+    "C": ["B", "D"],
+    "D": ["C", "E"],
+    "E": ["D", "A"],
+}
+colors = ["Red", "Blue", "Green"]

 graph_description = (
-    f"Color the nodes of the graph (A, B, C) using at most {len(colors)} colors "
+    f"Color the nodes of the graph (A, B, C, D, E) using at most {len(colors)} colors "
     f"({', '.join(colors)}). Adjacent nodes must have different colors. "
-    f"The adjacencies are: A is adjacent to B and C; B is adjacent to A and C; "
-    f"C is adjacent to A and B."
+    f"The adjacencies are: A-B, B-C, C-D, D-E, E-A."
 )

 output_format_instruction = (
```
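The custom validator the example relies on (`check_graph_coloring`) does not appear in this diff. A minimal standalone sketch of the same constraint check, assuming the model's answer has already been parsed into a node-to-color dict (`check_coloring` and its feedback strings are illustrative, not the example's actual code), might look like:

```python
def check_coloring(graph, colors, assignment):
    """Return (ok, problems) for a proposed node -> color assignment."""
    problems = []
    for node in graph:
        if node not in assignment:
            problems.append(f"Node {node} has no color.")
        elif assignment[node] not in colors:
            problems.append(f"Node {node} uses unknown color {assignment[node]!r}.")
    for u, neighbors in graph.items():
        for v in neighbors:
            # Each undirected edge appears twice in the adjacency dict;
            # order the endpoints so it is checked only once.
            cu, cv = assignment.get(u), assignment.get(v)
            if u < v and cu is not None and cu == cv:
                problems.append(f"Adjacent nodes {u} and {v} are both {cu}.")
    return not problems, problems

# The odd 5-cycle and palette from the example.
graph = {"A": ["B", "E"], "B": ["A", "C"], "C": ["B", "D"],
         "D": ["C", "E"], "E": ["D", "A"]}
colors = ["Red", "Blue", "Green"]

ok, problems = check_coloring(
    graph, colors,
    {"A": "Red", "B": "Blue", "C": "Red", "D": "Blue", "E": "Green"},
)
print(ok, problems)  # True []
```

Returning the list of violations, not just a boolean, is what lets the S1 retry loop feed concrete corrections back to the model.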
```
@@ -136,10 +142,8 @@ def check_graph_coloring(ctx) -> ValidationResult:
 def main():
     """Run the graph coloring example with SOFAI strategy."""
     # Initialize backends
-    s1_solver_backend = OllamaModelBackend(model_id="phi:2.7b")
-    s2_solver_backend = OllamaModelBackend(
-        model_id="pielee/qwen3-4b-thinking-2507_q8:latest"
-    )
+    s1_solver_backend = OllamaModelBackend(model_id="granite4:micro")
+    s2_solver_backend = OllamaModelBackend(model_id="granite4:latest")

     # Optional: Initialize judge backend for LLM-as-Judge validation
     # Uncomment to use a third model for validation instead of custom validator
```
```
@@ -191,9 +195,9 @@ def main():

     # Determine which solver was used
     if i < solver_1_attempts:
-        solver_name = "S1 Solver (phi:2.7b)"
+        solver_name = "S1 Solver (granite4:micro)"
     else:
-        solver_name = "S2 Solver (qwen3-4b-thinking)"
+        solver_name = "S2 Solver (granite4:latest)"

     print(f"Solver: {solver_name}")
```