Address review: precompute class mapping once, improve test robustness

jeongyoonlee · claude · jeongyoonlee · commit 009a109aa5a9 · 2026-03-13T09:59:03.000-07:00
- Build class_to_forest_idx dict once in predict() instead of per tree
- Use model.n_jobs instead of parallel_backend for parallel test
- Assert that sparse-group condition actually occurred in test

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/causalml/inference/tree/uplift.pyx b/causalml/inference/tree/uplift.pyx
@@ -61,18 +61,20 @@ cdef extern from "math.h":
     double sqrt(double x) nogil
 
 
-def _align_tree_predict(tree, X, forest_classes):
+def _align_tree_predict(tree, X, forest_classes, class_to_forest_idx):
     """Predict with a single tree and align output to the forest's classes.
 
     When a bootstrap sample excludes some treatment groups, the tree's
     classes_ will be a subset of the forest's classes_. This function
     maps the tree's predictions to the forest-level class ordering.
+
+    Args:
+        class_to_forest_idx: Precomputed {class_label: forest_index} mapping.
     """
     raw = tree.predict(X=X)
     if len(tree.classes_) == len(forest_classes):
         return raw
     aligned = np.zeros((raw.shape[0], len(forest_classes)), dtype=raw.dtype)
-    class_to_forest_idx = {cls: idx for idx, cls in enumerate(forest_classes)}
     for tree_idx, cls in enumerate(tree.classes_):
         forest_idx = class_to_forest_idx.get(cls)
         if forest_idx is not None:
@@ -2705,14 +2707,15 @@ class UpliftRandomForestClassifier:
 
         '''
         # Make predictions with all trees and take the average
+        class_to_forest_idx = {cls: idx for idx, cls in enumerate(self.classes_)}
 
         if self.n_jobs != 1:
             y_pred_ensemble = sum(
                 Parallel(n_jobs=self.n_jobs, prefer=self.joblib_prefer)
-                (delayed(_align_tree_predict)(tree, X, self.classes_) for tree in self.uplift_forest)
+                (delayed(_align_tree_predict)(tree, X, self.classes_, class_to_forest_idx) for tree in self.uplift_forest)
             ) / len(self.uplift_forest)
         else:
-            y_pred_ensemble = sum([_align_tree_predict(tree, X, self.classes_) for tree in self.uplift_forest]) / len(self.uplift_forest)
+            y_pred_ensemble = sum([_align_tree_predict(tree, X, self.classes_, class_to_forest_idx) for tree in self.uplift_forest]) / len(self.uplift_forest)
 
         # Summarize results into dataframe
         df_res = pd.DataFrame(y_pred_ensemble, columns=self.classes_)
diff --git a/tests/test_uplift_trees.py b/tests/test_uplift_trees.py
@@ -382,13 +382,23 @@ def test_UpliftRandomForestClassifier_predict_shape_with_sparse_groups():
     model = UpliftRandomForestClassifier(
         control_name=CONTROL_NAME,
         n_estimators=10,
+        n_jobs=2,
         min_samples_leaf=1,
         min_samples_treatment=0,
         random_state=RANDOM_SEED,
     )
     model.fit(X, treatment=treatment, y=y)
 
+    # Verify that at least one tree was fit without some treatment groups
+    assert any(
+        len(tree.classes_) < len(model.classes_) for tree in model.uplift_forest
+    ), (
+        "Test setup failed to produce any trees missing treatment groups; "
+        "adjust seed or sampling parameters to exercise sparse-group behavior."
+    )
+
     # Single-threaded
+    model.n_jobs = 1
     preds = model.predict(X)
     assert preds.shape == (
         n,
@@ -397,7 +407,7 @@ def test_UpliftRandomForestClassifier_predict_shape_with_sparse_groups():
     assert not np.any(np.isnan(preds)), "Predictions contain NaN"
 
     # Parallel
-    with parallel_backend("threading", n_jobs=2):
-        preds_par = model.predict(X)
+    model.n_jobs = 2
+    preds_par = model.predict(X)
     assert preds_par.shape == preds.shape
     assert np.allclose(preds, preds_par)