Add tests

gaugup · gaugup · commit da3ad79cb951 · 2022-01-24T23:53:49.000-08:00
Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>
diff --git a/dice_ml/explainer_interfaces/explainer_base.py b/dice_ml/explainer_interfaces/explainer_base.py
@@ -60,7 +60,7 @@ def _validate_counterfactual_configuration(
         if posthoc_sparsity_algorithm not in _PostHocSparsityTypes.ALL:
             raise UserConfigValidationException(
                 'The posthoc_sparsity_algorithm should be {0} and not {1}'.format(
-                    ','.join(_PostHocSparsityTypes.ALL), posthoc_sparsity_algorithm)
+                    ' or '.join(_PostHocSparsityTypes.ALL), posthoc_sparsity_algorithm)
                 )
 
         if stopping_threshold < 0.0 or stopping_threshold > 1.0:
@@ -250,6 +250,16 @@ def local_feature_importance(self, query_instances, cf_examples_list=None,
                   the list of counterfactuals per input, local feature importances per
                   input, and the global feature importance summarized over all inputs.
         """
+        self._validate_counterfactual_configuration(
+            query_instances=query_instances,
+            total_CFs=total_CFs,
+            desired_class=desired_class,
+            desired_range=desired_range,
+            permitted_range=permitted_range, features_to_vary=features_to_vary,
+            stopping_threshold=stopping_threshold, posthoc_sparsity_param=posthoc_sparsity_param,
+            posthoc_sparsity_algorithm=posthoc_sparsity_algorithm,
+            kwargs=kwargs
+        )
         if cf_examples_list is not None:
             if any([len(cf_examples.final_cfs_df) < 10 for cf_examples in cf_examples_list]):
                 raise UserConfigValidationException(
@@ -299,6 +309,16 @@ def global_feature_importance(self, query_instances, cf_examples_list=None,
                   the list of counterfactuals per input, local feature importances per
                   input, and the global feature importance summarized over all inputs.
         """
+        self._validate_counterfactual_configuration(
+            query_instances=query_instances,
+            total_CFs=total_CFs,
+            desired_class=desired_class,
+            desired_range=desired_range,
+            permitted_range=permitted_range, features_to_vary=features_to_vary,
+            stopping_threshold=stopping_threshold, posthoc_sparsity_param=posthoc_sparsity_param,
+            posthoc_sparsity_algorithm=posthoc_sparsity_algorithm,
+            kwargs=kwargs
+        )
         if query_instances is not None and len(query_instances) < 10:
             raise UserConfigValidationException(
                 "The number of query instances should be greater than or equal to 10 "
@@ -355,6 +375,16 @@ def feature_importance(self, query_instances, cf_examples_list=None,
                   the list of counterfactuals per input, local feature importances per
                   input, and the global feature importance summarized over all inputs.
         """
+        self._validate_counterfactual_configuration(
+            query_instances=query_instances,
+            total_CFs=total_CFs,
+            desired_class=desired_class,
+            desired_range=desired_range,
+            permitted_range=permitted_range, features_to_vary=features_to_vary,
+            stopping_threshold=stopping_threshold, posthoc_sparsity_param=posthoc_sparsity_param,
+            posthoc_sparsity_algorithm=posthoc_sparsity_algorithm,
+            kwargs=kwargs
+        )
         if cf_examples_list is None:
             cf_examples_list = self.generate_counterfactuals(
                 query_instances, total_CFs,
diff --git a/tests/test_dice_interface/test_explainer_base.py b/tests/test_dice_interface/test_explainer_base.py
@@ -47,23 +47,6 @@ def test_check_any_counterfactuals_computed(
         cf_examples_arr = [cf_example_has_cf, cf_example_no_cf]
         exp._check_any_counterfactuals_computed(cf_examples_arr=cf_examples_arr)
 
-    @pytest.mark.parametrize("desired_class", [1])
-    def test_zero_totalcfs(
-        self, desired_class, method, sample_custom_query_1,
-        custom_public_data_interface,
-        sklearn_binary_classification_model_interface
-    ):
-        exp = dice_ml.Dice(
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface,
-            method=method)
-
-        with pytest.raises(UserConfigValidationException):
-            exp.generate_counterfactuals(
-                    query_instances=[sample_custom_query_1],
-                    total_CFs=0,
-                    desired_class=desired_class)
-
     @pytest.mark.parametrize("desired_class", [1])
     def test_local_feature_importance(
             self, desired_class, method,
@@ -128,109 +111,6 @@ def test_global_feature_importance(
 
         self._verify_feature_importance(global_importance.summary_importance)
 
-    @pytest.mark.parametrize("desired_class", [1])
-    def test_global_feature_importance_error_conditions_with_insufficient_query_points(
-            self, desired_class, method,
-            sample_custom_query_1,
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface):
-        exp = dice_ml.Dice(
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface,
-            method=method)
-
-        cf_explanations = exp.generate_counterfactuals(
-                    query_instances=sample_custom_query_1,
-                    total_CFs=15,
-                    desired_class=desired_class)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of points for which counterfactuals generated should be "
-                  "greater than or equal to 10 "
-                  "to compute global feature importance"):
-            exp.global_feature_importance(
-                query_instances=None,
-                cf_examples_list=cf_explanations.cf_examples_list)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of query instances should be greater than or equal to 10 "
-                  "to compute global feature importance over all query points"):
-            exp.global_feature_importance(
-                query_instances=sample_custom_query_1,
-                total_CFs=15,
-                desired_class=desired_class)
-
-    @pytest.mark.parametrize("desired_class", [1])
-    def test_global_feature_importance_error_conditions_with_insufficient_cfs_per_query_point(
-            self, desired_class, method,
-            sample_custom_query_10,
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface):
-        exp = dice_ml.Dice(
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface,
-            method=method)
-
-        cf_explanations = exp.generate_counterfactuals(
-                    query_instances=sample_custom_query_10,
-                    total_CFs=1,
-                    desired_class=desired_class)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of counterfactuals generated per query instance should be "
-                  "greater than or equal to 10 "
-                  "to compute global feature importance over all query points"):
-            exp.global_feature_importance(
-                query_instances=None,
-                cf_examples_list=cf_explanations.cf_examples_list)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of counterfactuals requested per query instance should be greater "
-                  "than or equal to 10 "
-                  "to compute global feature importance over all query points"):
-            exp.global_feature_importance(
-                query_instances=sample_custom_query_10,
-                total_CFs=1,
-                desired_class=desired_class)
-
-    @pytest.mark.parametrize("desired_class", [1])
-    def test_local_feature_importance_error_conditions_with_insufficient_cfs_per_query_point(
-            self, desired_class, method,
-            sample_custom_query_1,
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface):
-        exp = dice_ml.Dice(
-            custom_public_data_interface,
-            sklearn_binary_classification_model_interface,
-            method=method)
-
-        cf_explanations = exp.generate_counterfactuals(
-                    query_instances=sample_custom_query_1,
-                    total_CFs=1,
-                    desired_class=desired_class)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of counterfactuals generated per query instance should be "
-                  "greater than or equal to 10 to compute feature importance for all query points"):
-            exp.local_feature_importance(
-                query_instances=None,
-                cf_examples_list=cf_explanations.cf_examples_list)
-
-        with pytest.raises(
-            UserConfigValidationException,
-            match="The number of counterfactuals requested per "
-                  "query instance should be greater than or equal to 10 "
-                  "to compute feature importance for all query points"):
-            exp.local_feature_importance(
-                query_instances=sample_custom_query_1,
-                total_CFs=1,
-                desired_class=desired_class)
-
     # @pytest.mark.parametrize("desired_class, binary_classification_exp_object_out_of_order",
     #                          [(1, 'random'), (1, 'genetic'), (1, 'kdtree')],
     #                          indirect=['binary_classification_exp_object_out_of_order'])
@@ -545,3 +425,180 @@ class TestExplainerBase:
     def test_instantiating_explainer_base(self, public_data_object):
         with pytest.raises(TypeError):
             ExplainerBase(data_interface=public_data_object)
+
+
+@pytest.mark.parametrize("method", ['random', 'genetic', 'kdtree'])
+class TestExplainerBaseUserConfigValidations:
+
+    @pytest.mark.parametrize('explainer_function',
+                             ['generate_counterfactuals', 'local_feature_importance',
+                              'feature_importance', 'global_feature_importance'])
+    def test_generate_counterfactuals_user_config_validations(
+            self, method, sample_custom_query_2,
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface,
+            explainer_function):
+        """Each explainer entry point must reject invalid user configuration values."""
+        exp = dice_ml.Dice(
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface,
+            method=method)
+
+        explainer_function = getattr(exp, explainer_function)
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r"The number of counterfactuals generated per query instance \(total_CFs\) "
+                      "should be a positive integer."):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=-10, desired_class='opposite')
+
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r"The number of counterfactuals generated per query instance \(total_CFs\) "
+                      "should be a positive integer."):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=0, desired_class='opposite')
+
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r"The posthoc_sparsity_algorithm should be linear or binary and not random"):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=10,
+                               posthoc_sparsity_algorithm='random')
+
+        # stopping_threshold is validated on both sides of the [0.0, 1.0] interval.
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r'The stopping_threshold should lie between 0.0 and 1.0'):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=10,
+                               stopping_threshold=-10.0)
+
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r'The stopping_threshold should lie between 0.0 and 1.0'):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=10,
+                               stopping_threshold=10.0)
+
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r'The posthoc_sparsity_param should lie between 0.0 and 1.0'):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=10,
+                               posthoc_sparsity_param=-10.0)
+
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r'The desired_range parameter should not be set for classification task'):
+            explainer_function(query_instances=sample_custom_query_2,
+                               total_CFs=10, desired_range=[0, 10])
+
+    @pytest.mark.parametrize('explainer_function',
+                             ['generate_counterfactuals', 'local_feature_importance',
+                              'feature_importance', 'global_feature_importance'])
+    def test_generate_counterfactuals_user_config_validations_regression(
+            self, regression_exp_object, sample_custom_query_1,
+            method, explainer_function):
+        explainer_function = getattr(regression_exp_object, explainer_function)
+        with pytest.raises(
+                UserConfigValidationException,
+                match=r'The desired_range parameter should be set for regression task'):
+            explainer_function(query_instances=sample_custom_query_1,
+                               total_CFs=10)
+
+    def test_global_feature_importance_error_conditions_with_insufficient_query_points(
+            self, method,
+            sample_custom_query_1,
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface):
+        exp = dice_ml.Dice(
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface,
+            method=method)
+
+        cf_explanations = exp.generate_counterfactuals(
+                    query_instances=sample_custom_query_1,
+                    total_CFs=15)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of points for which counterfactuals generated should be "
+                  "greater than or equal to 10 "
+                  "to compute global feature importance"):
+            exp.global_feature_importance(
+                query_instances=None,
+                cf_examples_list=cf_explanations.cf_examples_list)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of query instances should be greater than or equal to 10 "
+                  "to compute global feature importance over all query points"):
+            exp.global_feature_importance(
+                query_instances=sample_custom_query_1,
+                total_CFs=15)
+
+    def test_global_feature_importance_error_conditions_with_insufficient_cfs_per_query_point(
+            self, method,
+            sample_custom_query_10,
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface):
+        exp = dice_ml.Dice(
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface,
+            method=method)
+
+        cf_explanations = exp.generate_counterfactuals(
+                    query_instances=sample_custom_query_10,
+                    total_CFs=1)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of counterfactuals generated per query instance should be "
+                  "greater than or equal to 10 "
+                  "to compute global feature importance over all query points"):
+            exp.global_feature_importance(
+                query_instances=None,
+                cf_examples_list=cf_explanations.cf_examples_list)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of counterfactuals requested per query instance should be greater "
+                  "than or equal to 10 "
+                  "to compute global feature importance over all query points"):
+            exp.global_feature_importance(
+                query_instances=sample_custom_query_10,
+                total_CFs=1)
+
+    def test_local_feature_importance_error_conditions_with_insufficient_cfs_per_query_point(
+            self, method,
+            sample_custom_query_1,
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface):
+        exp = dice_ml.Dice(
+            custom_public_data_interface,
+            sklearn_binary_classification_model_interface,
+            method=method)
+
+        cf_explanations = exp.generate_counterfactuals(
+                    query_instances=sample_custom_query_1,
+                    total_CFs=1)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of counterfactuals generated per query instance should be "
+                  "greater than or equal to 10 to compute feature importance for all query points"):
+            exp.local_feature_importance(
+                query_instances=None,
+                cf_examples_list=cf_explanations.cf_examples_list)
+
+        with pytest.raises(
+            UserConfigValidationException,
+            match="The number of counterfactuals requested per "
+                  "query instance should be greater than or equal to 10 "
+                  "to compute feature importance for all query points"):
+            exp.local_feature_importance(
+                query_instances=sample_custom_query_1,
+                total_CFs=1)
+
+# class TestExplainerBaseDataValidations: