interpretml
diff --git a/‎.github/workflows/pythonpackage.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/pythonpackage.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dice_ml/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎dice_ml/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎dice_ml/counterfactual_explanations.py‎
Lines changed: 9 additions & 5 deletions b/‎dice_ml/counterfactual_explanations.py‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎dice_ml/data.py‎
Lines changed: 15 additions & 9 deletions b/‎dice_ml/data.py‎
Lines changed: 15 additions & 9 deletions
diff --git a/‎dice_ml/data_interfaces/private_data_interface.py‎
Lines changed: 40 additions & 23 deletions b/‎dice_ml/data_interfaces/private_data_interface.py‎
Lines changed: 40 additions & 23 deletions
@@ -35,7 +35,7 @@ jobs:
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        # lint violations now fail the build (no --exit-zero). The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        flake8 . --count --max-complexity=30 --max-line-length=127 --statistics
     - name: Test with pytest
       run: |
         pytest
 
@@ -1,3 +1,7 @@
 from .data import Data
 from .model import Model
 from .dice import Dice
+
+__all__ = ["Data",
+           "Model",
+           "Dice"]
@@ -216,17 +216,21 @@ def to_json(self):
             raise UserConfigValidationException(
                 "Unsupported serialization version {}".format(serialization_version))
 
+    @staticmethod
+    def _validate_serialization_version(version):
+        """Check the version read from the metadata of a serialized explanation.
+
+        :param version: value of the 'version' metadata field, or None if the field is absent.
+        :raises UserConfigValidationException: if version is None, or if
+            _check_supported_json_output_versions(version) returns a falsy value.
+        """
+        if version is None:
+            raise UserConfigValidationException("No version field in the json input")
+        elif not _check_supported_json_output_versions(version):
+            raise UserConfigValidationException("Incompatible version {} found in json input".format(version))
+
     @staticmethod
     def from_json(json_str):
         """ Deserialize json string to a CounterfactualExplanations object.
         """
         json_dict = json.loads(json_str)
         if _CommonSchemaConstants.METADATA in json_dict:
             version = json_dict[_CommonSchemaConstants.METADATA].get('version')
-            if version is None:
-                raise UserConfigValidationException("No version field in the json input")
-            elif not _check_supported_json_output_versions(version):
-                raise UserConfigValidationException("Incompatible version {} found in json input".format(version))
+            CounterfactualExplanations._validate_serialization_version(version)
 
             if version == _SchemaVersions.V1:
                 CounterfactualExplanations._check_cf_exp_output_against_json_schema(
@@ -240,7 +244,7 @@ def from_json(json_str):
                         local_importance=json_dict[_CounterfactualExpV1SchemaConstants.LOCAL_IMPORTANCE],
                         summary_importance=json_dict[_CounterfactualExpV1SchemaConstants.SUMMARY_IMPORTANCE],
                         version=version)
-            elif version == _SchemaVersions.V2:
+            else:
                 CounterfactualExplanations._check_cf_exp_output_against_json_schema(
                     json_dict, version=version)
                 cf_examples_list = []
 
@@ -1,6 +1,9 @@
 """Module pointing to different implementations of Data class
 
-DiCE requires only few parameters about the data such as the range of continuous features and the levels of categorical features. Hence, DiCE can be used for a private data whose meta data are only available (such as the feature names and range/levels of different features) by specifying appropriate parameters.
+DiCE requires only a few parameters about the data, such as the range of continuous
+features and the levels of categorical features. Hence, DiCE can be used with
+private data for which only meta data is available (such as the feature names and
+range/levels of different features) by specifying appropriate parameters.
 """
 
 
@@ -12,23 +15,26 @@ def __init__(self, **params):
 
         :param **params: a dictionary of required parameters.
         """
-
         self.decide_implementation_type(params)
 
     def decide_implementation_type(self, params):
         """Decides if the Data class is for public or private data."""
-
-        self.__class__  = decide(params)
+        # Rebind this instance to the class returned by decide(), then re-run
+        # __init__ so that class initialises the instance from params.
+        self.__class__ = decide(params)
         self.__init__(params)
 
-# To add new implementations of Data, add the class in data_interfaces subpackage and import-and-return the class in an elif loop as shown in the below method.
 
 def decide(params):
-    """Decides if the Data class is for public or private data."""
-
-    if 'dataframe' in params: # if params contain a Pandas dataframe, then use PublicData class
+    """Decides if the Data class is for public or private data.
+
+    To add new implementations of Data, add the class in the data_interfaces
+    subpackage, then import and return it from an additional elif branch
+    in this function.
+
+    :param params: dictionary of parameters; the presence of a 'dataframe' key selects PublicData.
+    :return: the PublicData class if 'dataframe' is in params, otherwise the PrivateData class.
+    """
+    if 'dataframe' in params:
+        # if params contain a Pandas dataframe, then use PublicData class
         from dice_ml.data_interfaces.public_data_interface import PublicData
         return PublicData
-    else: # use PrivateData if only meta data is provided
+    else:
+        # use PrivateData if only meta data is provided
         from dice_ml.data_interfaces.private_data_interface import PrivateData
         return PrivateData
@@ -3,34 +3,44 @@
 import sys
 import pandas as pd
 import numpy as np
-from sklearn.model_selection import train_test_split
 import collections
-from collections import OrderedDict
 import logging
+
+
 logging.basicConfig(level=logging.NOTSET)
-from sklearn.preprocessing import LabelEncoder
+
 
 class PrivateData:
     """A data interface for private data with meta information."""
 
     def __init__(self, params):
         """Init method
 
-        :param features: Dictionary or OrderedDict with feature names as keys and range in int/float (for continuous features) or categories in string (for categorical features) as values. For python version <=3.6, should provide only an OrderedDict.
+        :param features: Dictionary or OrderedDict with feature names as keys and range in int/float
+                         (for continuous features) or categories in string (for categorical features)
+                         as values. For python version <=3.6, should provide only an OrderedDict.
         :param outcome_name: Outcome feature name.
-        :param type_and_precision (optional): Dictionary with continuous feature names as keys. If the feature is of type int, just string 'int' should be provided, if the feature is of type float, a list of type and precision should be provided. For instance, type_and_precision: {cont_f1: 'int', cont_f2: ['float', 2]} for continuous features cont_f1 and cont_f2 of type int and float (and precision up to 2 decimal places) respectively. Default value is None and all features are treated as int.
-        :param mad (optional): Dictionary with feature names as keys and corresponding Median Absolute Deviations (MAD) as values. Default MAD value is 1 for all features.
+        :param type_and_precision (optional): Dictionary with continuous feature names as keys.
+                                              If the feature is of type int, just string 'int' should be provided,
+                                              if the feature is of type float, a list of type and precision should be
+                                              provided. For instance, type_and_precision: {cont_f1: 'int',
+                                              cont_f2: ['float', 2]} for continuous features cont_f1 and cont_f2 of
+                                              type int and float (and precision up to 2 decimal places) respectively.
+                                              Default value is None and all features are treated as int.
+        :param mad (optional): Dictionary with feature names as keys and corresponding Median Absolute Deviations (MAD)
+                               as values.
+                               Default MAD value is 1 for all features.
         :param data_name (optional): Dataset name
-
         """
-
-        if sys.version_info > (3,6,0) and type(params['features']) in [dict, collections.OrderedDict]:
+        if sys.version_info > (3, 6, 0) and type(params['features']) in [dict, collections.OrderedDict]:
             features_dict = params['features']
-        elif sys.version_info <= (3,6,0) and type(params['features']) is collections.OrderedDict:
+        elif sys.version_info <= (3, 6, 0) and type(params['features']) is collections.OrderedDict:
             features_dict = params['features']
         else:
             raise ValueError(
-                "should provide dictionary with feature names as keys and range (for continuous features) or categories (for categorical features) as values. For python version <3.6, should provide an OrderedDict")
+                "should provide dictionary with feature names as keys and range "
+                "(for continuous features) or categories (for categorical features) as values. "
+                "For python version <3.6, should provide an OrderedDict")
 
         if type(params['outcome_name']) is str:
             self.outcome_name = params['outcome_name']
@@ -80,7 +90,8 @@ def __init__(self, params):
                 #
                 # for column in self.categorical_feature_names:
                 #     self.labelencoder[column] = LabelEncoder()
-                #     self.label_encoded_data[column] = self.labelencoder[column].fit_transform(self.categorical_levels[column])
+                #     self.label_encoded_data[column] = \
+                #           self.labelencoder[column].fit_transform(self.categorical_levels[column])
 
                 # self.max_range = -np.inf
                 # for feature in self.continuous_feature_names:
@@ -178,8 +189,10 @@ def create_ohe_params(self):
             # one-hot-encoded data is same as original data if there is no categorical features.
             self.ohe_encoded_feature_names = [feat for feat in self.feature_names]
 
-        self.ohe_base_df = self.prepare_df_for_ohe_encoding() # base dataframe for doing one-hot-encoding
-        # ohe_encoded_feature_names and ohe_base_df are created (and stored as data class's parameters) when get_data_params_for_gradient_dice() is called from gradient-based DiCE explainers
+        # base dataframe for doing one-hot-encoding
+        # ohe_encoded_feature_names and ohe_base_df are created (and stored as data class's parameters)
+        # when get_data_params_for_gradient_dice() is called from gradient-based DiCE explainers
+        self.ohe_base_df = self.prepare_df_for_ohe_encoding()
 
     def get_data_params_for_gradient_dice(self):
         """Gets all data related params for DiCE."""
@@ -200,8 +213,8 @@ def get_data_params_for_gradient_dice(self):
         # decimal precisions for continuous features
         cont_precisions = [self.get_decimal_precisions()[ix] for ix in range(len(self.continuous_feature_names))]
 
-        return minx, maxx, encoded_categorical_feature_indexes, encoded_continuous_feature_indexes, cont_minx, cont_maxx, cont_precisions
-
+        return minx, maxx, encoded_categorical_feature_indexes, encoded_continuous_feature_indexes, \
+            cont_minx, cont_maxx, cont_precisions
 
     def get_encoded_categorical_feature_indexes(self):
         """Gets the column indexes categorical features after one-hot-encoding."""
@@ -243,10 +256,10 @@ def from_label(self, data):
     def from_dummies(self, data, prefix_sep='_'):
         """Gets the original data from dummy encoded data with k levels."""
         out = data.copy()
-        for l in self.categorical_feature_names:
+        for feature_name in self.categorical_feature_names:
+            # cols: names of the columns in data that contain feature_name+prefix_sep;
+            # labs: the same names with feature_name+prefix_sep removed (the level labels).
+            # NOTE(review): the test is a substring match, so a column of another feature whose
+            # name contains this prefix would also be selected - confirm feature names cannot collide.
             cols, labs = [[c.replace(
-                x, "") for c in data.columns if l+prefix_sep in c] for x in ["", l+prefix_sep]]
-            out[l] = pd.Categorical(
+                x, "") for c in data.columns if feature_name+prefix_sep in c] for x in ["", feature_name+prefix_sep]]
+            # for each row, take the label of the dummy column holding the largest value
+            out[feature_name] = pd.Categorical(
                 np.array(labs)[np.argmax(data[cols].values, axis=1)])
             out.drop(cols, axis=1, inplace=True)
         return out
@@ -330,19 +343,23 @@ def prepare_query_instance(self, query_instance):
         return test
 
     def get_ohe_min_max_normalized_data(self, query_instance):
-        """Transforms query_instance into one-hot-encoded and min-max normalized data. query_instance should be a dict, a dataframe, a list, or a list of dicts"""
+        """Transforms query_instance into one-hot-encoded and min-max normalized data.
+
+        :param query_instance: a dict, a dataframe, a list, or a list of dicts.
+        :return: the output of normalize_data() for the encoded query rows (a pandas dataframe).
+        """
         query_instance = self.prepare_query_instance(query_instance)
+        # The query rows are appended to ohe_base_df before encoding and sliced back off with tail()
+        # afterwards - presumably so the encoding sees every categorical level; confirm.
+        # NOTE(review): this looks like DataFrame.append, which pandas deprecated in 1.4 and removed
+        # in 2.0 - confirm the supported pandas range or move to pd.concat.
         temp = self.ohe_base_df.append(query_instance, ignore_index=True, sort=False)
         temp = self.one_hot_encode_data(temp)
         temp = temp.tail(query_instance.shape[0]).reset_index(drop=True)
-        return self.normalize_data(temp) # returns a pandas dataframe
+        # returns a pandas dataframe
+        return self.normalize_data(temp)
 
     def get_inverse_ohe_min_max_normalized_data(self, transformed_data):
-        """Transforms one-hot-encoded and min-max normalized data into raw user-fed data format. transformed_data should be a dataframe or an array"""
+        """Transforms one-hot-encoded and min-max normalized data into raw user-fed data format.
+
+        :param transformed_data: a dataframe or an array.
+        :return: a pandas dataframe with columns ordered as self.feature_names and each continuous
+                 feature cast to float and rounded to its decimal precision.
+        """
         raw_data = self.get_decoded_data(transformed_data, encoding='one-hot')
         raw_data = self.de_normalize_data(raw_data)
         precisions = self.get_decimal_precisions()
         for ix, feature in enumerate(self.continuous_feature_names):
             raw_data[feature] = raw_data[feature].astype(float).round(precisions[ix])
         raw_data = raw_data[self.feature_names]
-        return raw_data # returns a pandas dataframe
+        # returns a pandas dataframe
+        return raw_data