Skip to content

Commit 8abfb23

Browse files
committed
pep8 and better docstrings
1 parent deda557 commit 8abfb23

3 files changed

Lines changed: 33 additions & 44 deletions

File tree

openml/extensions/extension_interface.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import numpy as np
66
import scipy.sparse
7-
import pandas as pd
87

98
# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
109
if TYPE_CHECKING:
@@ -151,49 +150,50 @@ def _run_model_on_fold(
151150
self,
152151
model: Any,
153152
task: 'OpenMLTask',
154-
X_train: Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame],
155-
y_train: np.ndarray,
153+
X_train: Union[np.ndarray, scipy.sparse.spmatrix],
156154
rep_no: int,
157155
fold_no: int,
158-
X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None,
159-
n_classes: Optional[int] = None,
160-
) -> Tuple[List[List], List[List], 'OrderedDict[str, float]', Optional['OpenMLRunTrace']]:
156+
y_train: Optional[np.ndarray] = None,
157+
X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None,
158+
classes: Optional[List] = None,
159+
) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Any]:
161160
"""Run a model on a repeat,fold,subsample triplet of the task and return prediction information.
162161
163162
Returns the data that is necessary to construct the OpenML Run object. Is used by
164-
run_task_get_arff_content.
163+
:func:`openml.runs.run_flow_on_task`.
165164
166165
Parameters
167166
----------
168167
model : Any
169168
The UNTRAINED model to run. The model instance will be copied and not altered.
170169
task : OpenMLTask
171170
The task to run the model on.
171+
X_train : array-like
172+
Training data for the given repetition and fold.
172173
rep_no : int
173174
The repeat of the experiment (0-based; in case of 1 time CV, always 0)
174175
fold_no : int
175176
The fold number of the experiment (0-based; in case of holdout, always 0)
176-
sample_no : int
177-
In case of learning curves, the index of the subsample (0-based; in case of no
178-
learning curve, always 0)
179-
add_local_measures : bool
180-
Determines whether to calculate a set of measures (i.e., predictive accuracy) locally,
181-
to later verify server behaviour.
177+
y_train : Optional[np.ndarray] (default=None)
178+
Target attributes for supervised tasks. In case of classification, these are integer
179+
indices to the potential classes specified by the dataset.
180+
X_test : Optional, array-like (default=None)
181+
Test attributes to test for generalization in supervised tasks.
182+
classes : List
183+
List of classes for supervised classification tasks (and supervised data stream
184+
classification).
182185
183186
Returns
184187
-------
185-
arff_datacontent : List[List]
186-
Arff representation (list of lists) of the predictions that were
187-
generated by this fold (required to populate predictions.arff)
188-
arff_tracecontent : List[List]
189-
Arff representation (list of lists) of the trace data that was generated by this fold
190-
(will be used to populate trace.arff, leave it empty if the model did not perform any
191-
hyperparameter optimization).
188+
predictions : np.ndarray
189+
Model predictions.
190+
probabilities : Optional, np.ndarray
191+
Predicted probabilities (only applicable for supervised classification tasks).
192192
user_defined_measures : OrderedDict[str, float]
193193
User defined measures that were generated on this fold
194-
model : Any
195-
The model trained on this repeat,fold,subsample triple. Will be used to generate trace
196-
information later on (in ``obtain_arff_trace``).
194+
trace : Optional, OpenMLRunTrace
195+
Hyperparameter optimization trace (only applicable for supervised tasks with
196+
hyperparameter optimization).
197197
"""
198198

199199
@abstractmethod

openml/extensions/sklearn/extension.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def flow_to_model(self, flow: 'OpenMLFlow', initialize_with_defaults: bool = Fal
9595
9696
Parameters
9797
----------
98-
o : mixed
98+
flow : mixed
9999
the object to deserialize (can be flow object, or any serialized
100100
parameter value that is accepted by)
101101
@@ -470,7 +470,7 @@ def _check_multiple_occurence_of_component_in_flow(
470470
) -> None:
471471
to_visit_stack = [] # type: List[OpenMLFlow]
472472
to_visit_stack.extend(sub_components.values())
473-
known_sub_components = set() # type: Set[OpenMLFlow]
473+
known_sub_components = set() # type: Set[str]
474474
while len(to_visit_stack) > 0:
475475
visitee = to_visit_stack.pop()
476476
if visitee.name in known_sub_components:
@@ -1103,7 +1103,7 @@ def _run_model_on_fold(
11031103
fold_no: int,
11041104
y_train: Optional[np.ndarray] = None,
11051105
X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None,
1106-
classes: Optional[int] = None,
1106+
classes: Optional[List] = None,
11071107
) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Any]:
11081108
"""Run a model on a repeat,fold,subsample triplet of the task and return prediction
11091109
information.
@@ -1123,17 +1123,12 @@ def _run_model_on_fold(
11231123
The UNTRAINED model to run. The model instance will be copied and not altered.
11241124
task : OpenMLTask
11251125
The task to run the model on.
1126+
X_train : array-like
1127+
Training data for the given repetition and fold.
11261128
rep_no : int
11271129
The repeat of the experiment (0-based; in case of 1 time CV, always 0)
11281130
fold_no : int
11291131
The fold number of the experiment (0-based; in case of holdout, always 0)
1130-
sample_no : int
1131-
In case of learning curves, the index of the subsample (0-based; in case of no
1132-
learning curve, always 0)
1133-
add_local_measures : bool
1134-
Determines whether to calculate a set of measures (i.e., predictive accuracy)
1135-
locally,
1136-
to later verify server behaviour.
11371132
11381133
Returns
11391134
-------
@@ -1154,10 +1149,7 @@ def _run_model_on_fold(
11541149
information later on (in ``obtain_arff_trace``).
11551150
"""
11561151

1157-
def _prediction_to_probabilities(
1158-
y: np.ndarray,
1159-
classes: List,
1160-
) -> np.ndarray:
1152+
def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarray:
11611153
"""Transforms predicted probabilities to match with OpenML class indices.
11621154
11631155
Parameters
@@ -1259,6 +1251,9 @@ def _prediction_to_probabilities(
12591251

12601252
if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
12611253

1254+
if classes is None:
1255+
raise TypeError("Argument classes must not be of type 'None'")
1256+
12621257
try:
12631258
proba_y = model_copy.predict_proba(X_test)
12641259
except AttributeError:

tests/test_runs/test_run_functions.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from sklearn.naive_bayes import GaussianNB
2828
from sklearn.model_selection._search import BaseSearchCV
29-
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
29+
from sklearn.tree import DecisionTreeClassifier
3030
from sklearn.preprocessing.imputation import Imputer
3131
from sklearn.dummy import DummyClassifier
3232
from sklearn.preprocessing import StandardScaler
@@ -38,7 +38,6 @@
3838
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \
3939
StratifiedKFold
4040
from sklearn.pipeline import Pipeline
41-
from sklearn.cluster import KMeans
4241

4342

4443
class TestRun(TestBase):
@@ -484,11 +483,6 @@ def test_run_and_upload_logistic_regression(self):
484483
self._run_and_upload_classification(lr, task_id, n_missing_vals,
485484
n_test_obs, '62501')
486485

487-
def test_run_and_upload_kmeans(self):
488-
kmeans = KMeans()
489-
task_id = 126034
490-
491-
492486
def test_run_and_upload_linear_regression(self):
493487
lr = LinearRegression()
494488
task_id = self.TEST_SERVER_TASK_REGRESSION[0]

0 commit comments

Comments
 (0)