Skip to content

Commit 2a468f9

Browse files
authored
Merge pull request #670 from openml/pyproject
Enable pip install from clean
2 parents 5b56127 + 415ee9f commit 2a468f9

9 files changed

Lines changed: 85 additions & 65 deletions

File tree

appveyor.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,8 @@ install:
3434

3535
# Install the build and runtime dependencies of the project.
3636
- "cd C:\\projects\\openml-python"
37-
- conda install --quiet --yes scikit-learn=0.20.0 nb_conda nb_conda_kernels numpy scipy requests nbformat python-dateutil nbconvert pandas matplotlib seaborn
38-
- pip install liac-arff xmltodict oslo.concurrency
39-
# Packages for (parallel) unit tests with pytest
40-
- pip install pytest pytest-xdist pytest-timeout
41-
- "pip install .[test]"
37+
- "pip install .[examples,test]"
38+
- conda install --quiet --yes scikit-learn=0.20.0
4239

4340

4441
# Not a .NET project, we build scikit-learn in the install step instead

ci_scripts/install.sh

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,17 @@ popd
2626
# provided versions
2727
conda create -n testenv --yes python=$PYTHON_VERSION pip
2828
source activate testenv
29-
pip install pytest pytest-xdist pytest-timeout numpy scipy cython scikit-learn==$SKLEARN_VERSION \
30-
oslo.concurrency
29+
30+
python --version
31+
pip install -e '.[test]'
32+
python -c "import numpy; print('numpy %s' % numpy.__version__)"
33+
python -c "import scipy; print('scipy %s' % scipy.__version__)"
3134

3235
if [[ "$EXAMPLES" == "true" ]]; then
33-
pip install matplotlib jupyter notebook nbconvert nbformat jupyter_client \
34-
ipython ipykernel pandas seaborn
36+
pip install -e '.[examples]'
3537
fi
3638
if [[ "$DOCTEST" == "true" ]]; then
37-
pip install pandas sphinx_bootstrap_theme
39+
pip install sphinx_bootstrap_theme
3840
fi
3941
if [[ "$COVERAGE" == "true" ]]; then
4042
pip install codecov pytest-cov
@@ -43,7 +45,6 @@ if [[ "$RUN_FLAKE8" == "true" ]]; then
4345
pip install flake8 mypy
4446
fi
4547

46-
python --version
47-
python -c "import numpy; print('numpy %s' % numpy.__version__)"
48-
python -c "import scipy; print('scipy %s' % scipy.__version__)"
49-
pip install -e '.[test]'
48+
# Install scikit-learn last to make sure the openml package installation works
49+
# from a clean environment without scikit-learn.
50+
pip install scikit-learn==$SKLEARN_VERSION

openml/datasets/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def get_data(self, target: Optional[Union[List[str], str]] = None,
564564
else:
565565
return rval
566566

567-
def retrieve_class_labels(self, target_name='class'):
567+
def retrieve_class_labels(self, target_name: str = 'class') -> Union[None, List[str]]:
568568
"""Reads the datasets arff to determine the class-labels.
569569
570570
If the task has no class labels (for example a regression problem)

openml/flows/flow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ def publish(self, raise_error_if_exists: bool = False) -> 'OpenMLFlow':
388388
(flow_id, message))
389389
return self
390390

391-
def get_structure(self, key_item):
391+
def get_structure(self, key_item: str) -> Dict[str, List[str]]:
392392
"""
393393
Returns for each sub-component of the flow the path of identifiers
394394
that should be traversed to reach this component. The resulting dict

openml/runs/run.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@
1212
import openml._api_calls
1313
from ..exceptions import PyOpenMLError
1414
from ..flows import get_flow
15-
from ..tasks import get_task, TaskTypeEnum
15+
from ..tasks import (get_task,
16+
TaskTypeEnum,
17+
OpenMLClassificationTask,
18+
OpenMLLearningCurveTask,
19+
OpenMLClusteringTask,
20+
OpenMLRegressionTask
21+
)
1622
from ..utils import _tag_entity
1723

1824

@@ -69,7 +75,7 @@ def _repr_pretty_(self, pp, cycle):
6975
pp.text(str(self))
7076

7177
@classmethod
72-
def from_filesystem(cls, directory, expect_model=True):
78+
def from_filesystem(cls, directory: str, expect_model: bool = True) -> 'OpenMLRun':
7379
"""
7480
The inverse of the to_filesystem method. Instantiates an OpenMLRun
7581
object based on files stored on the file system.
@@ -109,24 +115,24 @@ def from_filesystem(cls, directory, expect_model=True):
109115
if not os.path.isfile(model_path) and expect_model:
110116
raise ValueError('Could not find model.pkl')
111117

112-
with open(description_path, 'r') as fp:
113-
xml_string = fp.read()
118+
with open(description_path, 'r') as fht:
119+
xml_string = fht.read()
114120
run = openml.runs.functions._create_run_from_xml(xml_string, from_server=False)
115121

116122
if run.flow_id is None:
117123
flow = openml.flows.OpenMLFlow.from_filesystem(directory)
118124
run.flow = flow
119125
run.flow_name = flow.name
120126

121-
with open(predictions_path, 'r') as fp:
122-
predictions = arff.load(fp)
127+
with open(predictions_path, 'r') as fht:
128+
predictions = arff.load(fht)
123129
run.data_content = predictions['data']
124130

125131
if os.path.isfile(model_path):
126132
# note that it will load the model if the file exists, even if
127133
# expect_model is False
128-
with open(model_path, 'rb') as fp:
129-
run.model = pickle.load(fp)
134+
with open(model_path, 'rb') as fhb:
135+
run.model = pickle.load(fhb)
130136

131137
if os.path.isfile(trace_path):
132138
run.trace = openml.runs.OpenMLRunTrace._from_filesystem(trace_path)
@@ -209,7 +215,18 @@ def _generate_arff_dict(self) -> 'OrderedDict[str, Any]':
209215
arff_dict['relation'] =\
210216
'openml_task_{}_predictions'.format(task.task_id)
211217

212-
if task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION:
218+
if isinstance(task, OpenMLLearningCurveTask):
219+
class_labels = task.class_labels # type: ignore
220+
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
221+
('fold', 'NUMERIC'),
222+
('sample', 'NUMERIC'),
223+
('row_id', 'NUMERIC')] + \
224+
[('confidence.' + class_labels[i],
225+
'NUMERIC') for i in
226+
range(len(class_labels))] + \
227+
[('prediction', class_labels),
228+
('correct', class_labels)]
229+
elif isinstance(task, OpenMLClassificationTask):
213230
class_labels = task.class_labels
214231
instance_specifications = [('repeat', 'NUMERIC'),
215232
('fold', 'NUMERIC'),
@@ -223,27 +240,14 @@ def _generate_arff_dict(self) -> 'OrderedDict[str, Any]':
223240
arff_dict['attributes'] = (instance_specifications
224241
+ prediction_confidences
225242
+ prediction_and_true)
226-
227-
elif task.task_type_id == TaskTypeEnum.LEARNING_CURVE:
228-
class_labels = task.class_labels
229-
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
230-
('fold', 'NUMERIC'),
231-
('sample', 'NUMERIC'),
232-
('row_id', 'NUMERIC')] + \
233-
[('confidence.' + class_labels[i],
234-
'NUMERIC') for i in
235-
range(len(class_labels))] + \
236-
[('prediction', class_labels),
237-
('correct', class_labels)]
238-
239-
elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
243+
elif isinstance(task, OpenMLRegressionTask):
240244
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
241245
('fold', 'NUMERIC'),
242246
('row_id', 'NUMERIC'),
243247
('prediction', 'NUMERIC'),
244248
('truth', 'NUMERIC')]
245249

246-
elif task.task_type == TaskTypeEnum.CLUSTERING:
250+
elif isinstance(task, OpenMLClusteringTask):
247251
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
248252
('fold', 'NUMERIC'),
249253
('row_id', 'NUMERIC'),
@@ -461,7 +465,7 @@ def _create_description_xml(self):
461465
description_xml = xmltodict.unparse(description, pretty=True)
462466
return description_xml
463467

464-
def push_tag(self, tag):
468+
def push_tag(self, tag: str) -> None:
465469
"""Annotates this run with a tag on the server.
466470
467471
Parameters
@@ -471,7 +475,7 @@ def push_tag(self, tag):
471475
"""
472476
_tag_entity('run', self.run_id, tag)
473477

474-
def remove_tag(self, tag):
478+
def remove_tag(self, tag: str) -> None:
475479
"""Removes a tag from this run on the server.
476480
477481
Parameters

openml/runs/trace.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __init__(self, run_id, trace_iterations):
3232
self.run_id = run_id
3333
self.trace_iterations = trace_iterations
3434

35-
def get_selected_iteration(self, fold, repeat):
35+
def get_selected_iteration(self, fold: int, repeat: int) -> int:
3636
"""
3737
Returns the trace iteration that was marked as selected. In
3838
case multiple are marked as selected (should not happen) the
@@ -46,7 +46,7 @@ def get_selected_iteration(self, fold, repeat):
4646
4747
Returns
4848
----------
49-
OpenMLTraceIteration
49+
int
5050
The trace iteration from the given fold and repeat that was
5151
selected as the best iteration by the search procedure
5252
"""
@@ -104,7 +104,7 @@ def generate(cls, attributes, content):
104104
)
105105

106106
@classmethod
107-
def _from_filesystem(cls, file_path):
107+
def _from_filesystem(cls, file_path: str) -> 'OpenMLRunTrace':
108108
"""
109109
Logic to deserialize the trace from the filesystem.
110110

openml/tasks/functions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
OpenMLLearningCurveTask,
2020
TaskTypeEnum,
2121
OpenMLRegressionTask,
22-
OpenMLSupervisedTask
22+
OpenMLSupervisedTask,
23+
OpenMLTask
2324
)
2425
import openml.utils
2526
import openml._api_calls
@@ -54,7 +55,7 @@ def _get_cached_tasks():
5455
return tasks
5556

5657

57-
def _get_cached_task(tid):
58+
def _get_cached_task(tid: int) -> OpenMLTask:
5859
"""Return a cached task based on the given id.
5960
6061
Parameters
@@ -299,7 +300,7 @@ def get_tasks(task_ids, download_data=True):
299300
return tasks
300301

301302

302-
def get_task(task_id, download_data=True):
303+
def get_task(task_id: int, download_data: bool = True) -> OpenMLTask:
303304
"""Download OpenML task for a given task ID.
304305
305306
Downloads the task representation, while the data splits can be

openml/tasks/split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __eq__(self, other):
5858
return True
5959

6060
@classmethod
61-
def _from_arff_file(cls, filename):
61+
def _from_arff_file(cls, filename: str) -> 'OpenMLSplit':
6262

6363
repetitions = None
6464

setup.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,18 @@
66
with open("openml/__version__.py") as fh:
77
version = fh.readlines()[-1].split()[-1].strip("\"'")
88

9-
dependency_links = []
10-
11-
try:
12-
import numpy # noqa: F401
13-
except ImportError:
14-
print('numpy is required during installation')
15-
sys.exit(1)
16-
17-
try:
18-
import scipy # noqa: F401
19-
except ImportError:
20-
print('scipy is required during installation')
9+
# Using Python setup.py install will try to build numpy which is prone to failure and
10+
# very time consuming anyway.
11+
if len(sys.argv) > 1 and sys.argv[1] == 'install':
12+
print('Please install this package with pip: `pip install -e .` '
13+
'Installation requires pip>=10.0.')
2114
sys.exit(1)
2215

16+
if sys.version_info < (3, 5):
17+
raise ValueError(
18+
'Unsupported Python version {}.{}.{} found. OpenML requires Python 3.5 or higher.'
19+
.format(sys.version_info.major, sys.version_info.minor, sys.version_info.micro)
20+
)
2321

2422
setuptools.setup(name="openml",
2523
author="Matthias Feurer, Andreas Müller, Farzan Majdani, "
@@ -30,12 +28,14 @@
3028
description="Python API for OpenML",
3129
license="BSD 3-clause",
3230
url="http://openml.org/",
31+
project_urls={
32+
"Documentation": "https://openml.github.io/openml-python/master/",
33+
"Source Code": "https://github.com/openml/openml-python"
34+
},
3335
version=version,
3436
packages=setuptools.find_packages(),
3537
package_data={'': ['*.txt', '*.md']},
3638
install_requires=[
37-
'numpy>=1.6.2',
38-
'scipy>=0.13.3',
3939
'liac-arff>=2.2.2',
4040
'xmltodict',
4141
'pytest',
@@ -45,12 +45,29 @@
4545
'python-dateutil',
4646
'oslo.concurrency',
4747
'pandas>=0.19.2',
48+
'scipy>=0.13.3',
49+
'numpy>=1.6.2'
4850
],
4951
extras_require={
5052
'test': [
5153
'nbconvert',
5254
'jupyter_client',
53-
'matplotlib'
55+
'matplotlib',
56+
'pytest',
57+
'pytest-xdist',
58+
'pytest-timeout',
59+
60+
],
61+
'examples': [
62+
'matplotlib',
63+
'jupyter',
64+
'notebook',
65+
'nbconvert',
66+
'nbformat',
67+
'jupyter_client',
68+
'ipython',
69+
'ipykernel',
70+
'seaborn'
5471
]
5572
},
5673
test_suite="pytest",
@@ -66,5 +83,5 @@
6683
'Programming Language :: Python :: 3',
6784
'Programming Language :: Python :: 3.4',
6885
'Programming Language :: Python :: 3.5',
69-
'Programming Language :: Python :: 3.6'
86+
'Programming Language :: Python :: 3.6',
7087
'Programming Language :: Python :: 3.7'])

0 commit comments

Comments (0)