Skip to content

Commit 397f94d

Browse files
committed
Make oslo a test-only dependency.
1 parent 80dff77 commit 397f94d

5 files changed

Lines changed: 88 additions & 71 deletions

File tree

openml/datasets/functions.py

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import io
22
import os
33
import re
4-
import warnings
54
from typing import List, Dict, Union
65

76
import numpy as np
@@ -10,11 +9,6 @@
109

1110
import xmltodict
1211
from scipy.sparse import coo_matrix
13-
# Currently, importing oslo raises a lot of warning that it will stop working
14-
# under python3.8; remove this once they disappear
15-
with warnings.catch_warnings():
16-
warnings.simplefilter("ignore")
17-
from oslo_concurrency import lockutils
1812
from collections import OrderedDict
1913

2014
import openml.utils
@@ -334,6 +328,7 @@ def get_datasets(
334328
return datasets
335329

336330

331+
@openml.utils.thread_safe_if_oslo_installed
337332
def get_dataset(dataset_id: Union[int, str], download_data: bool = True) -> OpenMLDataset:
338333
""" Download the OpenML dataset representation, optionally also download actual data file.
339334
@@ -361,38 +356,34 @@ def get_dataset(dataset_id: Union[int, str], download_data: bool = True) -> Open
361356
raise ValueError("Dataset ID is neither an Integer nor can be "
362357
"cast to an Integer.")
363358

364-
with lockutils.external_lock(
365-
name='datasets.functions.get_dataset:%d' % dataset_id,
366-
lock_path=_create_lockfiles_dir(),
367-
):
368-
did_cache_dir = _create_cache_directory_for_id(
369-
DATASETS_CACHE_DIR_NAME, dataset_id,
370-
)
359+
did_cache_dir = _create_cache_directory_for_id(
360+
DATASETS_CACHE_DIR_NAME, dataset_id,
361+
)
371362

372-
try:
373-
remove_dataset_cache = True
374-
description = _get_dataset_description(did_cache_dir, dataset_id)
375-
features = _get_dataset_features(did_cache_dir, dataset_id)
376-
qualities = _get_dataset_qualities(did_cache_dir, dataset_id)
377-
378-
arff_file = _get_dataset_arff(description) if download_data else None
379-
380-
remove_dataset_cache = False
381-
except OpenMLServerException as e:
382-
# if there was an exception,
383-
# check if the user had access to the dataset
384-
if e.code == 112:
385-
raise OpenMLPrivateDatasetError(e.message) from None
386-
else:
387-
raise e
388-
finally:
389-
if remove_dataset_cache:
390-
_remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME,
391-
did_cache_dir)
392-
393-
dataset = _create_dataset_from_description(
394-
description, features, qualities, arff_file
395-
)
363+
try:
364+
remove_dataset_cache = True
365+
description = _get_dataset_description(did_cache_dir, dataset_id)
366+
features = _get_dataset_features(did_cache_dir, dataset_id)
367+
qualities = _get_dataset_qualities(did_cache_dir, dataset_id)
368+
369+
arff_file = _get_dataset_arff(description) if download_data else None
370+
371+
remove_dataset_cache = False
372+
except OpenMLServerException as e:
373+
# if there was an exception,
374+
# check if the user had access to the dataset
375+
if e.code == 112:
376+
raise OpenMLPrivateDatasetError(e.message) from None
377+
else:
378+
raise e
379+
finally:
380+
if remove_dataset_cache:
381+
_remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME,
382+
did_cache_dir)
383+
384+
dataset = _create_dataset_from_description(
385+
description, features, qualities, arff_file
386+
)
396387
return dataset
397388

398389

openml/flows/functions.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def _get_cached_flow(fid: int) -> OpenMLFlow:
7070
"cached" % fid)
7171

7272

73+
@openml.utils.thread_safe_if_oslo_installed
7374
def get_flow(flow_id: int, reinstantiate: bool = False) -> OpenMLFlow:
7475
"""Download the OpenML flow for a given flow ID.
7576
@@ -87,11 +88,7 @@ def get_flow(flow_id: int, reinstantiate: bool = False) -> OpenMLFlow:
8788
the flow
8889
"""
8990
flow_id = int(flow_id)
90-
with lockutils.external_lock(
91-
name='flows.functions.get_flow:%d' % flow_id,
92-
lock_path=openml.utils._create_lockfiles_dir(),
93-
):
94-
flow = _get_flow_description(flow_id)
91+
flow = _get_flow_description(flow_id)
9592

9693
if reinstantiate:
9794
flow.model = flow.extension.flow_to_model(flow)

openml/tasks/functions.py

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ def get_tasks(task_ids, download_data=True):
300300
return tasks
301301

302302

303+
@openml.utils.thread_safe_if_oslo_installed
303304
def get_task(task_id: int, download_data: bool = True) -> OpenMLTask:
304305
"""Download OpenML task for a given task ID.
305306
@@ -324,34 +325,30 @@ def get_task(task_id: int, download_data: bool = True) -> OpenMLTask:
324325
raise ValueError("Dataset ID is neither an Integer nor can be "
325326
"cast to an Integer.")
326327

327-
with lockutils.external_lock(
328-
name='task.functions.get_task:%d' % task_id,
329-
lock_path=openml.utils._create_lockfiles_dir(),
330-
):
331-
tid_cache_dir = openml.utils._create_cache_directory_for_id(
332-
TASKS_CACHE_DIR_NAME, task_id,
333-
)
328+
tid_cache_dir = openml.utils._create_cache_directory_for_id(
329+
TASKS_CACHE_DIR_NAME, task_id,
330+
)
334331

335-
try:
336-
task = _get_task_description(task_id)
337-
dataset = get_dataset(task.dataset_id, download_data)
338-
# List of class labels availaible in dataset description
339-
# Including class labels as part of task meta data handles
340-
# the case where data download was initially disabled
341-
if isinstance(task, OpenMLClassificationTask):
342-
task.class_labels = \
343-
dataset.retrieve_class_labels(task.target_name)
344-
# Clustering tasks do not have class labels
345-
# and do not offer download_split
346-
if download_data:
347-
if isinstance(task, OpenMLSupervisedTask):
348-
task.download_split()
349-
except Exception as e:
350-
openml.utils._remove_cache_dir_for_id(
351-
TASKS_CACHE_DIR_NAME,
352-
tid_cache_dir,
353-
)
354-
raise e
332+
try:
333+
task = _get_task_description(task_id)
334+
dataset = get_dataset(task.dataset_id, download_data)
335+
# List of class labels available in dataset description
336+
# Including class labels as part of task meta data handles
337+
# the case where data download was initially disabled
338+
if isinstance(task, OpenMLClassificationTask):
339+
task.class_labels = \
340+
dataset.retrieve_class_labels(task.target_name)
341+
# Clustering tasks do not have class labels
342+
# and do not offer download_split
343+
if download_data:
344+
if isinstance(task, OpenMLSupervisedTask):
345+
task.download_split()
346+
except Exception as e:
347+
openml.utils._remove_cache_dir_for_id(
348+
TASKS_CACHE_DIR_NAME,
349+
tid_cache_dir,
350+
)
351+
raise e
355352

356353
return task
357354

openml/utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,23 @@
22
import hashlib
33
import xmltodict
44
import shutil
5+
import warnings
56

67
import openml._api_calls
78
import openml.exceptions
89
from . import config
910

11+
oslo_installed = False
12+
try:
13+
# Currently, importing oslo raises a lot of warning that it will stop working
14+
# under python3.8; remove this once they disappear
15+
with warnings.catch_warnings():
16+
warnings.simplefilter("ignore")
17+
from oslo_concurrency import lockutils
18+
oslo_installed = True
19+
except ImportError:
20+
pass
21+
1022

1123
def extract_xml_tags(xml_tag_name, node, allow_none=True):
1224
"""Helper to extract xml tags from xmltodict.
@@ -279,6 +291,26 @@ def _remove_cache_dir_for_id(key, cache_dir):
279291
'Please do this manually!' % (key, cache_dir))
280292

281293

294+
def thread_safe_if_oslo_installed(func, *args, **kwargs):
295+
if oslo_installed:
296+
# Lock directories use the id that is passed as either a first argument, or as a keyword.
297+
id_parameters = ['_id' in parameter_name for parameter_name in kwargs]
298+
if len(id_parameters) == 1:
299+
id_ = kwargs[id_parameters[0]]
300+
elif len(args) > 0:
301+
id_ = args[0]
302+
else:
303+
raise RuntimeError("An id must be specified for {}, was passed: ({}, {}).".format(
304+
func.__name__, args, kwargs
305+
))
306+
# The [7:] gets rid of the 'openml.' prefix
307+
lock_name = "{}.{}:{}".format(func.__module__[7:], func.__name__, id_)
308+
with lockutils.external_lock(name=lock_name, lock_path=_create_lockfiles_dir()):
309+
return func(*args, **kwargs)
310+
else:
311+
return func(*args, **kwargs)
312+
313+
282314
def _create_lockfiles_dir():
283315
dir = os.path.join(config.get_cache_directory(), 'locks')
284316
try:

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
'requests',
4242
'scikit-learn>=0.18',
4343
'python-dateutil', # Installed through pandas anyway.
44-
'oslo.concurrency',
4544
'pandas>=0.19.2',
4645
'scipy>=0.13.3',
4746
'numpy>=1.6.2'
@@ -54,7 +53,8 @@
5453
'pytest',
5554
'pytest-xdist',
5655
'pytest-timeout',
57-
'nbformat'
56+
'nbformat',
57+
'oslo.concurrency'
5858
],
5959
'examples': [
6060
'matplotlib',

0 commit comments

Comments
 (0)