|
1 | 1 | import io |
2 | 2 | import os |
3 | 3 | import re |
4 | | -import warnings |
5 | 4 | from typing import List, Dict, Union |
6 | 5 |
|
7 | 6 | import numpy as np |
|
10 | 9 |
|
11 | 10 | import xmltodict |
12 | 11 | from scipy.sparse import coo_matrix |
13 | | -# Currently, importing oslo raises a lot of warnings that it will stop working |
14 | | -# under python3.8; remove this once they disappear |
15 | | -with warnings.catch_warnings(): |
16 | | - warnings.simplefilter("ignore") |
17 | | - from oslo_concurrency import lockutils |
18 | 12 | from collections import OrderedDict |
19 | 13 |
|
20 | 14 | import openml.utils |
@@ -334,6 +328,7 @@ def get_datasets( |
334 | 328 | return datasets |
335 | 329 |
|
336 | 330 |
|
| 331 | +@openml.utils.thread_safe_if_oslo_installed |
337 | 332 | def get_dataset(dataset_id: Union[int, str], download_data: bool = True) -> OpenMLDataset: |
338 | 333 | """ Download the OpenML dataset representation, optionally also download actual data file. |
339 | 334 |
|
@@ -361,38 +356,34 @@ def get_dataset(dataset_id: Union[int, str], download_data: bool = True) -> Open |
361 | 356 | raise ValueError("Dataset ID is neither an Integer nor can be " |
362 | 357 | "cast to an Integer.") |
363 | 358 |
|
364 | | - with lockutils.external_lock( |
365 | | - name='datasets.functions.get_dataset:%d' % dataset_id, |
366 | | - lock_path=_create_lockfiles_dir(), |
367 | | - ): |
368 | | - did_cache_dir = _create_cache_directory_for_id( |
369 | | - DATASETS_CACHE_DIR_NAME, dataset_id, |
370 | | - ) |
| 359 | + did_cache_dir = _create_cache_directory_for_id( |
| 360 | + DATASETS_CACHE_DIR_NAME, dataset_id, |
| 361 | + ) |
371 | 362 |
|
372 | | - try: |
373 | | - remove_dataset_cache = True |
374 | | - description = _get_dataset_description(did_cache_dir, dataset_id) |
375 | | - features = _get_dataset_features(did_cache_dir, dataset_id) |
376 | | - qualities = _get_dataset_qualities(did_cache_dir, dataset_id) |
377 | | - |
378 | | - arff_file = _get_dataset_arff(description) if download_data else None |
379 | | - |
380 | | - remove_dataset_cache = False |
381 | | - except OpenMLServerException as e: |
382 | | - # if there was an exception, |
383 | | - # check if the user had access to the dataset |
384 | | - if e.code == 112: |
385 | | - raise OpenMLPrivateDatasetError(e.message) from None |
386 | | - else: |
387 | | - raise e |
388 | | - finally: |
389 | | - if remove_dataset_cache: |
390 | | - _remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, |
391 | | - did_cache_dir) |
392 | | - |
393 | | - dataset = _create_dataset_from_description( |
394 | | - description, features, qualities, arff_file |
395 | | - ) |
| 363 | + try: |
| 364 | + remove_dataset_cache = True |
| 365 | + description = _get_dataset_description(did_cache_dir, dataset_id) |
| 366 | + features = _get_dataset_features(did_cache_dir, dataset_id) |
| 367 | + qualities = _get_dataset_qualities(did_cache_dir, dataset_id) |
| 368 | + |
| 369 | + arff_file = _get_dataset_arff(description) if download_data else None |
| 370 | + |
| 371 | + remove_dataset_cache = False |
| 372 | + except OpenMLServerException as e: |
| 373 | + # if there was an exception, |
| 374 | + # check if the user had access to the dataset |
| 375 | + if e.code == 112: |
| 376 | + raise OpenMLPrivateDatasetError(e.message) from None |
| 377 | + else: |
| 378 | + raise e |
| 379 | + finally: |
| 380 | + if remove_dataset_cache: |
| 381 | + _remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, |
| 382 | + did_cache_dir) |
| 383 | + |
| 384 | + dataset = _create_dataset_from_description( |
| 385 | + description, features, qualities, arff_file |
| 386 | + ) |
396 | 387 | return dataset |
397 | 388 |
|
398 | 389 |
|
|
0 commit comments