Skip to content

Commit 3984a64

Browse files
PGijsbers authored and mfeurer committed
Prefer lazy loading in unit tests (#655)
* Prefer lazy loading for all unit tests that don't explicitly need the arff file.
* Skip test for which API is currently not working.
1 parent 6b081c5 commit 3984a64

3 files changed

Lines changed: 17 additions & 16 deletions

File tree

tests/test_datasets/test_dataset.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ def setUp(self):
2020

2121
# Load dataset id 2 - dataset 2 is interesting because it contains
2222
# missing values, categorical features etc.
23-
self.dataset = openml.datasets.get_dataset(2)
23+
self.dataset = openml.datasets.get_dataset(2, download_data=False)
2424
# titanic has missing values, categories, and string
25-
self.titanic = openml.datasets.get_dataset(40945)
25+
self.titanic = openml.datasets.get_dataset(40945, download_data=False)
2626
# these datasets have some boolean features
27-
self.pc4 = openml.datasets.get_dataset(1049)
28-
self.jm1 = openml.datasets.get_dataset(1053)
27+
self.pc4 = openml.datasets.get_dataset(1049, download_data=False)
28+
self.jm1 = openml.datasets.get_dataset(1053, download_data=False)
2929

3030
def test_get_data_future_warning(self):
3131
warn_msg = 'will change from "array" to "dataframe"'
@@ -197,7 +197,7 @@ class OpenMLDatasetTestOnTestServer(TestBase):
197197
def setUp(self):
198198
super(OpenMLDatasetTestOnTestServer, self).setUp()
199199
# longley, really small dataset
200-
self.dataset = openml.datasets.get_dataset(125)
200+
self.dataset = openml.datasets.get_dataset(125, download_data=False)
201201

202202
def test_tagging(self):
203203
tag = "testing_tag_{}_{}".format(self.id(), time())
@@ -219,7 +219,7 @@ def setUp(self):
219219
super(OpenMLDatasetTestSparse, self).setUp()
220220
openml.config.server = self.production_server
221221

222-
self.sparse_dataset = openml.datasets.get_dataset(4136)
222+
self.sparse_dataset = openml.datasets.get_dataset(4136, download_data=False)
223223

224224
def test_get_sparse_dataset_with_target(self):
225225
X, y = self.sparse_dataset.get_data(

tests/test_datasets/test_dataset_functions.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ def test_get_datasets_lazy(self):
271271
openml.config.get_cache_directory(), "datasets", "2", "dataset.arff")))
272272

273273
def test_get_dataset(self):
274+
# This is the only non-lazy load to ensure default behaviour works.
274275
dataset = openml.datasets.get_dataset(1)
275276
self.assertEqual(type(dataset), OpenMLDataset)
276277
self.assertEqual(dataset.name, 'anneal')
@@ -313,7 +314,7 @@ def test_get_dataset_lazy(self):
313314

314315
# Issue324 Properly handle private datasets when trying to access them
315316
openml.config.server = self.production_server
316-
self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45)
317+
self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45, False)
317318

318319
def test_get_dataset_lazy_all_functions(self):
319320
""" Test that all expected functionality is available without downloading the dataset. """
@@ -343,14 +344,14 @@ def test_get_dataset_lazy_all_functions(self):
343344
openml.config.get_cache_directory(), "datasets", "1", "dataset.arff")))
344345

345346
def test_get_dataset_sparse(self):
346-
dataset = openml.datasets.get_dataset(102)
347+
dataset = openml.datasets.get_dataset(102, download_data=False)
347348
X = dataset.get_data(dataset_format='array')
348349
self.assertIsInstance(X, scipy.sparse.csr_matrix)
349350

350351
def test_download_rowid(self):
351352
# Smoke test which checks that the dataset has the row-id set correctly
352353
did = 44
353-
dataset = openml.datasets.get_dataset(did)
354+
dataset = openml.datasets.get_dataset(did, download_data=False)
354355
self.assertEqual(dataset.row_id_attribute, 'Counter')
355356

356357
def test__get_dataset_description(self):
@@ -416,7 +417,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch):
416417
self.assertEqual(len(os.listdir(datasets_cache_dir)), 0)
417418

418419
def test_publish_dataset(self):
419-
420+
# lazy loading not possible as we need the arff-file.
420421
openml.datasets.get_dataset(3)
421422
file_path = os.path.join(openml.config.get_cache_directory(),
422423
"datasets", "3", "dataset.arff")
@@ -434,9 +435,9 @@ def test_publish_dataset(self):
434435

435436
def test__retrieve_class_labels(self):
436437
openml.config.cache_directory = self.static_cache_dir
437-
labels = openml.datasets.get_dataset(2).retrieve_class_labels()
438+
labels = openml.datasets.get_dataset(2, download_data=False).retrieve_class_labels()
438439
self.assertEqual(labels, ['1', '2', '3', '4', '5', 'U'])
439-
labels = openml.datasets.get_dataset(2).retrieve_class_labels(
440+
labels = openml.datasets.get_dataset(2, download_data=False).retrieve_class_labels(
440441
target_name='product-type')
441442
self.assertEqual(labels, ['C', 'H', 'G'])
442443

@@ -761,9 +762,8 @@ def test_create_invalid_dataset(self):
761762
)
762763

763764
def test_get_online_dataset_arff(self):
764-
765-
# Australian dataset
766-
dataset_id = 100
765+
dataset_id = 100 # Australian
766+
# lazy loading not used as arff file is checked.
767767
dataset = openml.datasets.get_dataset(dataset_id)
768768
decoder = arff.ArffDecoder()
769769
# check if the arff from the dataset is
@@ -785,7 +785,7 @@ def test_get_online_dataset_format(self):
785785

786786
# Phoneme dataset
787787
dataset_id = 77
788-
dataset = openml.datasets.get_dataset(dataset_id)
788+
dataset = openml.datasets.get_dataset(dataset_id, download_data=False)
789789

790790
self.assertEqual(
791791
(dataset.format).lower(),

tests/test_runs/test_run_functions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,7 @@ def test_get_runs_list_by_filters(self):
14341434

14351435
runs = openml.runs.list_runs(id=ids, task=tasks, uploader=uploaders_1)
14361436

1437+
@unittest.skip("API currently broken: https://github.com/openml/OpenML/issues/948")
14371438
def test_get_runs_list_by_tag(self):
14381439
# TODO: comes from live, no such lists on test
14391440
openml.config.server = self.production_server

0 commit comments

Comments
 (0)