Skip to content

Commit f9bf4f2

Browse files
committed
made python unit tests work with new test server setup
1 parent 7236528 commit f9bf4f2

5 files changed

Lines changed: 60 additions & 42 deletions

File tree

tests/test_datasets/test_dataset_functions.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,13 @@ def test_list_datasets(self):
9090
# data from the internet...
9191
datasets = openml.datasets.list_datasets()
9292
# 1087 as the number of datasets on openml.org
93-
self.assertGreaterEqual(len(datasets), 1087)
93+
self.assertGreaterEqual(len(datasets), 100)
9494
for did in datasets:
9595
self._check_dataset(datasets[did])
9696

9797
def test_list_datasets_by_tag(self):
98-
datasets = openml.datasets.list_datasets(tag='uci')
99-
self.assertGreaterEqual(len(datasets), 5)
98+
datasets = openml.datasets.list_datasets(tag='study_14')
99+
self.assertGreaterEqual(len(datasets), 100)
100100
for did in datasets:
101101
self._check_dataset(datasets[did])
102102

@@ -153,20 +153,20 @@ def test_get_dataset(self):
153153
openml.config.get_cache_directory(), "datasets", "1", "qualities.xml")))
154154

155155
def test_get_dataset_with_string(self):
156-
dataset = openml.datasets.get_dataset(373)
156+
dataset = openml.datasets.get_dataset(101)
157157
self.assertRaises(PyOpenMLError, dataset._get_arff, 'arff')
158158
self.assertRaises(PyOpenMLError, dataset.get_data)
159159

160160
def test_get_dataset_sparse(self):
161-
dataset = openml.datasets.get_dataset(1571)
161+
dataset = openml.datasets.get_dataset(102)
162162
X = dataset.get_data()
163163
self.assertIsInstance(X, scipy.sparse.csr_matrix)
164164

165165
def test_download_rowid(self):
166166
# Smoke test which checks that the dataset has the row-id set correctly
167-
did = 164
167+
did = 44
168168
dataset = openml.datasets.get_dataset(did)
169-
self.assertEqual(dataset.row_id_attribute, 'instance')
169+
self.assertEqual(dataset.row_id_attribute, 'Counter')
170170

171171
def test__get_dataset_description(self):
172172
description = _get_dataset_description(self.workdir, 2)

tests/test_flows/test_flow.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def test_get_flow(self):
104104
def test_from_xml_to_xml(self):
105105
# Get the raw xml thing
106106
# TODO maybe get this via get_flow(), which would have to be refactored to allow getting only the xml dictionary
107-
for flow_id in [1185, 1244, 1196, 1112, ]:
107+
# TODO: no sklearn flows.
108+
for flow_id in [3, 5, 7, 9, ]:
108109
flow_xml = _perform_api_call("flow/%d" % flow_id)
109110
flow_dict = xmltodict.parse(flow_xml)
110111

tests/test_runs/test_run_functions.py

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def _perform_run(self, task_id, num_instances, clf):
3131
return run
3232

3333
def test_run_regression_on_classif_task(self):
34-
task_id = 10107
34+
task_id = 115
3535

3636
clf = LinearRegression()
3737
task = openml.tasks.get_task(task_id)
@@ -43,7 +43,7 @@ def test_run_regression_on_classif_task(self):
4343

4444
@mock.patch('openml.flows.sklearn_to_flow')
4545
def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
46-
task_id = 10107
46+
task_id = 115
4747
task = openml.tasks.get_task(task_id)
4848

4949
# Invalid parameter values
@@ -52,16 +52,16 @@ def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
5252
self.assertRaisesRegexp(ValueError, "Penalty term must be positive; got \(C='abc'\)",
5353
openml.runs.run_task, task=task, model=clf)
5454

55-
def test_run_iris(self):
56-
task_id = 10107
57-
num_instances = 150
55+
def test_run_diabetes(self):
56+
task_id = 115
57+
num_instances = 768
5858

5959
clf = LogisticRegression()
6060
self._perform_run(task_id,num_instances, clf)
6161

6262
def test_run_optimize_randomforest_iris(self):
63-
task_id = 10107
64-
num_instances = 150
63+
task_id = 115
64+
num_instances = 768
6565
num_folds = 10
6666
num_iterations = 5
6767

@@ -80,8 +80,8 @@ def test_run_optimize_randomforest_iris(self):
8080
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
8181

8282
def test_run_optimize_bagging_iris(self):
83-
task_id = 10107
84-
num_instances = 150
83+
task_id = 115
84+
num_instances = 768
8585
num_folds = 10
8686
num_iterations = 9 # (num values for C times gamma)
8787

@@ -94,8 +94,8 @@ def test_run_optimize_bagging_iris(self):
9494
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
9595

9696
def test_run_pipeline(self):
97-
task_id = 10107
98-
num_instances = 150
97+
task_id = 115
98+
num_instances = 768
9999
num_folds = 10
100100
num_iterations = 9 # (num values for C times gamma)
101101

@@ -107,8 +107,11 @@ def test_run_pipeline(self):
107107
self.assertEqual(run.trace_content, None)
108108

109109
def test__run_task_get_arffcontent(self):
110-
task = openml.tasks.get_task(1939)
110+
task = openml.tasks.get_task(7)
111111
class_labels = task.class_labels
112+
num_instances = 3196
113+
num_folds = 10
114+
num_repeats = 1
112115

113116
clf = SGDClassifier(loss='hinge', random_state=1)
114117
self.assertRaisesRegexp(AttributeError,
@@ -125,20 +128,24 @@ def test__run_task_get_arffcontent(self):
125128
self.assertIsInstance(arff_tracecontent, type(None))
126129

127130
# 10 times 10 fold CV of 150 samples
128-
self.assertEqual(len(arff_datacontent), 1500)
131+
self.assertEqual(len(arff_datacontent), num_instances * num_repeats)
129132
for arff_line in arff_datacontent:
130-
self.assertEqual(len(arff_line), 8)
133+
print(arff_line)
134+
# check number columns
135+
self.assertEqual(len(arff_line), 7)
136+
# check repeat
131137
self.assertGreaterEqual(arff_line[0], 0)
132-
self.assertLessEqual(arff_line[0], 9)
138+
self.assertLessEqual(arff_line[0], num_repeats - 1)
139+
# check fold
133140
self.assertGreaterEqual(arff_line[1], 0)
134-
self.assertLessEqual(arff_line[1], 9)
141+
self.assertLessEqual(arff_line[1], num_folds - 1)
142+
# check row id
135143
self.assertGreaterEqual(arff_line[2], 0)
136-
self.assertLessEqual(arff_line[2], 149)
137-
self.assertAlmostEqual(sum(arff_line[3:6]), 1.0)
138-
self.assertIn(arff_line[6], ['Iris-setosa', 'Iris-versicolor',
139-
'Iris-virginica'])
140-
self.assertIn(arff_line[7], ['Iris-setosa', 'Iris-versicolor',
141-
'Iris-virginica'])
144+
self.assertLessEqual(arff_line[2], num_instances - 1)
145+
# check confidences
146+
self.assertAlmostEqual(sum(arff_line[3:5]), 1.0)
147+
self.assertIn(arff_line[5], ['won', 'nowin'])
148+
self.assertIn(arff_line[6], ['won', 'nowin'])
142149

143150
def test_get_run(self):
144151
# this run is not available on test
@@ -163,12 +170,16 @@ def _check_run(self, run):
163170
self.assertEqual(len(run), 5)
164171

165172
def test_get_runs_list(self):
173+
# TODO: comes from live, no such lists on test
174+
openml.config.server = self.production_server
166175
runs = openml.runs.list_runs(id=[2])
167176
self.assertEqual(len(runs), 1)
168177
for rid in runs:
169178
self._check_run(runs[rid])
170179

171180
def test_get_runs_list_by_task(self):
181+
# TODO: comes from live, no such lists on test
182+
openml.config.server = self.production_server
172183
task_ids = [20]
173184
runs = openml.runs.list_runs(task=task_ids)
174185
self.assertGreaterEqual(len(runs), 590)
@@ -185,6 +196,8 @@ def test_get_runs_list_by_task(self):
185196
self._check_run(runs[rid])
186197

187198
def test_get_runs_list_by_uploader(self):
199+
# TODO: comes from live, no such lists on test
200+
openml.config.server = self.production_server
188201
# 29 is Dominik Kirchhoff - Joaquin and Jan have too many runs right now
189202
uploader_ids = [29]
190203

@@ -204,6 +217,8 @@ def test_get_runs_list_by_uploader(self):
204217
self._check_run(runs[rid])
205218

206219
def test_get_runs_list_by_flow(self):
220+
# TODO: comes from live, no such lists on test
221+
openml.config.server = self.production_server
207222
flow_ids = [1154]
208223
runs = openml.runs.list_runs(flow=flow_ids)
209224
self.assertGreaterEqual(len(runs), 1)
@@ -220,6 +235,8 @@ def test_get_runs_list_by_flow(self):
220235
self._check_run(runs[rid])
221236

222237
def test_get_runs_pagination(self):
238+
# TODO: comes from live, no such lists on test
239+
openml.config.server = self.production_server
223240
uploader_ids = [1]
224241
size = 10
225242
max = 100
@@ -230,9 +247,11 @@ def test_get_runs_pagination(self):
230247
self.assertIn(runs[rid]["uploader"], uploader_ids)
231248

232249
def test_get_runs_list_by_filters(self):
250+
# TODO: comes from live, no such lists on test
251+
openml.config.server = self.production_server
233252
ids = [505212, 6100]
234253
tasks = [2974, 339]
235-
uploaders_1 = [1, 17]
254+
uploaders_1 = [1, 2]
236255
uploaders_2 = [29, 274]
237256
flows = [74, 1718]
238257

@@ -253,6 +272,8 @@ def test_get_runs_list_by_filters(self):
253272
runs = openml.runs.list_runs(id=ids, task=tasks, uploader=uploaders_1)
254273

255274
def test_get_runs_list_by_tag(self):
275+
# TODO: comes from live, no such lists on test
276+
openml.config.server = self.production_server
256277
runs = openml.runs.list_runs(tag='curves')
257278
self.assertGreaterEqual(len(runs), 1)
258279

tests/test_tasks/test_task.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414

1515
class OpenMLTaskTest(TestBase):
1616

17-
def test_get_clustering_task(self):
18-
self.assertRaisesRegexp(KeyError, 'oml:target_feature',
19-
openml.tasks.get_task, 10128)
20-
2117
@mock.patch('openml.datasets.get_dataset', autospec=True)
2218
def test_get_dataset(self, patch):
2319
patch.return_value = mock.MagicMock()
@@ -40,11 +36,11 @@ def test_get_X_and_Y(self):
4036
self.assertEqual(Y.dtype, int)
4137

4238
# Regression task
43-
task = openml.tasks.get_task(2280)
39+
task = openml.tasks.get_task(631)
4440
X, Y = task.get_X_and_y()
45-
self.assertEqual((8192, 8), X.shape)
41+
self.assertEqual((52, 2), X.shape)
4642
self.assertIsInstance(X, np.ndarray)
47-
self.assertEqual((8192,), Y.shape)
43+
self.assertEqual((52,), Y.shape)
4844
self.assertIsInstance(Y, np.ndarray)
4945
self.assertEqual(Y.dtype, float)
5046

tests/test_tasks/test_task_functions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@ def test_list_tasks_by_type(self):
5959
self._check_task(tasks[tid])
6060

6161
def test_list_tasks_by_tag(self):
62-
num_basic_tasks = 54 # number is flexible, check server if fails
63-
tasks = openml.tasks.list_tasks(tag='basic')
62+
num_basic_tasks = 100 # number is flexible, check server if fails
63+
tasks = openml.tasks.list_tasks(tag='study_14')
6464
self.assertGreaterEqual(len(tasks), num_basic_tasks)
6565
for tid in tasks:
6666
self._check_task(tasks[tid])
6767

6868
def test_list_tasks(self):
6969
tasks = openml.tasks.list_tasks()
70-
self.assertGreaterEqual(len(tasks), 2000)
70+
self.assertGreaterEqual(len(tasks), 900)
7171
for tid in tasks:
7272
self._check_task(tasks[tid])
7373

@@ -83,7 +83,7 @@ def test_list_tasks_paginate(self):
8383
def test_list_tasks_per_type_paginate(self):
8484
size = 10
8585
max = 100
86-
task_types = 5
86+
task_types = 4
8787
for j in range(1,task_types):
8888
for i in range(0, max, size):
8989
tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size)

0 commit comments

Comments (0)