Skip to content

Commit c59c3b8

Browse files
sahithyaravi
authored and mfeurer committed
Fix 838 (#846)
* fix list_evaluations_setups * edit existing test * remove prints * remove print * remove blank line * add comments * add space comment
1 parent 35dd7d3 commit c59c3b8

3 files changed

Lines changed: 17 additions & 11 deletions

File tree

doc/progress.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Changelog
88

99
0.10.0
1010
~~~~~~
11+
* FIX #838: Fix list_evaluations_setups to work when evaluations are not a 100 multiple.
1112
* ADD #737: Add list_evaluations_setups to return hyperparameters along with list of evaluations.
1213
* FIX #261: Test server is cleared of all files uploaded during unit testing.
1314
* FIX #447: All files created by unit tests no longer persist in local.

openml/evaluations/functions.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -324,14 +324,17 @@ def list_evaluations_setups(
324324
evals = list_evaluations(function=function, offset=offset, size=size, run=run, task=task,
325325
setup=setup, flow=flow, uploader=uploader, tag=tag,
326326
per_fold=per_fold, sort_order=sort_order, output_format='dataframe')
327-
328327
# List setups
329-
# Split setups in evals into chunks of N setups as list_setups does not support large size
328+
# list_setups by setup id does not support large sizes (exceeds URL length limit)
329+
# Hence we split the list of unique setup ids returned by list_evaluations into chunks of size N
330330
df = pd.DataFrame()
331331
if len(evals) != 0:
332-
N = 100
333-
setup_chunks = np.split(evals['setup_id'].unique(),
334-
((len(evals['setup_id'].unique()) - 1) // N) + 1)
332+
N = 100 # size of section
333+
length = len(evals['setup_id'].unique()) # length of the array we want to split
334+
# array_split - allows indices_or_sections to not equally divide the array
335+
# array_split -length % N sub-arrays of size length//N + 1 and the rest of size length//N.
336+
setup_chunks = np.array_split(ary=evals['setup_id'].unique(),
337+
indices_or_sections=((length - 1) // N) + 1)
335338
setups = pd.DataFrame()
336339
for setup in setup_chunks:
337340
result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format='dataframe'))

tests/test_evaluations/test_evaluation_functions.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
class TestEvaluationFunctions(TestBase):
77
_multiprocess_can_split_ = True
88

9-
def _check_list_evaluation_setups(self, size, **kwargs):
9+
def _check_list_evaluation_setups(self, **kwargs):
1010
evals_setups = openml.evaluations.list_evaluations_setups("predictive_accuracy",
11-
**kwargs, size=size,
11+
**kwargs,
1212
sort_order='desc',
1313
output_format='dataframe')
1414
evals = openml.evaluations.list_evaluations("predictive_accuracy",
15-
**kwargs, size=size,
15+
**kwargs,
1616
sort_order='desc',
1717
output_format='dataframe')
1818

1919
# Check if list is non-empty
2020
self.assertGreater(len(evals_setups), 0)
21+
# Check if length is accurate
22+
self.assertEqual(len(evals_setups), len(evals))
2123
# Check if output from sort is sorted in the right order
2224
self.assertSequenceEqual(sorted(evals_setups['value'].tolist(), reverse=True),
2325
evals_setups['value'].tolist())
@@ -176,7 +178,7 @@ def test_list_evaluations_setups_filter_flow(self):
176178
openml.config.server = self.production_server
177179
flow_id = [405]
178180
size = 100
179-
evals = self._check_list_evaluation_setups(size, flow=flow_id)
181+
evals = self._check_list_evaluation_setups(flow=flow_id, size=size)
180182
# check if parameters in separate columns works
181183
evals_cols = openml.evaluations.list_evaluations_setups("predictive_accuracy",
182184
flow=flow_id, size=size,
@@ -191,5 +193,5 @@ def test_list_evaluations_setups_filter_flow(self):
191193
def test_list_evaluations_setups_filter_task(self):
192194
openml.config.server = self.production_server
193195
task_id = [6]
194-
size = 100
195-
self._check_list_evaluation_setups(size, task=task_id)
196+
size = 121
197+
self._check_list_evaluation_setups(task=task_id, size=size)

0 commit comments

Comments (0)