
Commit 1d28529

add list_evaluations_setups and fix list_evaluations order
1 parent 88b87ad commit 1d28529

3 files changed

Lines changed: 137 additions & 4 deletions


openml/evaluations/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 from .evaluation import OpenMLEvaluation
-from .functions import list_evaluations, list_evaluation_measures
+from .functions import list_evaluations, list_evaluation_measures, list_evaluations_setups
 
-__all__ = ['OpenMLEvaluation', 'list_evaluations', 'list_evaluation_measures']
+__all__ = ['OpenMLEvaluation', 'list_evaluations', 'list_evaluation_measures', 'list_evaluations_setups']
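
With list_evaluations_setups added to __all__, the helper is importable straight from the subpackage; a trivial sketch, assuming an openml build that includes this commit:

    # Both the existing and the new listing helpers come from openml.evaluations.
    from openml.evaluations import list_evaluations, list_evaluations_setups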

openml/evaluations/functions.py

Lines changed: 88 additions & 2 deletions
@@ -1,12 +1,14 @@
 import json
 import xmltodict
 import pandas as pd
+import numpy as np
 from typing import Union, List, Optional, Dict
 import collections
 
 import openml.utils
 import openml._api_calls
 from ..evaluations import OpenMLEvaluation
+import openml
 
 
 def list_evaluations(
@@ -209,8 +211,8 @@ def __list_evaluations(api_call, output_format='object'):
                              'array_data': array_data}
 
     if output_format == 'dataframe':
-        evals = pd.DataFrame.from_dict(evals, orient='index')
-
+        data, index = list(evals.values()), list(evals.keys())
+        evals = pd.DataFrame(data, index=index)
     return evals
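
For context on the change above: the commit message's "fix list_evaluations order" refers to pd.DataFrame.from_dict(evals, orient='index') not reliably keeping rows in the order the server returned them, whereas building the frame from explicit value and index lists preserves the insertion order of the evals dict. A minimal sketch with made-up evaluation records (not real OpenML data):

    import pandas as pd

    # Two hypothetical evaluation records keyed by run_id, in server order.
    evals = {10: {'task_id': 3, 'setup_id': 5},
             7: {'task_id': 3, 'setup_id': 6}}
    data, index = list(evals.values()), list(evals.keys())
    frame = pd.DataFrame(data, index=index)
    print(list(frame.index))  # [10, 7] -- row order follows the dict order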
@@ -238,3 +240,87 @@ def list_evaluation_measures() -> List[str]:
                          '"oml:measure" as a list')
     qualities = qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure']
     return qualities
+
+
+def list_evaluations_setups(
+        function: str,
+        offset: Optional[int] = None,
+        size: Optional[int] = None,
+        id: Optional[List] = None,
+        task: Optional[List] = None,
+        setup: Optional[List] = None,
+        flow: Optional[List] = None,
+        uploader: Optional[List] = None,
+        tag: Optional[str] = None,
+        per_fold: Optional[bool] = None,
+        sort_order: Optional[str] = None,
+        output_format: str = 'dataframe'
+) -> Union[Dict, pd.DataFrame]:
+    """
+    List all run-evaluation pairs matching all of the given filters.
+    (Supports a large number of results)
+
+    Parameters
+    ----------
+    function : str
+        the evaluation function, e.g., predictive_accuracy
+    offset : int, optional
+        the number of runs to skip, starting from the first
+    size : int, optional
+        the maximum number of runs to show
+    id : list, optional
+    task : list, optional
+    setup : list, optional
+    flow : list, optional
+    uploader : list, optional
+    tag : str, optional
+    per_fold : bool, optional
+    sort_order : str, optional
+        order of sorting evaluations, ascending ("asc") or descending ("desc")
+    output_format : str, optional (default='dataframe')
+        The parameter decides the format of the output.
+        - If 'dict' the output is a dict of dicts
+        - If 'dataframe' the output is a pandas DataFrame
+
+    Returns
+    -------
+    dict or dataframe
+    """
+    # List evaluations
+    evals = list_evaluations(function=function, offset=offset, size=size, id=id,
+                             task=task, setup=setup, flow=flow, uploader=uploader,
+                             tag=tag, per_fold=per_fold, sort_order=sort_order,
+                             output_format='dataframe')
+
+    # List setups
+    # Split the setups found in evals into chunks of N setups,
+    # as list_setups does not support long lists
+    N = 100
+    setup_chunks = np.split(evals['setup_id'].unique(),
+                            ((len(evals['setup_id'].unique()) - 1) // N) + 1)
+    setups = pd.DataFrame()
+    for setup in setup_chunks:
+        result = openml.setups.list_setups(setup=list(setup), output_format='dataframe')
+        result.drop('flow_id', axis=1, inplace=True)
+        setups = pd.concat([setups, result], ignore_index=True)
+
+    parameters = []
+    for parameter_dict in setups['parameters']:
+        if parameter_dict is not None:
+            parameters.append([tuple([param['parameter_name'], param['value']])
+                               for param in parameter_dict.values()])
+        else:
+            parameters.append([])
+    setups['parameters'] = parameters
+
+    # Merge setups with evaluations
+    df = evals.merge(setups, on='setup_id', how='left')
+    if output_format == 'dataframe':
+        return df
+    else:
+        return df.to_dict()
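
A quick usage sketch of the new helper (illustrative only: the flow id and size mirror the tests below, and a reachable OpenML production server is assumed):

    import openml

    # Top-10 predictive_accuracy evaluations for flow 405, each row merged with
    # the hyper-parameter settings of the run's setup.
    df = openml.evaluations.list_evaluations_setups(
        "predictive_accuracy", flow=[405], size=10,
        sort_order='desc', output_format='dataframe')

    # The 'parameters' column holds a list of (name, value) tuples per run.
    print(df[['run_id', 'setup_id', 'flow_id', 'parameters']].head())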

tests/test_evaluations/test_evaluation_functions.py

Lines changed: 47 additions & 0 deletions
@@ -142,3 +142,50 @@ def test_list_evaluation_measures(self):
         measures = openml.evaluations.list_evaluation_measures()
         self.assertEqual(isinstance(measures, list), True)
         self.assertEqual(all([isinstance(s, str) for s in measures]), True)
+
+    def test_list_evaluations_setups_filter_flow(self):
+        openml.config.server = self.production_server
+        flow_id = 405
+        size = 10
+        evals_setups = openml.evaluations.list_evaluations_setups(
+            "predictive_accuracy", flow=[flow_id], size=size,
+            sort_order='desc', output_format='dataframe')
+        evals = openml.evaluations.list_evaluations(
+            "predictive_accuracy", flow=[flow_id], size=size,
+            sort_order='desc', output_format='dataframe')
+
+        # Check that the list is non-empty
+        self.assertGreater(len(evals_setups), 0)
+        # Check that the output and order of list_evaluations are preserved
+        self.assertTrue((evals_setups['run_id'].values == evals['run_id'].values).all())
+        # Check that the hyper-parameter column is accurate and the flow_id matches
+        for index, row in evals_setups.iterrows():
+            params = openml.runs.get_run(row['run_id']).parameter_settings
+            hyper_params = [tuple([param['oml:name'], param['oml:value']]) for param in params]
+            self.assertTrue(row['parameters'] == hyper_params)
+            self.assertEqual(row['flow_id'], flow_id)
+
+    def test_list_evaluations_setups_filter_task(self):
+        openml.config.server = self.production_server
+        task_id = 6
+        size = 20
+        evals_setups = openml.evaluations.list_evaluations_setups(
+            "predictive_accuracy", task=[task_id], size=size,
+            sort_order='desc', output_format='dataframe')
+        evals = openml.evaluations.list_evaluations(
+            "predictive_accuracy", task=[task_id], size=size,
+            sort_order='desc', output_format='dataframe')
+
+        # Check that the list is non-empty
+        self.assertGreater(len(evals_setups), 0)
+        # Check that the output and order of list_evaluations are preserved
+        self.assertTrue((evals_setups['run_id'].values == evals['run_id'].values).all())
+        # Check that the hyper-parameter column is accurate and the task_id matches
+        for index, row in evals_setups.iterrows():
+            params = openml.runs.get_run(row['run_id']).parameter_settings
+            hyper_params = [tuple([param['oml:name'], param['oml:value']]) for param in params]
+            self.assertTrue(row['parameters'] == hyper_params)
+            self.assertEqual(row['task_id'], task_id)
