Skip to content

Commit 1c9f64d

Browse files
sahithyaravi authored and PGijsbers committed
Add #737 (#772)
* add hyperparameter column to list_evaluations_setups
1 parent 3d08c2d commit 1c9f64d

3 files changed

Lines changed: 60 additions & 7 deletions

File tree

examples/fetch_evaluations_tutorial.py

Lines changed: 28 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -16,6 +16,7 @@
1616
* Sort the obtained results in descending order of the metric
1717
* Plot a cumulative distribution function for the evaluations
1818
* Compare the top 10 performing flows based on the evaluation performance
19+
* Retrieve evaluations with hyperparameter settings
1920
"""
2021

2122
############################################################################
@@ -147,3 +148,30 @@ def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'):
147148
flow_names = evals.flow_name.unique()[:top_n]
148149
for i in range(top_n):
149150
print((flow_ids[i], flow_names[i]))
151+
152+
#############################################################################
153+
# Obtaining evaluations with hyperparameter settings
154+
# ==================================================
155+
# We'll now obtain the evaluations of a task and a flow with the hyperparameters
156+
157+
# List evaluations in descending order based on predictive_accuracy with
158+
# hyperparameters
159+
evals_setups = openml.evaluations.list_evaluations_setups(function='predictive_accuracy', task=[31],
160+
size=100, sort_order='desc')
161+
162+
""
163+
print(evals_setups.head())
164+
165+
""
166+
# Return evaluations for flow_id in descending order based on predictive_accuracy
167+
# with hyperparameters. parameters_in_separate_columns returns parameters in
168+
# separate columns
169+
evals_setups = openml.evaluations.list_evaluations_setups(function='predictive_accuracy',
170+
flow=[6767],
171+
size=100,
172+
parameters_in_separate_columns=True)
173+
174+
""
175+
print(evals_setups.head(10))
176+
177+
""

openml/evaluations/functions.py

Lines changed: 16 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -254,7 +254,8 @@ def list_evaluations_setups(
254254
tag: Optional[str] = None,
255255
per_fold: Optional[bool] = None,
256256
sort_order: Optional[str] = None,
257-
output_format: str = 'dataframe'
257+
output_format: str = 'dataframe',
258+
parameters_in_separate_columns: bool = False
258259
) -> Union[Dict, pd.DataFrame]:
259260
"""
260261
List all run-evaluation pairs matching all of the given filters
@@ -287,12 +288,19 @@ def list_evaluations_setups(
287288
The parameter decides the format of the output.
288289
- If 'dict' the output is a dict of dict
289290
- If 'dataframe' the output is a pandas DataFrame
291+
parameters_in_separate_columns: bool, optional (default= False)
292+
Returns hyperparameters in separate columns if set to True.
293+
Valid only for a single flow
290294
291295
292296
Returns
293297
-------
294298
dict or dataframe with hyperparameter settings as a list of tuples.
295299
"""
300+
if parameters_in_separate_columns and (flow is None or len(flow) != 1):
301+
raise ValueError("Can set parameters_in_separate_columns to true "
302+
"only for single flow_id")
303+
296304
# List evaluations
297305
evals = list_evaluations(function=function, offset=offset, size=size, id=id, task=task,
298306
setup=setup, flow=flow, uploader=uploader, tag=tag,
@@ -315,14 +323,18 @@ def list_evaluations_setups(
315323
# Convert parameters of setup into list of tuples of (hyperparameter, value)
316324
for parameter_dict in setups['parameters']:
317325
if parameter_dict is not None:
318-
parameters.append([tuple([param['parameter_name'], param['value']])
319-
for param in parameter_dict.values()])
326+
parameters.append({param['full_name']: param['value']
327+
for param in parameter_dict.values()})
320328
else:
321-
parameters.append([])
329+
parameters.append({})
322330
setups['parameters'] = parameters
323331
# Merge setups with evaluations
324332
df = pd.merge(evals, setups, on='setup_id', how='left')
325333

334+
if parameters_in_separate_columns:
335+
df = pd.concat([df.drop('parameters', axis=1),
336+
df['parameters'].apply(pd.Series)], axis=1)
337+
326338
if output_format == 'dataframe':
327339
return df
328340
else:

tests/test_evaluations/test_evaluation_functions.py

Lines changed: 16 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -27,8 +27,11 @@ def _check_list_evaluation_setups(self, size, **kwargs):
2727
# Check if the hyper-parameter column is as accurate and flow_id
2828
for index, row in evals_setups.iterrows():
2929
params = openml.runs.get_run(row['run_id']).parameter_settings
30-
hyper_params = [tuple([param['oml:name'], param['oml:value']]) for param in params]
31-
self.assertTrue(sorted(row['parameters']) == sorted(hyper_params))
30+
list1 = [param['oml:value'] for param in params]
31+
list2 = list(row['parameters'].values())
32+
# check if all values are equal
33+
self.assertSequenceEqual(sorted(list1), sorted(list2))
34+
return evals_setups
3235

3336
def test_evaluation_list_filter_task(self):
3437
openml.config.server = self.production_server
@@ -171,7 +174,17 @@ def test_list_evaluations_setups_filter_flow(self):
171174
openml.config.server = self.production_server
172175
flow_id = [405]
173176
size = 100
174-
self._check_list_evaluation_setups(size, flow=flow_id)
177+
evals = self._check_list_evaluation_setups(size, flow=flow_id)
178+
# check if parameters in separate columns works
179+
evals_cols = openml.evaluations.list_evaluations_setups("predictive_accuracy",
180+
flow=flow_id, size=size,
181+
sort_order='desc',
182+
output_format='dataframe',
183+
parameters_in_separate_columns=True
184+
)
185+
columns = (list(evals_cols.columns))
186+
keys = (list(evals['parameters'].values[0].keys()))
187+
self.assertTrue(all(elem in columns for elem in keys))
175188

176189
def test_list_evaluations_setups_filter_task(self):
177190
openml.config.server = self.production_server

0 commit comments

Comments (0)