|
1 | 1 | import json |
2 | 2 | import xmltodict |
3 | 3 | import pandas as pd |
| 4 | +import numpy as np |
4 | 5 | from typing import Union, List, Optional, Dict |
5 | 6 | import collections |
6 | 7 |
|
7 | 8 | import openml.utils |
8 | 9 | import openml._api_calls |
9 | 10 | from ..evaluations import OpenMLEvaluation |
| 11 | +import openml |
10 | 12 |
|
11 | 13 |
|
12 | 14 | def list_evaluations( |
@@ -209,8 +211,8 @@ def __list_evaluations(api_call, output_format='object'): |
209 | 211 | 'array_data': array_data} |
210 | 212 |
|
211 | 213 | if output_format == 'dataframe': |
212 | | - evals = pd.DataFrame.from_dict(evals, orient='index') |
213 | | - |
| 214 | + data, index = list(evals.values()), list(evals.keys()) |
| 215 | + evals = pd.DataFrame(data, index=index) |
214 | 216 | return evals |
215 | 217 |
|
216 | 218 |
|
@@ -238,3 +240,87 @@ def list_evaluation_measures() -> List[str]: |
238 | 240 | '"oml:measure" as a list') |
239 | 241 | qualities = qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure'] |
240 | 242 | return qualities |
| 243 | + |
| 244 | + |
def list_evaluations_setups(
    function: str,
    offset: Optional[int] = None,
    size: Optional[int] = None,
    id: Optional[List] = None,
    task: Optional[List] = None,
    setup: Optional[List] = None,
    flow: Optional[List] = None,
    uploader: Optional[List] = None,
    tag: Optional[str] = None,
    per_fold: Optional[bool] = None,
    sort_order: Optional[str] = None,
    output_format: str = 'dataframe'
) -> Union[Dict, pd.DataFrame]:
    """
    List all run-evaluation pairs matching all of the given filters,
    joined with the hyperparameter settings of their corresponding setups.
    (Supports large amount of results)

    Parameters
    ----------
    function : str
        the evaluation function. e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show
    id : list, optional
        list of run ids to filter on
    task : list, optional
        list of task ids to filter on
    setup : list, optional
        list of setup ids to filter on
    flow : list, optional
        list of flow ids to filter on
    uploader : list, optional
        list of uploader ids to filter on
    tag : str, optional
        filter evaluations by tag
    per_fold : bool, optional
        whether to list evaluations per fold
    sort_order : str, optional
        order of sorting evaluations, ascending ("asc") or descending ("desc")
    output_format : str, optional (default='dataframe')
        The parameter decides the format of the output.
        - If 'dict' the output is a dict of dict
        - If 'dataframe' the output is a pandas DataFrame

    Returns
    -------
    dict or dataframe
    """
    # Fetch the matching evaluations first; setup parameters are joined on
    # afterwards via 'setup_id'.
    evals = list_evaluations(function=function, offset=offset, size=size,
                             id=id, task=task, setup=setup, flow=flow,
                             uploader=uploader, tag=tag, per_fold=per_fold,
                             sort_order=sort_order, output_format='dataframe')
    if len(evals) == 0:
        # No matches: an empty DataFrame has no 'setup_id' column, so return
        # early instead of raising a KeyError below.
        return pd.DataFrame() if output_format == 'dataframe' else {}

    # list_setups does not support long id lists, so query the unique setup
    # ids in chunks of at most N.
    N = 100
    setup_ids = evals['setup_id'].unique()
    # np.array_split (unlike np.split) allows sections of unequal length;
    # np.split raises ValueError whenever len(setup_ids) is not an exact
    # multiple of the number of sections.
    setup_chunks = np.array_split(setup_ids, ((len(setup_ids) - 1) // N) + 1)
    setups = pd.DataFrame()
    for chunk in setup_chunks:  # renamed from 'setup' to avoid shadowing the parameter
        result = openml.setups.list_setups(setup=list(chunk),
                                           output_format='dataframe')
        # 'flow_id' already exists in evals; drop it so the merge below does
        # not produce duplicate columns.
        result.drop('flow_id', axis=1, inplace=True)
        setups = pd.concat([setups, result], ignore_index=True)

    # Flatten each setup's parameter records into a list of
    # (parameter_name, value) tuples; None becomes an empty list.
    parameters = []
    for parameter_dict in setups['parameters']:
        if parameter_dict is not None:
            parameters.append([(param['parameter_name'], param['value'])
                               for param in parameter_dict.values()])
        else:
            parameters.append([])
    setups['parameters'] = parameters

    # Merge setups with evaluations; 'left' keeps every evaluation row even
    # if its setup could not be retrieved.
    df = evals.merge(setups, on='setup_id', how='left')
    if output_format == 'dataframe':
        return df
    return df.to_dict()
0 commit comments