33from typing import cast , Dict , List , Optional , Union
44import warnings
55
6- import dateutil .parser
76import xmltodict
87import pandas as pd
98
@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
9493 description = result_dict ["oml:description" ]
9594 status = result_dict ["oml:status" ]
9695 creation_date = result_dict ["oml:creation_date" ]
97- creation_date_as_date = dateutil .parser .parse (creation_date )
9896 creator = result_dict ["oml:creator" ]
9997
10098 # tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
106104 current_tag ["window_start" ] = tag ["oml:window_start" ]
107105 tags .append (current_tag )
108106
109- if "oml:data" in result_dict :
110- datasets = [int (x ) for x in result_dict ["oml:data" ]["oml:data_id" ]]
111- else :
112- raise ValueError ("No datasets attached to study {}!" .format (id_ ))
113- if "oml:tasks" in result_dict :
114- tasks = [int (x ) for x in result_dict ["oml:tasks" ]["oml:task_id" ]]
115- else :
116- raise ValueError ("No tasks attached to study {}!" .format (id_ ))
107+ def get_nested_ids_from_result_dict (key : str , subkey : str ) -> Optional [List ]:
108+ if result_dict .get (key ) is not None :
109+ return [int (oml_id ) for oml_id in result_dict [key ][subkey ]]
110+ return None
117111
118- if main_entity_type in ["runs" , "run" ]:
112+ datasets = get_nested_ids_from_result_dict ("oml:data" , "oml:data_id" )
113+ tasks = get_nested_ids_from_result_dict ("oml:tasks" , "oml:task_id" )
119114
120- if "oml:flows" in result_dict :
121- flows = [int (x ) for x in result_dict ["oml:flows" ]["oml:flow_id" ]]
122- else :
123- raise ValueError ("No flows attached to study {}!" .format (id_ ))
124- if "oml:setups" in result_dict :
125- setups = [int (x ) for x in result_dict ["oml:setups" ]["oml:setup_id" ]]
126- else :
127- raise ValueError ("No setups attached to study {}!" .format (id_ ))
128- if "oml:runs" in result_dict :
129- runs = [
130- int (x ) for x in result_dict ["oml:runs" ]["oml:run_id" ]
131- ] # type: Optional[List[int]]
132- else :
133- if creation_date_as_date < dateutil .parser .parse ("2019-01-01" ):
134- # Legacy studies did not require runs
135- runs = None
136- else :
137- raise ValueError ("No runs attached to study {}!" .format (id_ ))
115+ if main_entity_type in ["runs" , "run" ]:
116+ flows = get_nested_ids_from_result_dict ("oml:flows" , "oml:flow_id" )
117+ setups = get_nested_ids_from_result_dict ("oml:setups" , "oml:setup_id" )
118+ runs = get_nested_ids_from_result_dict ("oml:runs" , "oml:run_id" )
138119
139120 study = OpenMLStudy (
140121 study_id = study_id ,
@@ -177,9 +158,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
177158def create_study (
178159 name : str ,
179160 description : str ,
180- run_ids : List [int ],
181- alias : Optional [str ],
182- benchmark_suite : Optional [int ],
161+ run_ids : Optional [ List [int ]] = None ,
162+ alias : Optional [str ] = None ,
163+ benchmark_suite : Optional [int ] = None ,
183164) -> OpenMLStudy :
184165 """
185166 Creates an OpenML study (collection of data, tasks, flows, setups and runs),
@@ -188,16 +169,19 @@ def create_study(
188169
189170 Parameters
190171 ----------
191- alias : str (optional)
192- a string ID, unique on server (url-friendly)
193172 benchmark_suite : int (optional)
194173 the benchmark suite (another study) upon which this study is run.
195174 name : str
196175 the name of the study (meta-info)
197176 description : str
198177 brief description (meta-info)
199- run_ids : list
200- a list of run ids associated with this study
178+ run_ids : list, optional
179+ a list of run ids associated with this study,
180+ these can also be added later with ``attach_to_study``.
181+ alias : str (optional)
182+ a string ID, unique on server (url-friendly)
183+ benchmark_suite: int (optional)
184+ the ID of the suite for which this study contains run results
201185
202186 Returns
203187 -------
@@ -217,28 +201,29 @@ def create_study(
217201 data = None ,
218202 tasks = None ,
219203 flows = None ,
220- runs = run_ids ,
204+ runs = run_ids if run_ids != [] else None ,
221205 setups = None ,
222206 )
223207
224208
225209def create_benchmark_suite (
226- name : str , description : str , task_ids : List [int ], alias : Optional [str ],
210+ name : str , description : str , task_ids : List [int ], alias : Optional [str ] = None ,
227211) -> OpenMLBenchmarkSuite :
228212 """
229213 Creates an OpenML benchmark suite (collection of entity types, where
230214 the tasks are the linked entity)
231215
232216 Parameters
233217 ----------
234- alias : str (optional)
235- a string ID, unique on server (url-friendly)
236218 name : str
237219 the name of the study (meta-info)
238220 description : str
239221 brief description (meta-info)
240222 task_ids : list
241223 a list of task ids associated with this study
224+ more can be added later with ``attach_to_suite``.
225+ alias : str (optional)
226+ a string ID, unique on server (url-friendly)
242227
243228 Returns
244229 -------
0 commit comments