Skip to content

Commit 302bbc4

Browse files
Merge branch 'develop' into maint/rework_docu
2 parents d56ece4 + bc50a88 commit 302bbc4

8 files changed

Lines changed: 184 additions & 177 deletions

File tree

openml/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
# The following line *must* be the last in the module, exactly as formatted:
66
from __future__ import annotations
77

8-
__version__ = "0.15.1"
8+
__version__ = "0.16.0"

openml/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def _resolve_default_cache_dir() -> Path:
150150
"apikey": "",
151151
"server": "https://www.openml.org/api/v1/xml",
152152
"cachedir": _resolve_default_cache_dir(),
153-
"avoid_duplicate_runs": True,
153+
"avoid_duplicate_runs": False,
154154
"retry_policy": "human",
155155
"connection_n_retries": 5,
156156
"show_progress": False,

openml/runs/functions.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
def run_model_on_task( # noqa: PLR0913
6060
model: Any,
6161
task: int | str | OpenMLTask,
62-
avoid_duplicate_runs: bool = True, # noqa: FBT001, FBT002
62+
avoid_duplicate_runs: bool | None = None,
6363
flow_tags: list[str] | None = None,
6464
seed: int | None = None,
6565
add_local_measures: bool = True, # noqa: FBT001, FBT002
@@ -77,9 +77,10 @@ def run_model_on_task( # noqa: PLR0913
7777
task : OpenMLTask or int or str
7878
Task to perform or Task id.
7979
This may be a model instead if the first argument is an OpenMLTask.
80-
avoid_duplicate_runs : bool, optional (default=True)
80+
avoid_duplicate_runs : bool, optional (default=None)
8181
If True, the run will throw an error if the setup/task combination is already present on
8282
the server. This feature requires an internet connection.
83+
If None, the value of ``openml.config.avoid_duplicate_runs`` is used (False unless configured otherwise).
8384
flow_tags : List[str], optional (default=None)
8485
A list of tags that the flow should have at creation.
8586
seed: int, optional (default=None)
@@ -104,6 +105,8 @@ def run_model_on_task( # noqa: PLR0913
104105
flow : OpenMLFlow (optional, only if `return_flow` is True).
105106
Flow generated from the model.
106107
"""
108+
if avoid_duplicate_runs is None:
109+
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
107110
if avoid_duplicate_runs and not config.apikey:
108111
warnings.warn(
109112
"avoid_duplicate_runs is set to True, but no API key is set. "
@@ -175,7 +178,7 @@ def get_task_and_type_conversion(_task: int | str | OpenMLTask) -> OpenMLTask:
175178
def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
176179
flow: OpenMLFlow,
177180
task: OpenMLTask,
178-
avoid_duplicate_runs: bool = True, # noqa: FBT002, FBT001
181+
avoid_duplicate_runs: bool | None = None,
179182
flow_tags: list[str] | None = None,
180183
seed: int | None = None,
181184
add_local_measures: bool = True, # noqa: FBT001, FBT002
@@ -195,9 +198,10 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
195198
all supervised estimators of scikit learn follow this definition of a model.
196199
task : OpenMLTask
197200
Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.
198-
avoid_duplicate_runs : bool, optional (default=True)
201+
avoid_duplicate_runs : bool, optional (default=None)
199202
If True, the run will throw an error if the setup/task combination is already present on
200203
the server. This feature requires an internet connection.
204+
If None, the value of ``openml.config.avoid_duplicate_runs`` is used (False unless configured otherwise).
201205
flow_tags : List[str], optional (default=None)
202206
A list of tags that the flow should have at creation.
203207
seed: int, optional (default=None)
@@ -221,6 +225,9 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
221225
if flow_tags is not None and not isinstance(flow_tags, list):
222226
raise ValueError("flow_tags should be a list")
223227

228+
if avoid_duplicate_runs is None:
229+
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
230+
224231
# TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
225232
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
226233
if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):

openml/testing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
101101
self.cached = True
102102
openml.config.apikey = TestBase.apikey
103103
self.production_server = "https://www.openml.org/api/v1/xml"
104-
openml.config.avoid_duplicate_runs = False
105104
openml.config.set_root_cache_directory(str(self.workdir))
106105

107106
# Increase the number of retries to avoid spurious server failures

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ test=[
8282
"openml-sklearn",
8383
"packaging",
8484
"pytest-mock",
85+
"openml-sklearn",
8586
]
8687
examples=[
8788
"matplotlib",

tests/test_openml/test_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,14 @@ def test_configuration_file_not_overwritten_on_load():
175175

176176
def test_configuration_loads_booleans(tmp_path):
177177
config_file_content = "avoid_duplicate_runs=true\nshow_progress=false"
178-
with (tmp_path / "config").open("w") as config_file:
178+
tmp_file = tmp_path / "config"
179+
with tmp_file.open("w") as config_file:
179180
config_file.write(config_file_content)
180-
read_config = openml.config._parse_config(tmp_path)
181+
read_config = openml.config._parse_config(tmp_file)
181182

182183
# Explicit test to avoid truthy/falsy modes of other types
183-
assert True == read_config["avoid_duplicate_runs"]
184-
assert False == read_config["show_progress"]
184+
assert read_config["avoid_duplicate_runs"] is True
185+
assert read_config["show_progress"] is False
185186

186187

187188
def test_openml_cache_dir_env_var(tmp_path: Path) -> None:

tests/test_runs/test_run.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,6 @@ def test_to_from_filesystem_vanilla(self):
130130
model=model,
131131
task=task,
132132
add_local_measures=False,
133-
avoid_duplicate_runs=False,
134133
upload_flow=True,
135134
)
136135

@@ -174,7 +173,6 @@ def test_to_from_filesystem_search(self):
174173
model=model,
175174
task=task,
176175
add_local_measures=False,
177-
avoid_duplicate_runs=False,
178176
)
179177

180178
cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
@@ -311,7 +309,6 @@ def test_publish_with_local_loaded_flow(self):
311309
flow=flow,
312310
task=task,
313311
add_local_measures=False,
314-
avoid_duplicate_runs=False,
315312
upload_flow=False,
316313
)
317314

@@ -351,7 +348,6 @@ def test_offline_and_online_run_identical(self):
351348
flow=flow,
352349
task=task,
353350
add_local_measures=False,
354-
avoid_duplicate_runs=False,
355351
upload_flow=False,
356352
)
357353

0 commit comments

Comments
 (0)