Skip to content

Commit aa0aca0

Browse files
doc: make all examples use names instead of IDs as reference. (#1367)
Co-authored-by: ArlindKadra <ArlindKadra@users.noreply.github.com>
1 parent 8261a87 commit aa0aca0

9 files changed

Lines changed: 17 additions & 14 deletions

examples/20_basic/simple_datasets_tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# ==================
2828

2929
# Iris dataset https://www.openml.org/d/61
30-
dataset = openml.datasets.get_dataset(61)
30+
dataset = openml.datasets.get_dataset(dataset_id="iris", version=1)
3131

3232
# Print a summary
3333
print(

examples/20_basic/simple_flows_and_runs_tutorial.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
# Train a machine learning model
2121
# ==============================
2222

23-
# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20
24-
dataset = openml.datasets.get_dataset(20)
23+
# NOTE: We are using dataset "diabetes" from the test server: https://test.openml.org/d/20
24+
dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1)
2525
X, y, categorical_indicator, attribute_names = dataset.get_data(
2626
target=dataset.default_target_attribute
2727
)

examples/20_basic/simple_suites_tutorial.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@
3939
# Downloading benchmark suites
4040
# ============================
4141

42-
suite = openml.study.get_suite(99)
42+
# OpenML Benchmarking Suites and the OpenML-CC18
43+
# https://www.openml.org/s/99
44+
suite = openml.study.get_suite("OpenML-CC18")
4345
print(suite)
4446

4547
####################################################################################################

examples/30_extended/configure_logging.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
import openml
2626

27-
openml.datasets.get_dataset("iris")
27+
openml.datasets.get_dataset("iris", version=1)
2828

2929
# With default configuration, the above example will show no output to console.
3030
# However, in your cache directory you should find a file named 'openml_python.log',
@@ -39,7 +39,7 @@
3939

4040
openml.config.set_console_log_level(logging.DEBUG)
4141
openml.config.set_file_log_level(logging.WARNING)
42-
openml.datasets.get_dataset("iris")
42+
openml.datasets.get_dataset("iris", version=1)
4343

4444
# Now the log level that was previously written to file should also be shown in the console.
4545
# The message is now no longer written to file as the `file_log` was set to level `WARNING`.

examples/30_extended/datasets_tutorial.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
# =================
5252

5353
# This is done based on the dataset ID, or (as here) the dataset name and version.
54-
dataset = openml.datasets.get_dataset(1471)
54+
dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
5555

5656
# Print a summary
5757
print(
@@ -87,8 +87,7 @@
8787
# Starting from 0.15, not downloading data will be the default behavior instead.
8888
# The data will be downloaded automatically when you try to access it through
8989
# openml objects, e.g., using `dataset.features`.
90-
dataset = openml.datasets.get_dataset(1471, download_data=False)
91-
90+
dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False)
9291
############################################################################
9392
# Exercise 2
9493
# **********

examples/30_extended/flows_and_runs_tutorial.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
# Train a scikit-learn model on the data manually.
2626

2727
# NOTE: We are using dataset "eeg-eye-state" from the test server: https://test.openml.org/d/68
28-
dataset = openml.datasets.get_dataset(68)
28+
dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
2929
X, y, categorical_indicator, attribute_names = dataset.get_data(
3030
target=dataset.default_target_attribute
3131
)
@@ -36,7 +36,7 @@
3636
# You can also ask for meta-data to automatically preprocess the data.
3737
#
3838
# * e.g. categorical features -> do feature encoding
39-
dataset = openml.datasets.get_dataset(17)
39+
dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1)
4040
X, y, categorical_indicator, attribute_names = dataset.get_data(
4141
target=dataset.default_target_attribute
4242
)

examples/30_extended/study_tutorial.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@
7979
tasks = [115, 259, 307]
8080

8181
# To verify
82-
suite = openml.study.get_suite(1)
82+
# https://test.openml.org/api/v1/study/1
83+
suite = openml.study.get_suite("OpenML100")
8384
print(all([t_id in suite.tasks for t_id in tasks]))
8485

8586
run_ids = []

examples/30_extended/suites_tutorial.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737

3838
############################################################################
3939
# This is done based on the suite ID or its alias.
40-
suite = openml.study.get_suite(99)
40+
# https://www.openml.org/api/v1/study/99
41+
suite = openml.study.get_suite("OpenML-CC18")
4142
print(suite)
4243

4344
############################################################################

openml/datasets/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ def get_dataset( # noqa: C901, PLR0912
477477
Parameters
478478
----------
479479
dataset_id : int or str
480-
Dataset ID of the dataset to download
480+
The ID or name of the dataset to download.
481481
download_data : bool (default=False)
482482
If True, also download the data file. Beware that some datasets are large and it might
483483
make the operation noticeably slower. Metadata is also still retrieved.

0 commit comments

Comments
 (0)