doc: make all examples use names instead of IDs as reference. (#1367)

LennartPurucker · ArlindKadra · web-flow · commit aa0aca021d53 · 2024-10-16T11:29:24.000+02:00
Co-authored-by: ArlindKadra <ArlindKadra@users.noreply.github.com>
diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py
@@ -27,7 +27,7 @@
 # ==================
 
 # Iris dataset https://www.openml.org/d/61
-dataset = openml.datasets.get_dataset(61)
+dataset = openml.datasets.get_dataset(dataset_id="iris", version=1)
 
 # Print a summary
 print(
diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py
@@ -20,8 +20,8 @@
 # Train a machine learning model
 # ==============================
 
-# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20
-dataset = openml.datasets.get_dataset(20)
+# NOTE: We are using dataset "diabetes" from the test server: https://test.openml.org/d/20
+dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )
diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py
@@ -39,7 +39,9 @@
 # Downloading benchmark suites
 # ============================
 
-suite = openml.study.get_suite(99)
+# OpenML Benchmarking Suites and the OpenML-CC18
+# https://www.openml.org/s/99
+suite = openml.study.get_suite("OpenML-CC18")
 print(suite)
 
 ####################################################################################################
diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py
@@ -24,7 +24,7 @@
 
 import openml
 
-openml.datasets.get_dataset("iris")
+openml.datasets.get_dataset("iris", version=1)
 
 # With default configuration, the above example will show no output to console.
 # However, in your cache directory you should find a file named 'openml_python.log',
@@ -39,7 +39,7 @@
 
 openml.config.set_console_log_level(logging.DEBUG)
 openml.config.set_file_log_level(logging.WARNING)
-openml.datasets.get_dataset("iris")
+openml.datasets.get_dataset("iris", version=1)
 
 # Now the log level that was previously written to file should also be shown in the console.
 # The message is now no longer written to file as the `file_log` was set to level `WARNING`.
diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py
@@ -51,7 +51,7 @@
 # =================
 
 # This is done based on the dataset ID.
-dataset = openml.datasets.get_dataset(1471)
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
 
 # Print a summary
 print(
@@ -87,8 +87,7 @@
 # Starting from 0.15, not downloading data will be the default behavior instead.
 # The data will be downloading automatically when you try to access it through
 # openml objects, e.g., using `dataset.features`.
-dataset = openml.datasets.get_dataset(1471, download_data=False)
-
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False)
 ############################################################################
 # Exercise 2
 # **********
diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py
@@ -25,7 +25,7 @@
 # Train a scikit-learn model on the data manually.
 
 # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68
-dataset = openml.datasets.get_dataset(68)
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )
@@ -36,7 +36,7 @@
 # You can also ask for meta-data to automatically preprocess the data.
 #
 # * e.g. categorical features -> do feature encoding
-dataset = openml.datasets.get_dataset(17)
+dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )
diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py
@@ -79,7 +79,8 @@
 tasks = [115, 259, 307]
 
 # To verify
-suite = openml.study.get_suite(1)
+# https://test.openml.org/api/v1/study/1
+suite = openml.study.get_suite("OpenML100")
 print(all([t_id in suite.tasks for t_id in tasks]))
 
 run_ids = []
diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py
@@ -37,7 +37,8 @@
 
 ############################################################################
 # This is done based on the dataset ID.
-suite = openml.study.get_suite(99)
+# https://www.openml.org/api/v1/study/99
+suite = openml.study.get_suite("OpenML-CC18")
 print(suite)
 
 ############################################################################
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -477,7 +477,7 @@ def get_dataset(  # noqa: C901, PLR0912
     Parameters
     ----------
     dataset_id : int or str
-        Dataset ID of the dataset to download
+        The ID or name of the dataset to download.
     download_data : bool (default=False)
         If True, also download the data file. Beware that some datasets are large and it might
         make the operation noticeably slower. Metadata is also still retrieved.

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@`
`25`	`25`	`# Train a scikit-learn model on the data manually.`
`26`	`26`
`27`	`27`	`# NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68`
`28`		`-dataset = openml.datasets.get_dataset(68)`
	`28`	`+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)`
`29`	`29`	`X, y, categorical_indicator, attribute_names = dataset.get_data(`
`30`	`30`	`target=dataset.default_target_attribute`
`31`	`31`	`)`
`@@ -36,7 +36,7 @@`
`36`	`36`	`# You can also ask for meta-data to automatically preprocess the data.`
`37`	`37`	`#`
`38`	`38`	`# * e.g. categorical features -> do feature encoding`
`39`		`-dataset = openml.datasets.get_dataset(17)`
	`39`	`+dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1)`
`40`	`40`	`X, y, categorical_indicator, attribute_names = dataset.get_data(`
`41`	`41`	`target=dataset.default_target_attribute`
`42`	`42`	`)`