Merge pull request #684 from openml/update_examples

PGijsbers · web-flow · commit 79c1953c98f2 · 2019-04-23T10:28:23.000+02:00
Added notice to all examples for using the test server. Use test serv…
diff --git a/doc/conf.py b/doc/conf.py
@@ -17,12 +17,6 @@
 import sphinx_bootstrap_theme
 import openml
 
-
-# amueller's read/write key
-openml.config.server = "https://test.openml.org/api/v1/xml"
-openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de"
-
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
diff --git a/examples/create_upload_tutorial.py b/examples/create_upload_tutorial.py
@@ -13,9 +13,12 @@
 from openml.datasets.functions import create_dataset
 
 ############################################################################
-# For this tutorial we will upload to the test server to not pollute the live
-# server with countless copies of the same dataset.
-openml.config.server = 'https://test.openml.org/api/v1/xml'
+# .. warning:: This example uploads data. For that reason, this example
+#   connects to the test server at test.openml.org. This prevents the main
+#   server from being crowded with example datasets, tasks, runs, and so on.
+
+openml.config.start_using_configuration_for_example()
+############################################################################
 
 ############################################################################
 # Below we will cover the following cases of the dataset object:
@@ -309,3 +312,7 @@
 
 upload_did = xor_dataset.publish()
 print('URL for dataset: %s/data/%d' % (openml.config.server, upload_did))
+
+
+############################################################################
+openml.config.stop_using_configuration_for_example()
diff --git a/examples/datasets_tutorial.py b/examples/datasets_tutorial.py
@@ -5,7 +5,7 @@
 
 How to list and download datasets.
 """
-
+############################################################################
 import openml
 import pandas as pd
 
@@ -43,9 +43,8 @@
 # Download datasets
 # =================
 
-# This is done based on the dataset ID ('did').
-dataset = openml.datasets.get_dataset(68)
-# NOTE: Dataset 68 exists on the test server https://test.openml.org/d/68
+# This is done based on the dataset ID.
+dataset = openml.datasets.get_dataset(1471)
 
 # Print a summary
 print("This is dataset '%s', the target feature is '%s'" %
@@ -84,8 +83,7 @@
 # data file. The dataset object can be used as normal.
 # Whenever you use any functionality that requires the data,
 # such as `get_data`, the data will be downloaded.
-dataset = openml.datasets.get_dataset(68, download_data=False)
-# NOTE: Dataset 68 exists on the test server https://test.openml.org/d/68
+dataset = openml.datasets.get_dataset(1471, download_data=False)
 
 ############################################################################
 # Exercise 2
diff --git a/examples/flows_and_runs_tutorial.py b/examples/flows_and_runs_tutorial.py
@@ -14,8 +14,13 @@
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # Train a scikit-learn model on the data manually.
+#
+# .. warning:: This example uploads data. For that reason, this example
+#   connects to the test server at test.openml.org. This prevents the main
+#   server from being crowded with example datasets, tasks, runs, and so on.
 
-# NOTE: Dataset 68 exists on the test server https://test.openml.org/d/68
+openml.config.start_using_configuration_for_example()
+# NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68
 dataset = openml.datasets.get_dataset(68)
 X, y = dataset.get_data(
     dataset_format='array',
@@ -159,3 +164,7 @@
     run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
     myrun = run.publish()
     print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))
+
+
+############################################################################
+openml.config.stop_using_configuration_for_example()
diff --git a/examples/introduction_tutorial.py b/examples/introduction_tutorial.py
@@ -45,12 +45,21 @@
 #   file must be in the directory ~/.openml/config and exist prior to
 #   importing the openml module.
 # * Run the code below, replacing 'YOURKEY' with your API key.
-
+#
+# .. warning:: This example uploads data. For that reason, this example
+#   connects to the test server instead. This prevents the live server from
+#   being crowded with example datasets, tasks, studies, and so on.
 ############################################################################
 import openml
 from sklearn import neighbors
 
-# Uncomment and set your OpenML key. Don't share your key with others.
+openml.config.start_using_configuration_for_example()
+
+############################################################################
+# When using the main server instead, make sure your apikey is configured.
+# This can be done with the following line of code (uncomment it!).
+# Never share your apikey with others.
+
 # openml.config.apikey = 'YOURKEY'
 
 ############################################################################
@@ -80,6 +89,9 @@
 run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
 # Publish the experiment on OpenML (optional, requires an API key).
 # For this tutorial, our configuration publishes to the test server
-# as to not pollute the main server.
+# so as not to crowd the main server with runs created by examples.
 myrun = run.publish()
 print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))
+
+############################################################################
+openml.config.stop_using_configuration_for_example()
diff --git a/examples/run_setup_tutorial.py b/examples/run_setup_tutorial.py
@@ -25,6 +25,9 @@
        and solve the same task again;
     3) We will verify that the obtained results are exactly the same.
 
+.. warning:: This example uploads data. For that reason, this example
+   connects to the test server at test.openml.org. This prevents the main
+   server from being crowded with example datasets, tasks, runs, and so on.
 """
 import logging
 import numpy as np
@@ -36,6 +39,7 @@
 
 root = logging.getLogger()
 root.setLevel(logging.INFO)
+openml.config.start_using_configuration_for_example()
 
 ###############################################################################
 # 1) Create a flow and use it to solve a task
@@ -100,3 +104,7 @@
 # the run has stored all predictions in the field data content
 np.testing.assert_array_equal(run_original.data_content,
                               run_duplicate.data_content)
+
+###############################################################################
+
+openml.config.stop_using_configuration_for_example()
diff --git a/examples/sklearn/openml_run_example.py b/examples/sklearn/openml_run_example.py
@@ -7,6 +7,14 @@
 import openml
 from sklearn import tree, preprocessing, pipeline
 
+############################################################################
+# .. warning:: This example uploads data. For that reason, this example
+#   connects to the test server at test.openml.org. This prevents the main
+#   server from being crowded with example datasets, tasks, runs, and so on.
+
+openml.config.start_using_configuration_for_example()
+############################################################################
+
 # Uncomment and set your OpenML key. Don't share your key with others.
 # openml.config.apikey = 'YOURKEY'
 
@@ -27,3 +35,6 @@
 run.publish()
 
 print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
+
+############################################################################
+openml.config.stop_using_configuration_for_example()
diff --git a/examples/tasks_tutorial.py b/examples/tasks_tutorial.py
@@ -79,7 +79,7 @@
 ############################################################################
 # Furthermore, we can list tasks based on the dataset id:
 
-tasks = openml.tasks.list_tasks(data_id=61)
+tasks = openml.tasks.list_tasks(data_id=1471)
 tasks = pd.DataFrame.from_dict(tasks, orient='index')
 print("First 5 of %s tasks:" % len(tasks))
 pprint(tasks.head())
@@ -124,7 +124,7 @@
 # single task by its ID, and one which takes a list of IDs and downloads
 # all of these tasks:
 
-task_id = 1
+task_id = 31
 task = openml.tasks.get_task(task_id)
 
 ############################################################################
@@ -135,6 +135,6 @@
 ############################################################################
 # And:
 
-ids = [1, 2, 19, 97, 403]
+ids = [2, 1891, 31, 9983]
 tasks = openml.tasks.get_tasks(ids)
 pprint(tasks[0])