Skip to content

Commit dafe5ac

Browse files
authored
Configuration ci (#1049)
* Add a non-functional entry point * Allow setting of API key through CLI - Add function to set any field in the configuration file - Add function to read out the configuration file - Towards full configurability from CLI * Remove autocomplete promise, use _defaults Autocomplete seems to be incompatible with `choices`, so I'll ignore that for now. We also use `config._defaults` instead of an explicit list to avoid duplication. * Add server configuration * Allow fields to be set directly non-interactively With the `openml configure FIELD VALUE` command. * Combine error and check functionalities Otherwise you have to duplicate all checks in the error message function. * Share logic about setting/collecting the value * Complete CLI for other fields. Max_retries is excluded because it should not be user configurable, and will most likely be removed. Verbosity is configurable but is currently not actually used. * Bring back sanitizing user input And extend it to the bool inputs. * Add small bit of info about the command line tool * Add API key configuration note in the introduction * Add to progress log * Refactor flow of wait_until_valid_input
1 parent 72576bd commit dafe5ac

7 files changed

Lines changed: 378 additions & 10 deletions

File tree

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ select = C,E,F,W,B,T
55
ignore = E203, E402, W503
66
per-file-ignores =
77
*__init__.py:F401
8+
*cli.py:T001
89
exclude =
910
venv
1011
examples

doc/progress.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Changelog
1515
* DOC #1051: Document existing extensions to OpenML-Python besides the shipped scikit-learn
1616
extension.
1717
* FIX #1035: Render class attributes and methods again.
18+
* ADD #1049: Add a command line tool for configuring openml-python.
1819
* FIX #1042: Fixes a rare concurrency issue with OpenML-Python and joblib which caused the joblib
1920
worker pool to fail.
2021
* FIX #1053: Fixes a bug which could prevent importing the package in a docker container.

doc/usage.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ which are separated by newlines. The following keys are defined:
5959
* 1: info output
6060
* 2: debug output
6161

62+
This file is easily configurable by the ``openml`` command line interface.
63+
To see where the file is stored, and what its values are, use ``openml configure none``.
64+
Set any field with ``openml configure FIELD`` or even all fields with just ``openml configure``.
65+
6266
~~~~~~~~~~~~
6367
Key concepts
6468
~~~~~~~~~~~~

examples/20_basic/introduction_tutorial.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,17 @@
4242
# * After logging in, open your account page (avatar on the top right)
4343
# * Open 'Account Settings', then 'API authentication' to find your API key.
4444
#
45-
# There are two ways to authenticate:
45+
# There are two ways to permanently authenticate:
4646
#
47+
# * Use the ``openml`` CLI tool with ``openml configure apikey MYKEY``,
48+
# replacing **MYKEY** with your API key.
4749
# * Create a plain text file **~/.openml/config** with the line
4850
# **'apikey=MYKEY'**, replacing **MYKEY** with your API key. The config
4951
# file must be located at ~/.openml/config and exist prior to
5052
# importing the openml module.
51-
# * Run the code below, replacing 'YOURKEY' with your API key.
53+
#
54+
# Alternatively, by running the code below and replacing 'YOURKEY' with your API key,
55+
# you authenticate for the duration of the python process.
5256
#
5357
# .. warning:: This example uploads data. For that reason, this example
5458
# connects to the test server instead. This prevents the live server from

openml/cli.py

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
"""" Command Line Interface for `openml` to configure its settings. """
2+
3+
import argparse
4+
import os
5+
import pathlib
6+
import string
7+
from typing import Union, Callable
8+
from urllib.parse import urlparse
9+
10+
11+
from openml import config
12+
13+
14+
def is_hex(string_: str) -> bool:
    """ Return True if every character of `string_` is a hexadecimal digit.

    Both lower and upper case digits are accepted. An empty string contains no
    offending characters and is therefore reported as hexadecimal.
    """
    for character in string_:
        if character not in string.hexdigits:
            return False
    return True
16+
17+
18+
def looks_like_url(url: str) -> bool:
    """ Return True if `url` parses to something that has a network location.

    This is a shallow check: only the presence of a `netloc` component is verified.
    Input which can not be parsed at all is reported as not being a url.
    """
    try:
        network_location = urlparse(url).netloc
    except Exception:
        return False
    return bool(network_location)
24+
25+
26+
def wait_until_valid_input(
    prompt: str, check: Callable[[str], str], sanitize: Union[Callable[[str], str], None]
) -> str:
    """ Keep asking for input with `prompt` until `check` reports no error for it.

    Parameters
    ----------
    prompt: str
        Message to display each time input is requested.
    check: Callable[[str], str]
        Function to call with the (sanitized) input. It returns an error message if the
        input is not valid, and a False-like value (e.g. "") otherwise.
    sanitize: Callable[[str], str], optional
        A function which attempts to sanitize the user input (e.g. auto-complete).
        It is skipped if it is False-like.

    Returns
    -------
    str
        The first (sanitized) input for which `check` returned a False-like value.
    """
    while True:
        answer = input(prompt)
        candidate = sanitize(answer) if sanitize else answer
        problem = check(candidate)
        if not problem:
            return candidate
        # Leave an empty line between the error and the next prompt.
        print(problem, end="\n\n")
56+
57+
58+
def print_configuration():
    """ Print the location of the configuration file and the value of each field.

    Field names are left-justified to the length of the longest name, so that all
    values start in the same column.
    """
    file = config.determine_config_file_path()
    print(f"File '{file}' contains (or defaults to):")

    # Read the configuration once, so that the alignment and the printed values are
    # guaranteed to come from the same snapshot.
    configuration = config.get_config_as_dict()
    # `default=0` avoids a ValueError from `max` should the configuration be empty.
    max_key_length = max(map(len, configuration), default=0)
    for field, value in configuration.items():
        print(f"{field.ljust(max_key_length)}: {value}")
66+
67+
68+
def verbose_set(field, value):
    """ Store `value` for `field` in the configuration file and report it on stdout. """
    config.set_field_in_config_file(field, value)
    confirmation = f"{field} set to '{value}'."
    print(confirmation)
71+
72+
73+
def configure_apikey(value: str) -> None:
    """ Configure the `apikey` field, either with `value` or through user input.

    A key is accepted if it consists of exactly 32 hexadecimal characters.
    """

    def validate_key(candidate: str) -> str:
        # Returns an error message for an invalid key, or "" if the key is acceptable.
        key_length = len(candidate)
        if key_length != 32:
            return f"The key should contain 32 characters but contains {key_length}."
        return "" if is_hex(candidate) else "Some characters are not hexadecimal."

    how_to_get_a_key = (
        f"Your current API key is set to: '{config.apikey}'. "
        "You can get an API key at https://new.openml.org. "
        "You must create an account if you don't have one yet:\n"
        " 1. Log in with the account.\n"
        " 2. Navigate to the profile page (top right circle > Your Profile). \n"
        " 3. Click the API Key button to reach the page with your API key.\n"
        "If you have any difficulty following these instructions, let us know on Github."
    )

    configure_field(
        field="apikey",
        value=value,
        check_with_message=validate_key,
        intro_message=how_to_get_a_key,
        input_message="Please enter your API key:",
    )
98+
99+
100+
def configure_server(value: str) -> None:
    """ Configure the `server` field, either with `value` or through user input.

    Besides a url, the shorthands 'test' and 'production' are accepted. A shorthand is
    replaced by the url it stands for before it is validated and stored.
    """
    shorthands = {
        "test": "https://test.openml.org/api/v1/xml",
        "production": "https://www.openml.org/api/v1/xml",
    }

    def validate_server(candidate: str) -> str:
        # Returns an error message for an invalid server, or "" if it is acceptable.
        if candidate in shorthands or looks_like_url(candidate):
            return ""
        return "Must be 'test', 'production' or a url."

    def expand_shorthand(candidate: str) -> str:
        # Anything which is not a known shorthand is passed through untouched.
        return shorthands.get(candidate, candidate)

    configure_field(
        field="server",
        value=value,
        check_with_message=validate_server,
        intro_message="Specify which server you wish to connect to.",
        input_message="Specify a url or use 'test' or 'production' as a shorthand: ",
        sanitize=expand_shorthand,
    )
122+
123+
124+
def configure_cachedir(value: str) -> None:
    """ Configure the `cachedir` field, either with `value` or through user input.

    The directory must be given as an absolute path (a leading '~' is expanded first).
    If the directory does not exist yet, an attempt is made to create it as part of the
    validation.

    Parameters
    ----------
    value: str
        The path to use as cache directory. If `None`, the user is asked for input.
    """

    def check_cache_dir(path: str) -> str:
        # Returns an error message for an unusable path, or "" if the path is acceptable.
        # Expand '~' before any filesystem check, so that e.g. '~/some_file' is correctly
        # recognised as an existing file.
        expanded = pathlib.Path(path).expanduser()
        if expanded.is_file():
            return f"'{path}' is a file, not a directory."
        if not expanded.is_absolute():
            return f"'{path}' is not absolute (even after expanding '~')."
        if not expanded.exists():
            try:
                os.mkdir(expanded)
            except PermissionError:
                return f"'{path}' does not exist and there are not enough permissions to create it."
            except OSError as e:
                # E.g. the parent directory does not exist either. Report it as invalid
                # input rather than aborting with a traceback.
                return f"'{path}' does not exist and could not be created: {e}"
        return ""

    configure_field(
        field="cachedir",
        value=value,
        check_with_message=check_cache_dir,
        intro_message="Configuring the cache directory. It can not be a relative path.",
        input_message="Specify the directory to use (or create) as cache directory: ",
    )
    print("NOTE: Data from your old cache directory is not moved over.")
147+
148+
149+
def configure_connection_n_retries(value: str) -> None:
    """ Configure the `connection_n_retries` field, with `value` or through user input.

    Accepted values are whole numbers from 1 up to and including `config.max_retries`.

    Parameters
    ----------
    value: str
        The number of retries, as text. If `None`, the user is asked for input.
    """

    def valid_connection_retries(n: str) -> str:
        # Returns an error message for an invalid number, or "" if it is acceptable.
        # `str.isdecimal` is used rather than `str.isdigit`, because the latter also
        # accepts characters such as superscripts ('²') for which `int` raises.
        if not n.isdecimal():
            return f"Must be an integer number (at most {config.max_retries})."
        retries = int(n)
        if retries > config.max_retries:
            return f"connection_n_retries may not exceed {config.max_retries}."
        if retries == 0:
            return "connection_n_retries must be non-zero."
        return ""

    configure_field(
        field="connection_n_retries",
        value=value,
        check_with_message=valid_connection_retries,
        intro_message="Configuring the number of times to attempt to connect to the OpenML Server",
        # Zero is rejected by the check above, so the advertised range starts at 1.
        input_message=f"Enter an integer between 1 and {config.max_retries}: ",
    )
166+
167+
168+
def configure_avoid_duplicate_runs(value: str) -> None:
    """ Configure the `avoid_duplicate_runs` field, with `value` or through user input.

    Common spellings of yes/no are auto-completed to 'True'/'False' before validation.
    """
    negatives = ("n", "no", "f", "false", "0")
    positives = ("y", "yes", "t", "true", "1")

    def validate_bool(candidate: str) -> str:
        # Returns an error message for an invalid value, or "" if it is acceptable.
        is_valid = candidate == "True" or candidate == "False"
        return "" if is_valid else "Must be 'True' or 'False' (mind the capital)."

    def complete_bool(candidate: str) -> str:
        # Anything which is not a recognised spelling is passed through untouched.
        lowered = candidate.lower()
        if lowered in negatives:
            return "False"
        if lowered in positives:
            return "True"
        return candidate

    explanation = (
        "If set to True, when `run_flow_on_task` or similar methods are called a lookup is "
        "performed to see if there already exists such a run on the server. "
        "If so, download those results instead. "
        "If set to False, runs will always be executed."
    )

    configure_field(
        field="avoid_duplicate_runs",
        value=value,
        check_with_message=validate_bool,
        intro_message=explanation,
        input_message="Enter 'True' or 'False': ",
        sanitize=complete_bool,
    )
196+
197+
198+
def configure_verbosity(value: str) -> None:
    """ Configure the `verbosity` field, either with `value` or through user input.

    Parameters
    ----------
    value: str
        One of '0', '1' or '2'. If `None`, the user is asked for input.
    """

    def is_zero_through_two(verbosity: str) -> str:
        # Returns an error message for an invalid level, or "" if it is acceptable.
        if verbosity in ["0", "1", "2"]:
            return ""
        return "Must be '0', '1' or '2'."

    # Each level is put on its own line. Without the explicit line breaks the adjacent
    # string literals are joined into a single run-on line.
    intro_message = (
        "Set the verbosity of log messages which should be shown by openml-python."
        "\n  0: normal output (warnings and errors)"
        "\n  1: info output (some high-level progress output)"
        "\n  2: debug output (detailed information (for developers))"
    )

    configure_field(
        field="verbosity",
        value=value,
        check_with_message=is_zero_through_two,
        intro_message=intro_message,
        input_message="Enter '0', '1' or '2': ",
    )
218+
219+
220+
def configure_field(
    field: str,
    value: Union[None, str],
    check_with_message: Callable[[str], str],
    intro_message: str,
    input_message: str,
    sanitize: Union[Callable[[str], str], None] = None,
) -> None:
    """ Configure `field` with `value`. If `value` is None ask the user for input.

    `value` and user input are first corrected/auto-completed with `sanitize` if provided,
    then validated with the `check_with_message` function.
    If the user inputs a wrong value in interactive mode, the user gets to input a new value.
    The new valid value is saved in the openml configuration file.
    In case an invalid `value` is supplied directly (non-interactive), no changes are made.

    Parameters
    ----------
    field: str
        Field to set.
    value: str, None
        Value to set the field to. If `None` will ask user for input.
    check_with_message: Callable[[str], str]
        Function which validates `value` or user input, and returns either an error message if it
        is invalid, or a False-like value if `value` is valid.
    intro_message: str
        Message that is printed once if user input is requested (e.g. instructions).
    input_message: str
        Message that comes with the input prompt.
    sanitize: Union[Callable[[str], str], None]
        A function to convert user input to 'more acceptable' input, e.g. for auto-complete.
        If no correction of user input is possible, return the original value.
        If no function is provided, don't attempt to correct/auto-complete input.

    Raises
    ------
    SystemExit
        With exit status 1, if an invalid `value` is supplied directly (non-interactive).
    """
    if value is None:
        print(intro_message)
        value = wait_until_valid_input(
            prompt=input_message, check=check_with_message, sanitize=sanitize,
        )
    else:
        if sanitize:
            value = sanitize(value)
        malformed_input = check_with_message(value)
        if malformed_input:
            print(malformed_input)
            # `quit` is a helper of the `site` module meant for the interactive shell and
            # exits with status 0. Raise SystemExit directly, with a non-zero status, so that
            # the calling shell can detect the failure.
            raise SystemExit(1)
    verbose_set(field, value)
267+
268+
269+
def configure(args: argparse.Namespace):
    """ Calls the right submenu(s) to edit `args.field` in the configuration file.

    Parameters
    ----------
    args: argparse.Namespace
        Must provide `field` and `value`. If `field` is 'none' the configuration is printed,
        if it is 'all' the configuration is printed and every field is configured
        interactively. For any other `field` only that field is configured, with `value`
        if it is not `None` and interactively otherwise.

    Raises
    ------
    SystemExit
        With exit status 1, if a `value` is given while `field` is 'all' or 'none'.
    """
    is_meta_field = args.field in ["all", "none"]
    if is_meta_field and args.value is not None:
        print(f"Can not set value ('{args.value}') when field is specified as '{args.field}'.")
        # `quit` is meant for the interactive shell and exits with status 0.
        # Signal the failure to the calling shell with a non-zero status instead.
        raise SystemExit(1)

    set_functions = {
        "apikey": configure_apikey,
        "server": configure_server,
        "cachedir": configure_cachedir,
        "connection_n_retries": configure_connection_n_retries,
        "avoid_duplicate_runs": configure_avoid_duplicate_runs,
        "verbosity": configure_verbosity,
    }

    if not is_meta_field:
        if args.field in set_functions:
            set_functions[args.field](args.value)
        else:
            print(f"Setting '{args.field}' is not supported yet.")
        return

    print_configuration()

    if args.field == "all":
        for set_field_function in set_functions.values():
            print()  # Visually separating the output by field.
            # `args.value` is known to be None here, so each field asks for user input.
            set_field_function(None)
295+
296+
297+
def main() -> None:
    """ Entry point of the command line tool: parse the arguments and dispatch on them.

    Without a (known) subroutine the general help text is printed.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="subroutine")

    configure_description = (
        "Set or read variables in your configuration file. For more help also see "
        "'https://openml.github.io/openml-python/master/usage.html#configuration'."
    )
    parser_configure = subparsers.add_parser("configure", description=configure_description)

    # `max_retries` is left out of the fields which can be chosen on the command line.
    field_choices = [name for name in config._defaults if name != "max_retries"]
    field_choices.extend(["all", "none"])

    field_help = (
        "The field you wish to edit. "
        "Choosing 'all' lets you configure all fields one by one. "
        "Choosing 'none' will print out the current configuration."
    )
    parser_configure.add_argument(
        "field", type=str, choices=field_choices, default="all", nargs="?", help=field_help,
    )
    parser_configure.add_argument(
        "value", type=str, default=None, nargs="?", help="The value to set the FIELD to.",
    )

    args = parser.parse_args()

    subroutines = {"configure": configure}
    handler = subroutines.get(args.subroutine)
    if handler is None:
        parser.print_help()
    else:
        handler(args)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)