Skip to content

Commit c6fab8e

Browse files
authored
pre-commit update (#1150)
* Update to latest versions
* Updated Black formatting

  Black was bumped from 19.10b0 to 22.6.0. Changes in the files are reduced to:
  - No whitespace at the start and end of a docstring.
  - All comma-separated "lists" (for example in function calls) are now one item per line, regardless of whether they would fit on one line.
* Update error code for "print"

  Changed in flake8-print 5.0.0: https://pypi.org/project/flake8-print/
* Shorten comment to observe line-length codestyle
* Install stubs for requests for mypy
* Add dependency for mypy dateutil type stubs
* Resolve mypy warnings
* Add update pre-commit dependencies notice
1 parent c911d6d commit c6fab8e

51 files changed

Lines changed: 659 additions & 299 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ select = C,E,F,W,B,T
55
ignore = E203, E402, W503
66
per-file-ignores =
77
*__init__.py:F401
8-
*cli.py:T001
8+
*cli.py:T201
99
exclude =
1010
venv
1111
examples

.pre-commit-config.yaml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
11
repos:
22
- repo: https://github.com/psf/black
3-
rev: 19.10b0
3+
rev: 22.6.0
44
hooks:
55
- id: black
66
args: [--line-length=100]
77
- repo: https://github.com/pre-commit/mirrors-mypy
8-
rev: v0.761
8+
rev: v0.961
99
hooks:
1010
- id: mypy
1111
name: mypy openml
1212
files: openml/.*
13+
additional_dependencies:
14+
- types-requests
15+
- types-python-dateutil
1316
- id: mypy
1417
name: mypy tests
1518
files: tests/.*
19+
additional_dependencies:
20+
- types-requests
21+
- types-python-dateutil
1622
- repo: https://gitlab.com/pycqa/flake8
17-
rev: 3.8.3
23+
rev: 4.0.1
1824
hooks:
1925
- id: flake8
2026
name: flake8 openml
2127
files: openml/.*
2228
additional_dependencies:
23-
- flake8-print==3.1.4
29+
- flake8-print==5.0.0
2430
- id: flake8
2531
name: flake8 tests
2632
files: tests/.*
2733
additional_dependencies:
28-
- flake8-print==3.1.4
34+
- flake8-print==5.0.0

doc/progress.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Changelog
1212
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
1313
* FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key.
1414
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
15+
* MAIN#1146: Update the pre-commit dependencies.
1516
* ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.
1617

1718

examples/30_extended/custom_flow_.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@
8585
# but that does not matter for this demonstration.
8686

8787
autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1
88-
subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),)
88+
subflow = dict(
89+
components=OrderedDict(automl_tool=autosklearn_flow),
90+
)
8991

9092
####################################################################################################
9193
# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish.
@@ -98,7 +100,10 @@
98100
# the model of the flow to `None`.
99101

100102
autosklearn_amlb_flow = openml.flows.OpenMLFlow(
101-
**general, **flow_hyperparameters, **subflow, model=None,
103+
**general,
104+
**flow_hyperparameters,
105+
**subflow,
106+
model=None,
102107
)
103108
autosklearn_amlb_flow.publish()
104109
print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}")

examples/30_extended/fetch_runtimes_tutorial.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@
7272
n_repeats, n_folds, n_samples = task.get_split_dimensions()
7373
print(
7474
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
75-
task_id, n_repeats, n_folds, n_samples,
75+
task_id,
76+
n_repeats,
77+
n_folds,
78+
n_samples,
7679
)
7780
)
7881

@@ -97,7 +100,10 @@ def print_compare_runtimes(measures):
97100
clf = RandomForestClassifier(n_estimators=10)
98101

99102
run1 = openml.runs.run_model_on_task(
100-
model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False,
103+
model=clf,
104+
task=task,
105+
upload_flow=False,
106+
avoid_duplicate_runs=False,
101107
)
102108
measures = run1.fold_evaluations
103109

examples/30_extended/flows_and_runs_tutorial.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,11 @@
176176

177177
# The following lines can then be executed offline:
178178
run = openml.runs.run_model_on_task(
179-
pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array",
179+
pipe,
180+
task,
181+
avoid_duplicate_runs=False,
182+
upload_flow=False,
183+
dataset_format="array",
180184
)
181185

182186
# The run may be stored offline, and the flow will be stored along with it:

examples/30_extended/run_setup_tutorial.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,18 @@
5757
# easy as you want it to be
5858

5959

60-
cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),)
60+
cat_imp = make_pipeline(
61+
OneHotEncoder(handle_unknown="ignore", sparse=False),
62+
TruncatedSVD(),
63+
)
6164
cont_imp = SimpleImputer(strategy="median")
6265
ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)])
63-
model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),])
66+
model_original = Pipeline(
67+
steps=[
68+
("transform", ct),
69+
("estimator", RandomForestClassifier()),
70+
]
71+
)
6472

6573
# Let's change some hyperparameters. Of course, in any good application we
6674
# would tune them using, e.g., Random Search or Bayesian Optimization, but for

examples/30_extended/study_tutorial.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@
5151
# And we can use the evaluation listing functionality to learn more about
5252
# the evaluations available for the conducted runs:
5353
evaluations = openml.evaluations.list_evaluations(
54-
function="predictive_accuracy", output_format="dataframe", study=study.study_id,
54+
function="predictive_accuracy",
55+
output_format="dataframe",
56+
study=study.study_id,
5557
)
5658
print(evaluations.head())
5759

examples/30_extended/task_manual_iteration_tutorial.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@
4444

4545
print(
4646
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
47-
task_id, n_repeats, n_folds, n_samples,
47+
task_id,
48+
n_repeats,
49+
n_folds,
50+
n_samples,
4851
)
4952
)
5053

@@ -53,7 +56,11 @@
5356
# samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample
5457
# sizes, but we can neglect this here as there is only a single repetition.
5558

56-
train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,)
59+
train_indices, test_indices = task.get_train_test_split_indices(
60+
repeat=0,
61+
fold=0,
62+
sample=0,
63+
)
5764

5865
print(train_indices.shape, train_indices.dtype)
5966
print(test_indices.shape, test_indices.dtype)
@@ -69,7 +76,10 @@
6976

7077
print(
7178
"X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format(
72-
X_train.shape, y_train.shape, X_test.shape, y_test.shape,
79+
X_train.shape,
80+
y_train.shape,
81+
X_test.shape,
82+
y_test.shape,
7383
)
7484
)
7585

@@ -82,7 +92,10 @@
8292
n_repeats, n_folds, n_samples = task.get_split_dimensions()
8393
print(
8494
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
85-
task_id, n_repeats, n_folds, n_samples,
95+
task_id,
96+
n_repeats,
97+
n_folds,
98+
n_samples,
8699
)
87100
)
88101

@@ -92,7 +105,9 @@
92105
for fold_idx in range(n_folds):
93106
for sample_idx in range(n_samples):
94107
train_indices, test_indices = task.get_train_test_split_indices(
95-
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
108+
repeat=repeat_idx,
109+
fold=fold_idx,
110+
sample=sample_idx,
96111
)
97112
X_train = X.iloc[train_indices]
98113
y_train = y.iloc[train_indices]
@@ -121,7 +136,10 @@
121136
n_repeats, n_folds, n_samples = task.get_split_dimensions()
122137
print(
123138
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
124-
task_id, n_repeats, n_folds, n_samples,
139+
task_id,
140+
n_repeats,
141+
n_folds,
142+
n_samples,
125143
)
126144
)
127145

@@ -131,7 +149,9 @@
131149
for fold_idx in range(n_folds):
132150
for sample_idx in range(n_samples):
133151
train_indices, test_indices = task.get_train_test_split_indices(
134-
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
152+
repeat=repeat_idx,
153+
fold=fold_idx,
154+
sample=sample_idx,
135155
)
136156
X_train = X.iloc[train_indices]
137157
y_train = y.iloc[train_indices]
@@ -160,7 +180,10 @@
160180
n_repeats, n_folds, n_samples = task.get_split_dimensions()
161181
print(
162182
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
163-
task_id, n_repeats, n_folds, n_samples,
183+
task_id,
184+
n_repeats,
185+
n_folds,
186+
n_samples,
164187
)
165188
)
166189

@@ -170,7 +193,9 @@
170193
for fold_idx in range(n_folds):
171194
for sample_idx in range(n_samples):
172195
train_indices, test_indices = task.get_train_test_split_indices(
173-
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
196+
repeat=repeat_idx,
197+
fold=fold_idx,
198+
sample=sample_idx,
174199
)
175200
X_train = X.iloc[train_indices]
176201
y_train = y.iloc[train_indices]

openml/_api_calls.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,20 @@ def _perform_api_call(call, request_method, data=None, file_elements=None):
6969
__check_response(response, url, file_elements)
7070

7171
logging.info(
72-
"%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url,
72+
"%.7fs taken for [%s] request for the URL %s",
73+
time.time() - start,
74+
request_method,
75+
url,
7376
)
7477
return response.text
7578

7679

7780
def _download_minio_file(
78-
source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True,
81+
source: str,
82+
destination: Union[str, pathlib.Path],
83+
exists_ok: bool = True,
7984
) -> None:
80-
""" Download file ``source`` from a MinIO Bucket and store it at ``destination``.
85+
"""Download file ``source`` from a MinIO Bucket and store it at ``destination``.
8186
8287
Parameters
8388
----------
@@ -103,7 +108,9 @@ def _download_minio_file(
103108

104109
try:
105110
client.fget_object(
106-
bucket_name=bucket, object_name=object_name, file_path=str(destination),
111+
bucket_name=bucket,
112+
object_name=object_name,
113+
file_path=str(destination),
107114
)
108115
except minio.error.S3Error as e:
109116
if e.message.startswith("Object does not exist"):
@@ -120,7 +127,7 @@ def _download_text_file(
120127
exists_ok: bool = True,
121128
encoding: str = "utf8",
122129
) -> Optional[str]:
123-
""" Download the text file at `source` and store it in `output_path`.
130+
"""Download the text file at `source` and store it in `output_path`.
124131
125132
By default, do nothing if a file already exists in `output_path`.
126133
The downloaded file can be checked against an expected md5 checksum.
@@ -156,7 +163,10 @@ def _download_text_file(
156163

157164
if output_path is None:
158165
logging.info(
159-
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
166+
"%.7fs taken for [%s] request for the URL %s",
167+
time.time() - start,
168+
"get",
169+
source,
160170
)
161171
return downloaded_file
162172

@@ -165,7 +175,10 @@ def _download_text_file(
165175
fh.write(downloaded_file)
166176

167177
logging.info(
168-
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
178+
"%.7fs taken for [%s] request for the URL %s",
179+
time.time() - start,
180+
"get",
181+
source,
169182
)
170183

171184
del downloaded_file
@@ -174,8 +187,8 @@ def _download_text_file(
174187

175188
def _file_id_to_url(file_id, filename=None):
176189
"""
177-
Presents the URL how to download a given file id
178-
filename is optional
190+
Presents the URL how to download a given file id
191+
filename is optional
179192
"""
180193
openml_url = config.server.split("/api/")
181194
url = openml_url[0] + "/data/download/%s" % file_id
@@ -194,7 +207,12 @@ def _read_url_files(url, data=None, file_elements=None):
194207
file_elements = {}
195208
# Using requests.post sets header 'Accept-encoding' automatically to
196209
# 'gzip,deflate'
197-
response = _send_request(request_method="post", url=url, data=data, files=file_elements,)
210+
response = _send_request(
211+
request_method="post",
212+
url=url,
213+
data=data,
214+
files=file_elements,
215+
)
198216
return response
199217

200218

@@ -258,7 +276,9 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
258276
raise OpenMLServerError(
259277
"Unexpected server error when calling {}. Please contact the "
260278
"developers!\nStatus code: {}\n{}".format(
261-
url, response.status_code, response.text,
279+
url,
280+
response.status_code,
281+
response.text,
262282
)
263283
)
264284
if retry_counter >= n_retries:
@@ -290,7 +310,9 @@ def __check_response(response, url, file_elements):
290310

291311

292312
def __parse_server_exception(
293-
response: requests.Response, url: str, file_elements: Dict,
313+
response: requests.Response,
314+
url: str,
315+
file_elements: Dict,
294316
) -> OpenMLServerError:
295317

296318
if response.status_code == 414:
@@ -319,12 +341,17 @@ def __parse_server_exception(
319341

320342
# 512 for runs, 372 for datasets, 500 for flows
321343
# 482 for tasks, 542 for evaluations, 674 for setups
322-
return OpenMLServerNoResult(code=code, message=full_message,)
344+
return OpenMLServerNoResult(
345+
code=code,
346+
message=full_message,
347+
)
323348
# 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
324349
if code in [163] and file_elements is not None and "description" in file_elements:
325350
# file_elements['description'] is the XML file description of the flow
326351
full_message = "\n{}\n{} - {}".format(
327-
file_elements["description"], message, additional_information,
352+
file_elements["description"],
353+
message,
354+
additional_information,
328355
)
329356
else:
330357
full_message = "{} - {}".format(message, additional_information)

0 commit comments

Comments
 (0)