Skip to content

Commit f9412d3

Browse files
pre-commit-ci[bot], LennartPurucker, and mfeurer
authored
[pre-commit.ci] pre-commit autoupdate (#1223)
* [pre-commit.ci] pre-commit autoupdate updates: - [github.com/psf/black: 22.6.0 → 23.3.0](psf/black@22.6.0...23.3.0) - [github.com/pre-commit/mirrors-mypy: v0.961 → v1.2.0](pre-commit/mirrors-mypy@v0.961...v1.2.0) - [github.com/pycqa/flake8: 4.0.1 → 6.0.0](PyCQA/flake8@4.0.1...6.0.0) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy errors: made implicit optional typing to be explicit * Drop duplicate flake8 config * Fix a few flake8 issues * Update python version for pre-commit workflow --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Lennart Purucker <lennart.purucker@uni-siegen.de> Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
1 parent fb9f9eb commit f9412d3

33 files changed

Lines changed: 46 additions & 116 deletions

.github/workflows/pre-commit.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ jobs:
77
runs-on: ubuntu-latest
88
steps:
99
- uses: actions/checkout@v3
10-
- name: Setup Python 3.7
10+
- name: Setup Python 3.8
1111
uses: actions/setup-python@v4
1212
with:
13-
python-version: 3.7
13+
python-version: 3.8
1414
- name: Install pre-commit
1515
run: |
1616
pip install pre-commit

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
repos:
22
- repo: https://github.com/psf/black
3-
rev: 22.6.0
3+
rev: 23.3.0
44
hooks:
55
- id: black
66
args: [--line-length=100]
77
- repo: https://github.com/pre-commit/mirrors-mypy
8-
rev: v0.961
8+
rev: v1.2.0
99
hooks:
1010
- id: mypy
1111
name: mypy openml
@@ -20,7 +20,7 @@ repos:
2020
- types-requests
2121
- types-python-dateutil
2222
- repo: https://github.com/pycqa/flake8
23-
rev: 4.0.1
23+
rev: 6.0.0
2424
hooks:
2525
- id: flake8
2626
name: flake8 openml

doc/progress.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ Changelog
99
0.13.1
1010
~~~~~~
1111

12-
* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
1312
* ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
1413
* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
1514
* ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
1615
* ADD #1201: Make ``OpenMLTraceIteration`` a dataclass.
1716
* DOC #1069: Add argument documentation for the ``OpenMLRun`` class.
17+
* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
1818
* FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``.
1919
* FIX #1198: Support numpy 1.24 and higher.
2020
* FIX #1216: Allow unknown task types on the server. This is only relevant when new task types are added to the test server.
21+
* FIX #1223: Fix mypy errors for implicit optional typing.
2122
* MAINT #1155: Add dependabot github action to automatically update other github actions.
2223
* MAINT #1199: Obtain pre-commit's flake8 from github.com instead of gitlab.com.
2324
* MAINT #1215: Support latest numpy version.

examples/30_extended/fetch_runtimes_tutorial.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
)
8080
)
8181

82+
8283
# Creating utility function
8384
def print_compare_runtimes(measures):
8485
for repeat, val1 in measures["usercpu_time_millis_training"].items():

openml/_api_calls.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def _download_minio_bucket(
195195
def _download_text_file(
196196
source: str,
197197
output_path: Optional[str] = None,
198-
md5_checksum: str = None,
198+
md5_checksum: Optional[str] = None,
199199
exists_ok: bool = True,
200200
encoding: str = "utf8",
201201
) -> Optional[str]:
@@ -326,7 +326,6 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
326326
if request_method == "get" and not __is_checksum_equal(
327327
response.text.encode("utf-8"), md5_checksum
328328
):
329-
330329
# -- Check if encoding is not UTF-8 perhaps
331330
if __is_checksum_equal(response.content, md5_checksum):
332331
raise OpenMLHashException(

openml/datasets/dataset.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,6 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
274274
return [(key, fields[key]) for key in order if key in fields]
275275

276276
def __eq__(self, other):
277-
278277
if not isinstance(other, OpenMLDataset):
279278
return False
280279

openml/datasets/functions.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ def list_datasets(
7474
output_format: str = "dict",
7575
**kwargs,
7676
) -> Union[Dict, pd.DataFrame]:
77-
7877
"""
7978
Return a list of all dataset which are on OpenML.
8079
Supports large amount of results.
@@ -182,7 +181,6 @@ def _list_datasets(data_id: Optional[List] = None, output_format="dict", **kwarg
182181

183182

184183
def __list_datasets(api_call, output_format="dict"):
185-
186184
xml_string = openml._api_calls._perform_api_call(api_call, "get")
187185
datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))
188186

@@ -353,7 +351,7 @@ def get_datasets(
353351
def get_dataset(
354352
dataset_id: Union[int, str],
355353
download_data: bool = True,
356-
version: int = None,
354+
version: Optional[int] = None,
357355
error_if_multiple: bool = False,
358356
cache_format: str = "pickle",
359357
download_qualities: bool = True,
@@ -984,7 +982,7 @@ def _get_dataset_description(did_cache_dir, dataset_id):
984982

985983
def _get_dataset_parquet(
986984
description: Union[Dict, OpenMLDataset],
987-
cache_directory: str = None,
985+
cache_directory: Optional[str] = None,
988986
download_all_files: bool = False,
989987
) -> Optional[str]:
990988
"""Return the path to the local parquet file of the dataset. If is not cached, it is downloaded.
@@ -1051,7 +1049,9 @@ def _get_dataset_parquet(
10511049
return output_file_path
10521050

10531051

1054-
def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str:
1052+
def _get_dataset_arff(
1053+
description: Union[Dict, OpenMLDataset], cache_directory: Optional[str] = None
1054+
) -> str:
10551055
"""Return the path to the local arff file of the dataset. If is not cached, it is downloaded.
10561056
10571057
Checks if the file is in the cache, if yes, return the path to the file.
@@ -1173,8 +1173,8 @@ def _create_dataset_from_description(
11731173
description: Dict[str, str],
11741174
features_file: str,
11751175
qualities_file: str,
1176-
arff_file: str = None,
1177-
parquet_file: str = None,
1176+
arff_file: Optional[str] = None,
1177+
parquet_file: Optional[str] = None,
11781178
cache_format: str = "pickle",
11791179
) -> OpenMLDataset:
11801180
"""Create a dataset object from a description dict.

openml/exceptions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# License: BSD 3-Clause
22

3+
from typing import Optional
4+
35

46
class PyOpenMLError(Exception):
57
def __init__(self, message: str):
@@ -20,7 +22,7 @@ class OpenMLServerException(OpenMLServerError):
2022

2123
# Code needs to be optional to allow the exception to be picklable:
2224
# https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable # noqa: E501
23-
def __init__(self, message: str, code: int = None, url: str = None):
25+
def __init__(self, message: str, code: Optional[int] = None, url: Optional[str] = None):
2426
self.message = message
2527
self.code = code
2628
self.url = url

openml/extensions/extension_interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def _run_model_on_fold(
166166
y_train: Optional[np.ndarray] = None,
167167
X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None,
168168
) -> Tuple[np.ndarray, np.ndarray, "OrderedDict[str, float]", Optional["OpenMLRunTrace"]]:
169-
"""Run a model on a repeat,fold,subsample triplet of the task and return prediction information.
169+
"""Run a model on a repeat, fold, subsample triplet of the task.
170170
171171
Returns the data that is necessary to construct the OpenML Run object. Is used by
172172
:func:`openml.runs.run_flow_on_task`.

openml/extensions/sklearn/extension.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,6 @@ def flatten_all(list_):
10211021
# when deserializing the parameter
10221022
sub_components_explicit.add(identifier)
10231023
if isinstance(sub_component, str):
1024-
10251024
external_version = self._get_external_version_string(None, {})
10261025
dependencies = self._get_dependencies()
10271026
tags = self._get_tags()
@@ -1072,7 +1071,6 @@ def flatten_all(list_):
10721071
parameters[k] = parameter_json
10731072

10741073
elif isinstance(rval, OpenMLFlow):
1075-
10761074
# A subcomponent, for example the base model in
10771075
# AdaBoostClassifier
10781076
sub_components[k] = rval
@@ -1762,7 +1760,6 @@ def _prediction_to_probabilities(
17621760
)
17631761

17641762
if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
1765-
17661763
try:
17671764
proba_y = model_copy.predict_proba(X_test)
17681765
proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy

0 commit comments

Comments (0)