|
3 | 3 | import io |
4 | 4 | import logging |
5 | 5 | import os |
| 6 | +from pyexpat import ExpatError |
6 | 7 | from typing import List, Dict, Union, Optional, cast |
7 | 8 |
|
8 | 9 | import numpy as np |
|
19 | 20 | from .dataset import OpenMLDataset |
20 | 21 | from ..exceptions import ( |
21 | 22 | OpenMLHashException, |
| 23 | + OpenMLServerError, |
22 | 24 | OpenMLServerException, |
23 | 25 | OpenMLPrivateDatasetError, |
24 | 26 | ) |
@@ -437,7 +439,7 @@ def get_dataset( |
437 | 439 | parquet_file = None |
438 | 440 | remove_dataset_cache = False |
439 | 441 | except OpenMLServerException as e: |
440 | | - # if there was an exception, |
| 442 | + # if there was an exception |
441 | 443 | # check if the user had access to the dataset |
442 | 444 | if e.code == 112: |
443 | 445 | raise OpenMLPrivateDatasetError(e.message) from None |
@@ -949,14 +951,18 @@ def _get_dataset_description(did_cache_dir, dataset_id): |
949 | 951 | try: |
950 | 952 | with io.open(description_file, encoding="utf8") as fh: |
951 | 953 | dataset_xml = fh.read() |
| 954 | + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] |
952 | 955 | except Exception: |
953 | 956 | url_extension = "data/{}".format(dataset_id) |
954 | 957 | dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") |
| 958 | + try: |
| 959 | + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] |
| 960 | + except ExpatError as e: |
| 961 | + url = openml._api_calls._create_url_from_endpoint(url_extension) |
| 962 | + raise OpenMLServerError(f"Dataset description XML at '{url}' is malformed.") from e |
955 | 963 | with io.open(description_file, "w", encoding="utf8") as fh: |
956 | 964 | fh.write(dataset_xml) |
957 | 965 |
|
958 | | - description = xmltodict.parse(dataset_xml)["oml:data_set_description"] |
959 | | - |
960 | 966 | return description |
961 | 967 |
|
962 | 968 |
|
|
0 commit comments