|
11 | 11 | import numpy as np |
12 | 12 | import pandas as pd |
13 | 13 | import scipy.sparse |
14 | | -import xmltodict |
15 | 14 | from warnings import warn |
16 | 15 |
|
17 | | -import openml._api_calls |
18 | 16 | from openml.base import OpenMLBase |
19 | 17 | from .data_feature import OpenMLDataFeature |
20 | 18 | from ..exceptions import PyOpenMLError |
@@ -728,49 +726,28 @@ def get_features_by_type(self, data_type, exclude=None, |
728 | 726 | result.append(idx - offset) |
729 | 727 | return result |
730 | 728 |
|
731 | | - def publish(self): |
732 | | - """Publish the dataset on the OpenML server. |
| 729 | + def _get_file_elements(self) -> Dict: |
| 730 | + """ Adds the 'dataset' to file elements. """ |
| 731 | + file_elements = {} |
| 732 | + path = None if self.data_file is None else os.path.abspath(self.data_file) |
733 | 733 |
|
734 | | - Upload the dataset description and dataset content to openml. |
735 | | -
|
736 | | - Returns |
737 | | - ------- |
738 | | - dataset_id: int |
739 | | - Id of the dataset uploaded to the server. |
740 | | - """ |
741 | | - file_elements = {'description': self._to_xml()} |
742 | | - |
743 | | - # the arff dataset string is available |
744 | 734 | if self._dataset is not None: |
745 | 735 | file_elements['dataset'] = self._dataset |
746 | | - else: |
747 | | - # the path to the arff dataset is given |
748 | | - if self.data_file is not None: |
749 | | - path = os.path.abspath(self.data_file) |
750 | | - if os.path.exists(path): |
751 | | - try: |
752 | | - |
753 | | - with io.open(path, encoding='utf8') as fh: |
754 | | - # check if arff is valid |
755 | | - decoder = arff.ArffDecoder() |
756 | | - decoder.decode(fh, encode_nominal=True) |
757 | | - except arff.ArffException: |
758 | | - raise ValueError("The file you have provided is not " |
759 | | - "a valid arff file.") |
760 | | - |
761 | | - with open(path, 'rb') as fp: |
762 | | - file_elements['dataset'] = fp.read() |
763 | | - else: |
764 | | - if self.url is None: |
765 | | - raise ValueError("No url/path to the data file was given") |
766 | | - |
767 | | - return_value = openml._api_calls._perform_api_call( |
768 | | - "data/", 'post', |
769 | | - file_elements=file_elements, |
770 | | - ) |
771 | | - response = xmltodict.parse(return_value) |
772 | | - self.dataset_id = int(response['oml:upload_data_set']['oml:id']) |
773 | | - return self.dataset_id |
| 736 | + elif path is not None and os.path.exists(path): |
| 737 | + with open(path, 'rb') as fp: |
| 738 | + file_elements['dataset'] = fp.read() |
| 739 | + try: |
| 740 | + dataset_utf8 = str(file_elements['dataset'], 'utf8') |
| 741 | + arff.ArffDecoder().decode(dataset_utf8, encode_nominal=True) |
| 742 | + except arff.ArffException: |
| 743 | + raise ValueError("The file you have provided is not a valid arff file.") |
| 744 | + elif self.url is None: |
| 745 | + raise ValueError("No valid url/path to the data file was given.") |
| 746 | + return file_elements |
| 747 | + |
| 748 | + def _parse_publish_response(self, xml_response: Dict): |
| 749 | + """ Parse the id from the xml_response and assign it to self. """ |
| 750 | + self.dataset_id = int(xml_response['oml:upload_data_set']['oml:id']) |
774 | 751 |
|
775 | 752 | def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': |
776 | 753 | """ Creates a dictionary representation of self. """ |
|
0 commit comments