Skip to content

Commit 5adcf77

Browse files
authored
Merge pull request #153 from openml/amueller-push_cleanup
Copy of 114
2 parents 244c585 + 6261425 commit 5adcf77

8 files changed

Lines changed: 116 additions & 117 deletions

File tree

examples/OpenMLDemo.ipynb

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
"name": "stdout",
119119
"output_type": "stream",
120120
"text": [
121-
"First 10 of 2806 datasets...\n",
121+
"First 10 of 2855 datasets...\n",
122122
" did name NumberOfInstances NumberOfFeatures\n",
123123
"0 1 anneal 898 39\n",
124124
"1 2 anneal 898 39\n",
@@ -274,6 +274,7 @@
274274
" 'creator': 'R.A. Fisher',\n",
275275
" 'data_file': '/home/andy/.openml/cache/datasets/61/dataset.arff',\n",
276276
" 'data_pickle_file': '/home/andy/.openml/cache/datasets/61/dataset.pkl',\n",
277+
" 'dataset_id': 61,\n",
277278
" 'default_target_attribute': 'class',\n",
278279
" 'description': '**Author**: R.A. Fisher \\n'\n",
279280
" '**Source**: '\n",
@@ -304,7 +305,6 @@
304305
" ' -- Iris Versicolour\\n'\n",
305306
" ' -- Iris Virginica',\n",
306307
" 'format': 'ARFF',\n",
307-
" 'id': 61,\n",
308308
" 'ignore_attributes': None,\n",
309309
" 'language': None,\n",
310310
" 'licence': 'Public',\n",
@@ -629,7 +629,6 @@
629629
"name": "stdout",
630630
"output_type": "stream",
631631
"text": [
632-
"2823\n",
633632
"RandomForest has run on the task.\n"
634633
]
635634
}
@@ -657,24 +656,18 @@
657656
},
658657
"outputs": [
659658
{
660-
"name": "stdout",
661-
"output_type": "stream",
662-
"text": [
663-
"Uploaded run with id 538241\n",
664-
"Check it at www.openml.org/r/538241\n"
665-
]
659+
"data": {
660+
"text/plain": [
661+
"<openml.runs.run.OpenMLRun at 0x7fb31ecec668>"
662+
]
663+
},
664+
"execution_count": 17,
665+
"metadata": {},
666+
"output_type": "execute_result"
666667
}
667668
],
668669
"source": [
669-
"import xmltodict\n",
670-
"\n",
671-
"return_code, response = run.publish()\n",
672-
"\n",
673-
"if(return_code == 200):\n",
674-
" response_dict = xmltodict.parse(response)\n",
675-
" run_id = response_dict['oml:upload_run']['oml:run_id']\n",
676-
" print(\"Uploaded run with id %s\" % (run_id))\n",
677-
" print(\"Check it at www.openml.org/r/%s\" % (run_id))"
670+
"run.publish()"
678671
]
679672
},
680673
{

openml/datasets/dataset.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import numpy as np
1010
import scipy.sparse
11+
import xmltodict
1112

1213
if sys.version_info[0] >= 3:
1314
import pickle
@@ -17,6 +18,7 @@
1718
except:
1819
import pickle
1920

21+
2022
from ..util import is_string
2123
from .._api_calls import _perform_api_call
2224

@@ -36,7 +38,7 @@ class OpenMLDataset(object):
3638
Description of the dataset
3739
FIXME : which of these do we actually need?
3840
"""
39-
def __init__(self, id=None, name=None, version=None, description=None,
41+
def __init__(self, dataset_id=None, name=None, version=None, description=None,
4042
format=None, creator=None, contributor=None,
4143
collection_date=None, upload_date=None, language=None,
4244
licence=None, url=None, default_target_attribute=None,
@@ -45,7 +47,7 @@ def __init__(self, id=None, name=None, version=None, description=None,
4547
original_data_url=None, paper_url=None, update_comment=None,
4648
md5_checksum=None, data_file=None):
4749
# Attributes received by querying the RESTful API
48-
self.id = int(id) if id is not None else None
50+
self.dataset_id = int(dataset_id) if dataset_id is not None else None
4951
self.name = name
5052
self.version = int(version)
5153
self.description = description
@@ -100,7 +102,7 @@ def __init__(self, id=None, name=None, version=None, description=None,
100102
with open(self.data_pickle_file, "wb") as fh:
101103
pickle.dump((X, categorical, attribute_names), fh, -1)
102104
logger.debug("Saved dataset %d: %s to file %s" %
103-
(self.id, self.name, self.data_pickle_file))
105+
(self.dataset_id, self.name, self.data_pickle_file))
104106

105107
def __eq__(self, other):
106108
if type(other) != OpenMLDataset:
@@ -281,7 +283,8 @@ def publish(self):
281283
"/data/", file_dictionary=file_dictionary,
282284
file_elements=file_elements)
283285

284-
return return_code, return_value
286+
self.dataset_id = int(xmltodict.parse(return_value)['oml:upload_data_set']['oml:id'])
287+
return self
285288

286289
def _to_xml(self):
287290
"""Serialize object to xml for upload
@@ -292,7 +295,7 @@ def _to_xml(self):
292295
XML description of the data.
293296
"""
294297
xml_dataset = ('<oml:data_set_description '
295-
'xmlns:oml="http://openml.org/openml">')
298+
'xmlns:oml="http://openml.org/openml">\n')
296299
props = ['id', 'name', 'version', 'description', 'format', 'creator',
297300
'contributor', 'collection_date', 'upload_date', 'language',
298301
'licence', 'url', 'default_target_attribute',
@@ -302,6 +305,6 @@ def _to_xml(self):
302305
for prop in props:
303306
content = getattr(self, prop, None)
304307
if content is not None:
305-
xml_dataset += "<oml:{0}>{1}</oml:{0}>".format(prop, content)
308+
xml_dataset += "<oml:{0}>{1}</oml:{0}>\n".format(prop, content)
306309
xml_dataset += "</oml:data_set_description>"
307310
return xml_dataset

0 commit comments

Comments
 (0)