Skip to content

Commit 974a6ab

Browse files
added notebook example
1 parent 628d8b9 commit 974a6ab

1 file changed

Lines changed: 132 additions & 0 deletions

File tree

OpenMLDemo.ipynb

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
{
2+
"worksheets": [
3+
{
4+
"cells": [
5+
{
6+
"cell_type": "markdown",
7+
"source": "This notebook downloads a classification task from OpenML, uses scikit-learn to build a RandomForest classifier, and uploads the results to the OpenML server.\n"
8+
},
9+
{
10+
"cell_type": "markdown",
11+
"source": "Initialization and login. This assumes you have a `.openml` directory in your home directory containing a `cache` subdirectory and an `apikey.txt` file that holds your API key. You can find your API key in your account settings on openml.org."
12+
},
13+
{
14+
"cell_type": "code",
15+
"input": "from sklearn import preprocessing, ensemble\nimport openml\nimport os\n\nhome_dir = os.path.expanduser(\"~\")\nopenml_dir = os.path.join(home_dir, \".openml\")\ncache_dir = os.path.join(openml_dir, \"cache\")\nwith open(os.path.join(openml_dir, \"apikey.txt\"), 'r') as fh:\n\tkey = fh.readline().rstrip('\\n')\n\nopenml = APIConnector(cache_directory=cache_dir, apikey=key)",
16+
"outputs": [],
17+
"language": "python",
18+
"metadata": {
19+
"cellView": null,
20+
"executionInfo": {
21+
"content": {
22+
"status": "ok",
23+
"execution_count": 28,
24+
"payload": [],
25+
"user_expressions": {},
26+
"user_variables": {}
27+
},
28+
"timestamp": 1455279958823,
29+
"user_tz": -60,
30+
"user": {
31+
"sessionId": "284b6c114ba378fe",
32+
"userId": "109221074076178034989",
33+
"permissionId": "09574386662628411592",
34+
"displayName": "Joaquin Vanschoren",
35+
"color": "#1FA15D",
36+
"isMe": true,
37+
"isAnonymous": false,
38+
"photoUrl": "//lh6.googleusercontent.com/-9g2BD5NUkx4/AAAAAAAAAAI/AAAAAAAACC4/sACE-8zBzX4/s50-c-k-no/photo.jpg"
39+
}
40+
}
41+
}
42+
},
43+
{
44+
"cell_type": "markdown",
45+
"source": "Download a single OpenML task (id=10), create a scikit-learn classifier (RandomForest), and run it on the task."
46+
},
47+
{
48+
"cell_type": "code",
49+
"input": "task = openml.download_task(10)\nprint task\nclf = ensemble.RandomForestClassifier()\nX, y = task.get_X_and_Y()\nclf.fit(X, y)\nprint clf\nprediction_path, description_path = openml_run(task, clf)\n\n#import json\n#print(json.dumps(xmltodict.parse(open(os.path.abspath(description_path), \"r\").read()), indent=4))",
50+
"outputs": [
51+
{
52+
"output_type": "stream",
53+
"text": "OpenMLTask instance.\nTask ID: 10\nTask type: Supervised Classification\nDataset id: 10\nRandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n max_depth=None, max_features='auto', max_leaf_nodes=None,\n min_samples_leaf=1, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n oob_score=False, random_state=None, verbose=0,\n warm_start=False)\n2638\n",
54+
"stream": "stdout"
55+
}
56+
],
57+
"language": "python",
58+
"metadata": {
59+
"cellView": null,
60+
"executionInfo": {
61+
"content": {
62+
"status": "ok",
63+
"execution_count": 35,
64+
"payload": [],
65+
"user_expressions": {},
66+
"user_variables": {}
67+
},
68+
"timestamp": 1455280601582,
69+
"user_tz": -60,
70+
"user": {
71+
"sessionId": "284b6c114ba378fe",
72+
"userId": "109221074076178034989",
73+
"permissionId": "09574386662628411592",
74+
"displayName": "Joaquin Vanschoren",
75+
"color": "#1FA15D",
76+
"isMe": true,
77+
"isAnonymous": false,
78+
"photoUrl": "//lh6.googleusercontent.com/-9g2BD5NUkx4/AAAAAAAAAAI/AAAAAAAACC4/sACE-8zBzX4/s50-c-k-no/photo.jpg"
79+
}
80+
}
81+
}
82+
},
83+
{
84+
"cell_type": "markdown",
85+
"source": "Upload the run to the OpenML server and print the id and URL of the uploaded run."
86+
},
87+
{
88+
"cell_type": "code",
89+
"input": "import xmltodict\n\nprediction_abspath = os.path.abspath(prediction_path)\ndescription_abspath = os.path.abspath(description_path)\n\nreturn_code, response = openml.upload_run(prediction_abspath, description_abspath)\n\nif(return_code == 200):\n\tresponse_dict = xmltodict.parse(response.content)\n\trun_id = response_dict['oml:upload_run']['oml:run_id']\n\tprint(\"Uploaded run with id %s\" % (run_id))\n\tprint(\"Check it at www.openml.org/r/%s\" % (run_id))\n",
90+
"outputs": [
91+
{
92+
"output_type": "stream",
93+
"text": "Uploaded run with id 524013\nCheck it at www.openml.org/r/524013\n",
94+
"stream": "stdout"
95+
}
96+
],
97+
"language": "python",
98+
"metadata": {
99+
"cellView": null,
100+
"executionInfo": {
101+
"content": {
102+
"status": "ok",
103+
"execution_count": 31,
104+
"payload": [],
105+
"user_expressions": {},
106+
"user_variables": {}
107+
},
108+
"timestamp": 1455279991565,
109+
"user_tz": -60,
110+
"user": {
111+
"sessionId": "284b6c114ba378fe",
112+
"userId": "109221074076178034989",
113+
"permissionId": "09574386662628411592",
114+
"displayName": "Joaquin Vanschoren",
115+
"color": "#1FA15D",
116+
"isMe": true,
117+
"isAnonymous": false,
118+
"photoUrl": "//lh6.googleusercontent.com/-9g2BD5NUkx4/AAAAAAAAAAI/AAAAAAAACC4/sACE-8zBzX4/s50-c-k-no/photo.jpg"
119+
}
120+
}
121+
}
122+
}
123+
]
124+
}
125+
],
126+
"metadata": {
127+
"name": "OpenMLDemo.ipynb",
128+
"colabVersion": "0.1"
129+
},
130+
"nbformat": 3,
131+
"nbformat_minor": 0
132+
}

0 commit comments

Comments
 (0)