Skip to content

Commit c40e474

Browse files
authored
add new example regarding svm hyperparameter plotting (#834)
* add new example regarding svm hyperparameter plotting * implement Neeratyoy's suggestions * add title & fix pep8
1 parent 34d784a commit c40e474

2 files changed

Lines changed: 109 additions & 0 deletions

File tree

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
================================
3+
Plotting hyperparameter surfaces
4+
================================
5+
"""
6+
import openml
7+
import numpy as np
8+
9+
####################################################################################################
10+
# First step - obtaining the data
11+
# ===============================
12+
# First, we need to choose an SVM flow, for example 8353, and a task. Finding their IDs is
13+
# not part of this tutorial; this could, for example, be done via the website.
14+
#
15+
# For this we use the function ``list_evaluations_setups`` which can automatically join
16+
# evaluations conducted by the server with the hyperparameter settings extracted from the
17+
# uploaded runs (called *setup*).
18+
df = openml.evaluations.list_evaluations_setups(
19+
function='predictive_accuracy',
20+
flow=[8353],
21+
task=[6],
22+
output_format='dataframe',
23+
# Using this flag incorporates the hyperparameters into the returned dataframe. Otherwise,
24+
# the dataframe would contain a field ``parameters`` containing an unparsed dictionary.
25+
parameters_in_separate_columns=True,
26+
)
27+
print(df.head(n=10))
28+
29+
####################################################################################################
30+
# We can see all the hyperparameter names in the columns of the dataframe:
31+
for name in df.columns:
32+
print(name)
33+
34+
####################################################################################################
35+
# Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we
36+
# can nicely plot them.
37+
hyperparameters = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma']
38+
# Base-10 logarithm, so that the values match the "(log10)" axis labels of the plots below.
df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10)
39+
40+
####################################################################################################
41+
# Option 1 - plotting via the pandas helper functions
42+
# ===================================================
43+
#
44+
df.plot.hexbin(
45+
x='sklearn.svm.classes.SVC(16)_C',
46+
y='sklearn.svm.classes.SVC(16)_gamma',
47+
C='value',
48+
reduce_C_function=np.mean,
49+
gridsize=25,
50+
title='SVM performance landscape',
51+
)
52+
53+
####################################################################################################
54+
# Option 2 - plotting via matplotlib
55+
# ==================================
56+
#
57+
import matplotlib.pyplot as plt
58+
59+
fig, ax = plt.subplots()
60+
61+
C = df['sklearn.svm.classes.SVC(16)_C']
62+
gamma = df['sklearn.svm.classes.SVC(16)_gamma']
63+
score = df['value']
64+
65+
# Plotting all evaluations:
66+
ax.plot(C, gamma, 'ko', ms=1)
67+
# Create a contour plot
68+
cntr = ax.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
69+
# Adjusting the colorbar
70+
fig.colorbar(cntr, ax=ax, label="accuracy")
71+
# Adjusting the axis limits
72+
ax.set(
73+
xlim=(min(C), max(C)),
74+
ylim=(min(gamma), max(gamma)),
75+
xlabel="C (log10)",
76+
ylabel="gamma (log10)",
77+
)
78+
ax.set_title('SVM performance landscape')
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import unittest
2+
3+
4+
class TestEvaluationsExample(unittest.TestCase):
5+
6+
def test_example_python_paper(self):
7+
# Example script which will appear in the upcoming OpenML-Python paper
8+
# This test ensures that the example will keep running!
9+
10+
import openml
11+
import numpy as np
12+
import matplotlib.pyplot as plt
13+
14+
df = openml.evaluations.list_evaluations_setups(
15+
'predictive_accuracy',
16+
flow=[8353],
17+
task=[6],
18+
output_format='dataframe',
19+
parameters_in_separate_columns=True,
20+
) # Choose an SVM flow, for example 8353, and a task.
21+
22+
hp_names = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma']
23+
# Base-10 logarithm, so that the values match the "(log10)" axis labels set below.
df[hp_names] = df[hp_names].astype(float).apply(np.log10)
24+
C, gamma, score = df[hp_names[0]], df[hp_names[1]], df['value']
25+
26+
cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
27+
plt.colorbar(cntr, label="accuracy")
28+
plt.xlim((min(C), max(C)))
29+
plt.ylim((min(gamma), max(gamma)))
30+
plt.xlabel("C (log10)")
31+
plt.ylabel("gamma (log10)")

0 commit comments

Comments
 (0)