|
"""
================================
Plotting hyperparameter surfaces
================================
"""
import openml
import numpy as np

####################################################################################################
# First step - obtaining the data
# ===============================
# First, we need to choose an SVM flow, for example 8353, and a task. Finding the IDs of them is
# not part of this tutorial, this could for example be done via the website.
#
# For this we use the function ``list_evaluations_setups`` which can automatically join
# evaluations conducted by the server with the hyperparameter settings extracted from the
# uploaded runs (called *setup*).
df = openml.evaluations.list_evaluations_setups(
    function='predictive_accuracy',
    flow=[8353],
    task=[6],
    output_format='dataframe',
    # Using this flag incorporates the hyperparameters into the returned dataframe. Otherwise,
    # the dataframe would contain a field ``parameters`` containing an unparsed dictionary.
    parameters_in_separate_columns=True,
)
print(df.head(n=10))
####################################################################################################
# We can see all the hyperparameter names in the columns of the dataframe:
print('\n'.join(df.columns))
####################################################################################################
# Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we
# can nicely plot them. The plot axes below are labelled "log10", so we take the base-10
# logarithm here; ``np.log`` would be the natural logarithm and would not match the labels.
hyperparameters = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma']
df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10)
####################################################################################################
# Option 1 - plotting via the pandas helper functions
# ===================================================
#
x_column = 'sklearn.svm.classes.SVC(16)_C'
y_column = 'sklearn.svm.classes.SVC(16)_gamma'
# Hexagonal binning of the (C, gamma) plane, colored by the mean accuracy per bin.
df.plot.hexbin(
    x=x_column,
    y=y_column,
    C='value',
    reduce_C_function=np.mean,
    gridsize=25,
    title='SVM performance landscape',
)
####################################################################################################
# Option 2 - plotting via matplotlib
# ==================================
#
import matplotlib.pyplot as plt

fig, ax = plt.subplots()

c_values = df['sklearn.svm.classes.SVC(16)_C']
gamma_values = df['sklearn.svm.classes.SVC(16)_gamma']
accuracy = df['value']

# Plotting all evaluations:
ax.plot(c_values, gamma_values, 'ko', ms=1)
# Create a contour plot
contour = ax.tricontourf(c_values, gamma_values, accuracy, levels=12, cmap="RdBu_r")
# Adjusting the colorbar
fig.colorbar(contour, ax=ax, label="accuracy")
# Adjusting the axis limits
ax.set_xlim(min(c_values), max(c_values))
ax.set_ylim(min(gamma_values), max(gamma_values))
ax.set_xlabel("C (log10)")
ax.set_ylabel("gamma (log10)")
ax.set_title('SVM performance landscape')