Skip to content

Commit cd3ba29

Browse files
committed
minor reformatting
1 parent 9ca9d87 commit cd3ba29

1 file changed

Lines changed: 32 additions & 30 deletions

File tree

examples/40_paper/2018_neurips_perrone_example.py

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
1818
In the following section, we shall do the following:
1919
20-
* Retrieve tasks and flows as used in the experiments by Perrone et al.
21-
* Build a tabular data by fetching the evaluations uploaded to OpenML
20+
* Retrieve tasks and flows as used in the experiments by Perrone et al. (2018).
21+
* Build tabular data by fetching the evaluations uploaded to OpenML.
2222
* Impute missing values and handle categorical data before building a Random Forest model that
23-
maps hyperparameter values to the area under curve score
23+
maps hyperparameter values to the area under curve score.
2424
"""
2525

2626
############################################################################
@@ -35,15 +35,11 @@
3535
from sklearn.preprocessing import OneHotEncoder
3636
from sklearn.ensemble import RandomForestRegressor
3737

38-
39-
user_id = 2702
4038
flow_type = 'svm' # this example will use the smaller svm flow evaluations
4139
############################################################################
42-
43-
"""
44-
The subsequent functions are defined to fetch tasks, flows, evaluations and preprocess them into
45-
a tabular format that can be used to build models.
46-
"""
40+
# The subsequent functions are defined to fetch tasks, flows, evaluations and preprocess them into
41+
# a tabular format that can be used to build models.
42+
#
4743

4844
def fetch_evaluations(run_full=False,
4945
flow_type='svm',
@@ -69,15 +65,20 @@ def fetch_evaluations(run_full=False,
6965
'''
7066
# Collecting task IDs as used by the experiments from the paper
7167
if flow_type == 'svm' and run_full:
72-
task_ids = [10101, 145878, 146064, 14951, 34537, 3485, 3492, 3493, 3494, 37, 3889, 3891,
73-
3899, 3902, 3903, 3913, 3918, 3950, 9889, 9914, 9946, 9952, 9967, 9971, 9976,
74-
9978, 9980, 9983]
68+
task_ids = [
69+
10101, 145878, 146064, 14951, 34537, 3485, 3492, 3493, 3494,
70+
37, 3889, 3891, 3899, 3902, 3903, 3913, 3918, 3950, 9889,
71+
9914, 9946, 9952, 9967, 9971, 9976, 9978, 9980, 9983,
72+
]
7573
elif flow_type == 'svm' and not run_full:
7674
task_ids = [9983, 3485, 3902, 3903, 145878]
7775
elif flow_type == 'xgboost' and run_full:
78-
task_ids = [10093, 10101, 125923, 145847, 145857, 145862, 145872, 145878, 145953, 145972,
79-
145976, 145979, 146064, 14951, 31, 3485, 3492, 3493, 37, 3896, 3903, 3913,
80-
3917, 3918, 3, 49, 9914, 9946, 9952, 9967]
76+
task_ids = [
77+
10093, 10101, 125923, 145847, 145857, 145862, 145872, 145878,
78+
145953, 145972, 145976, 145979, 146064, 14951, 31, 3485,
79+
3492, 3493, 37, 3896, 3903, 3913, 3917, 3918, 3, 49, 9914,
80+
9946, 9952, 9967,
81+
]
8182
else: #flow_type == 'xgboost' and not run_full:
8283
task_ids = [3903, 37, 3485, 49, 3913]
8384

@@ -123,23 +124,24 @@ def create_table_from_evaluations(eval_df,
123124
if task_ids is not None:
124125
eval_df = eval_df[eval_df['task_id'].isin(task_ids)]
125126
if flow_type == 'svm':
126-
ncols = 4
127127
colnames = ['cost', 'degree', 'gamma', 'kernel']
128128
else:
129-
ncols = 10
130-
colnames = ['alpha', 'booster', 'colsample_bylevel', 'colsample_bytree', 'eta', 'lambda',
131-
'max_depth', 'min_child_weight', 'nrounds', 'subsample']
129+
colnames = [
130+
'alpha', 'booster', 'colsample_bylevel', 'colsample_bytree',
131+
'eta', 'lambda', 'max_depth', 'min_child_weight', 'nrounds',
132+
'subsample',
133+
]
132134
eval_df = eval_df.sample(frac=1) # shuffling rows
133-
run_ids = eval_df.loc[:,"run_id"][:run_count]
135+
run_ids = eval_df["run_id"][:run_count]
134136
eval_table = pd.DataFrame(np.nan, index=run_ids, columns=colnames)
135137
values = []
136-
for run_id in run_ids:
137-
r = openml.runs.get_run(run_id)
138+
runs = openml.runs.get_runs(run_ids)
139+
for r in runs:
138140
params = r.parameter_settings
139141
for p in params:
140142
name, value = p['oml:name'], p['oml:value']
141143
if name in colnames:
142-
eval_table.loc[run_id, name] = value
144+
eval_table.loc[r.run_id, name] = value
143145
values.append(r.evaluations[metric])
144146
return eval_table, values
145147

@@ -153,13 +155,14 @@ def list_categorical_attributes(flow_type='svm'):
153155
#############################################################################
154156
# Fetching the data from OpenML
155157
# *****************************
156-
# To read all the tasks and evaluations for them and collate into a table. Here, we are reading
157-
# all the tasks and evaluations for the SVM flow and pre-processing all retrieved evaluations.
158+
# Now, we read all the tasks and their evaluations and collate them into a table.
159+
# Here, we are reading all the tasks and evaluations for the SVM flow and
160+
# pre-processing all retrieved evaluations.
158161

159162
eval_df, task_ids, flow_id = fetch_evaluations(run_full=False, flow_type=flow_type)
160163
# run_count can not be passed if all the results are required
161-
# it is set to 1000 here arbitrarily to get results quickly
162-
X, y = create_table_from_evaluations(eval_df, run_count=1000, flow_type=flow_type)
164+
# it is set to 500 here arbitrarily to get results quickly
165+
X, y = create_table_from_evaluations(eval_df, run_count=500, flow_type=flow_type)
163166
print(X.head())
164167
print("Y : ", y[:5])
165168

@@ -218,6 +221,7 @@ def list_categorical_attributes(flow_type='svm'):
218221
# The surrogate model built from a task's evaluations fetched from OpenML will be put into
219222
# trivial action here, where we shall randomly sample configurations and observe the trajectory
220223
# of the area under curve (auc) we can obtain from the surrogate we've built.
224+
#
221225
# NOTE: This section is written exclusively for the SVM flow
222226

223227
# Sampling random configurations
@@ -246,8 +250,6 @@ def random_sample_configurations(num_samples=100):
246250

247251
# plotting the regret curve
248252
plt.plot(regret)
249-
# plt.yscale('log')
250253
plt.title('AUC regret for Random Search on surrogate')
251254
plt.xlabel('Number of function evaluations')
252255
plt.ylabel('Regret')
253-
plt.show()

0 commit comments

Comments
 (0)