|
6 | 6 | """ |
7 | 7 |
|
8 | 8 | import openml |
9 | | -from pprint import pprint |
10 | 9 | from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree |
11 | 10 |
|
12 | 11 | ############################################################################ |
|
58 | 57 | # Run the flow |
59 | 58 | run = openml.runs.run_model_on_task(clf, task) |
60 | 59 |
|
61 | | -# pprint(vars(run), depth=2) |
| 60 | +print(run) |
62 | 61 |
|
63 | 62 | ############################################################################ |
64 | 63 | # Share the run on the OpenML server |
|
75 | 74 | # We can now also inspect the flow object which was automatically created: |
76 | 75 |
|
77 | 76 | flow = openml.flows.get_flow(run.flow_id) |
78 | | -pprint(vars(flow), depth=1) |
| 77 | +print(flow) |
79 | 78 |
|
80 | 79 | ############################################################################ |
81 | 80 | # It also works with pipelines |
82 | 81 | # ############################ |
83 | 82 | # |
84 | 83 | # When you need to handle 'dirty' data, build pipelines to model them automatically. |
85 | | -task = openml.tasks.get_task(115) |
| 84 | +task = openml.tasks.get_task(1) |
| 85 | +features = task.get_dataset().features |
| 86 | +nominal_feature_indices = [ |
| 87 | + i for i in range(len(features)) |
| 88 | + if features[i].name != task.target_name and features[i].data_type == 'nominal' |
| 89 | +] |
86 | 90 | pipe = pipeline.Pipeline(steps=[ |
87 | | - ('Imputer', impute.SimpleImputer(strategy='median')), |
88 | | - ('OneHotEncoder', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')), |
| 91 | + ( |
| 92 | + 'Preprocessing', |
| 93 | + compose.ColumnTransformer([ |
| 94 | + ('Nominal', pipeline.Pipeline( |
| 95 | + [ |
| 96 | + ('Imputer', impute.SimpleImputer(strategy='most_frequent')), |
| 97 | + ( |
| 98 | + 'Encoder', |
| 99 | + preprocessing.OneHotEncoder( |
| 100 | + sparse=False, handle_unknown='ignore', |
| 101 | + ) |
| 102 | + ), |
| 103 | + ]), |
| 104 | + nominal_feature_indices, |
| 105 | + ), |
| 106 | + ]), |
| 107 | + ), |
89 | 108 | ('Classifier', ensemble.RandomForestClassifier(n_estimators=10)) |
90 | 109 | ]) |
91 | 110 |
|
|
0 commit comments