-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLogisticRegression.py
More file actions
29 lines (21 loc) · 1.66 KB
/
LogisticRegression.py
File metadata and controls
29 lines (21 loc) · 1.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from sklearn import linear_model, metrics
import SupportVectorMachines as svm # NB: Very important that the SVM file is in the same directory as this file uses some function from that file.
import timeit
if __name__ == '__main__':
    # Grid-search Logistic Regression hyperparameters (C x solver) on the
    # wildfires dataset, writing one classification report per combination,
    # then hand the report file to the shared plotting helper.
    start_time = timeit.default_timer()  # start of wall-clock timing

    # Reuse the CSV loader from the SVM module (must be importable — see import note above).
    training_data, training_labels = svm.importDataFromCSV('wildfires_training.csv')
    test_data, test_labels = svm.importDataFromCSV('wildfires_test.csv')

    c_values = [0.001, 0.01, 0.1, 1, 10, 100]  # inverse regularization strengths to sweep
    solvers = ['newton-cg', 'newton-cholesky', 'lbfgs', 'liblinear', 'sag', 'saga']

    # The default LogisticRegression settings are already covered by this grid,
    # so no dedicated baseline run is needed.
    with open('output_data_lr.txt', 'w') as output_file:
        for C in c_values:
            for solver in solvers:  # exhaustive grid over (C, solver) pairs
                # L2 is the only penalty accepted by every solver listed above;
                # max_iter=10000 gives the slower solvers (sag/saga) room to converge.
                model = linear_model.LogisticRegression(penalty='l2', C=C, solver=solver, max_iter=10000)
                model.fit(training_data, training_labels)
                predictions = model.predict(test_data)
                # One "C,solver:" header line followed by the report dict, matching
                # the format svm.processOutputData expects.
                output_file.write(
                    f"{C},{solver}:\n"
                    f"{metrics.classification_report(test_labels, predictions, output_dict=True)}\n"
                )
    # FIX: removed the explicit output_file.close() that followed the `with`
    # body — the context manager already closes the file on exit, and the
    # extra call was redundant (and would raise NameError if moved outside
    # a function scope in a refactor).

    svm.processOutputData('output_data_lr.txt', False)  # generate graphs from the report file

    time_elapsed = timeit.default_timer() - start_time  # total runtime
    print(f"Time elapsed: {time_elapsed:.2f} seconds")