-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathAlgorithm_for_Spam Detection with Logistic Regression.py
More file actions
34 lines (27 loc) · 1.24 KB
/
Algorithm_for_Spam Detection with Logistic Regression.py
File metadata and controls
34 lines (27 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
# Toy spam-detection demo: fit a logistic-regression classifier on a handful
# of hand-written samples, then print its accuracy and confusion matrix on a
# held-out split.
#
# The held-out rows are set aside BEFORE the training data is enlarged.
# Enlarging first (sampling with replacement) and splitting afterwards puts
# exact copies of the same rows on both sides of the split, so the reported
# accuracy would measure memorisation rather than generalisation.

FEATURE_COLUMNS = ['Keywords', 'Length', 'Links']  # Independent variables
TARGET_COLUMN = 'Spam'  # Dependent variable (binary 0/1 label)
CLASS_LABELS = [0, 1]  # Every value the target column can take
N_EXTRA_TRAIN_SAMPLES = 20  # Increase the number here as needed

# Existing Sample Data
data = {'Keywords': [0, 1, 0, 2, 1], 'Length': [200, 800, 300, 1000, 700], 'Links': [1, 3, 0, 5, 2], 'Spam': [0, 1, 0, 1, 1]}
df = pd.DataFrame(data)

# Model Selection and Feature Selection
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Splitting the ORIGINAL rows into training and testing sets, so that no test
# row can also appear (as a resampled duplicate) in the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Increase the training set size by resampling the training rows only.
train_df = df.loc[X_train.index]
additional_data = train_df.sample(N_EXTRA_TRAIN_SAMPLES, replace=True, random_state=42)
enlarged_df = pd.concat([train_df, additional_data]).reset_index(drop=True)
X_train = enlarged_df[FEATURE_COLUMNS]
y_train = enlarged_df[TARGET_COLUMN]

# Model Training
# 'Length' is on a far larger scale than the other two features and nothing is
# standardised here, so raise the solver's iteration cap above the library
# default to give it room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Model Validation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Pin the label set so the matrix is always 2x2, even when the small test
# split happens to contain only one of the two classes.
conf_matrix = confusion_matrix(y_test, y_pred, labels=CLASS_LABELS)
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)