Question
Could someone help me rename all the variables and function names to make this code more unique? I keep getting a high plagiarism score on it.
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset
data_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
data = pd.read_csv(data_path)
# Preprocess the dataset
scaler = StandardScaler()
data['scaled_amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1, 1))
data['scaled_time'] = scaler.fit_transform(data['Time'].values.reshape(-1, 1))
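# Note: the same StandardScaler instance is refit on Time here, so afterwards it only
# remembers the Time statistics; separate scaler objects would be needed to transform
# unseen data consistently.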
data.drop(['Time', 'Amount'], axis=1, inplace=True)
# Split the dataset into train and test sets
X = data.drop('Class', axis=1)
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
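# Note: the grid search and plots further down run on the undersampled split created
# next; this split of the full, imbalanced data is kept aside.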
# Create the undersampled dataset
fraud_indices = np.array(data[data.Class == 1].index)
normal_indices = np.array(data[data.Class == 0].index)
undersample_size = len(fraud_indices)
random_normal_indices = np.random.choice(normal_indices, undersample_size, replace=False)
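# This draws one normal transaction for every fraud case (a balanced 1:1 sample);
# without a fixed NumPy seed the chosen normal rows differ between runs.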
random_normal_indices = np.array(random_normal_indices)
undersampled_indices = np.concatenate([fraud_indices, random_normal_indices])
undersampled_data = data.iloc[undersampled_indices, :]
X_undersampled = undersampled_data.drop('Class', axis=1)
y_undersampled = undersampled_data['Class']
# Split the undersampled dataset into train and test sets
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(X_undersampled, y_undersampled, test_size=0.3, random_state=0)
def print_gridsearch_scores(clf, param, x_train, y_train):
    grid_clf = GridSearchCV(clf, param, scoring='recall', cv=5)
    grid_clf.fit(x_train, y_train)
    print(f"Best parameters: {grid_clf.best_params_}")
    print(f"Best score: {grid_clf.best_score_}")
    return grid_clf.best_params_
def plot_confusion_matrix(cm, title):
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='d', linewidths=.5)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title(title)
def predict_plot_test(clf, x_train, y_train, x_test, y_test):
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, f"Confusion Matrix for {clf.__class__.__name__}")
def plot_recall_for_threshold(clf, x_train, y_train, x_test, y_test, thresholds):
    clf.fit(x_train, y_train)
    y_pred_proba = clf.predict_proba(x_test)[:, 1]
    recalls = []
    for t in thresholds:
        y_pred = (y_pred_proba >= t).astype(int)
        recalls.append(recall_score(y_test, y_pred))
    plt.plot(thresholds, recalls)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    plt.title(f"Recall for different thresholds for {clf.__class__.__name__}")
def plot_precision_recall(clf, x_train, y_train, x_test, y_test):
    clf.fit(x_train, y_train)
    y_pred_proba = clf.predict_proba(x_test)[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    plt.plot(recall, precision)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall curve for {clf.__class__.__name__} (AUC = {pr_auc:0.2f})")
# Parameters for classifiers
knn_params = {'n_neighbors': [1, 2, 3, 4, 5]}
dt_params = {'max_leaf_nodes': [10, 15, 20, 25, 30]}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {'gamma': [0.001, 0.01, 0.1, 1, 10], 'C': [0.01, 0.1, 1, 10, 100]}
# Initialize classifiers
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
svc = SVC(random_state=0, probability=True)
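# probability=True is what lets SVC expose predict_proba for the threshold and
# precision-recall plots; it adds internal cross-validation and slows training down.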
# Perform the tasks for each classifier
classifiers = [
    (knn, knn_params),
    (dt, dt_params),
    (rf, rf_params),
    (svc, svc_params)
]
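# Probability cutoffs to sweep when plotting recall against the decision threshold.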
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for clf, params in classifiers:
    best_params = print_gridsearch_scores(clf, params, X_train_undersample, y_train_undersample)
    clf.set_params(**best_params)
    plt.figure()
    predict_plot_test(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample)
    plt.figure()
    plot_recall_for_threshold(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample, thresholds)
    plt.figure()
    plot_precision_recall(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample)
plt.show()
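The full-data split (X_train, X_test, y_train, y_test) created near the top is never touched by the loop above. As a rough sketch, and assuming that loop has already fitted each classifier with its best parameters on the undersampled data, it could be used to check how the models behave on the untouched, imbalanced test set:

# Sketch (not part of the original script): recall of each tuned model on the
# full, imbalanced test split, to see how the undersampling-trained models
# generalise beyond the balanced sample.
for clf, _ in classifiers:
    y_pred_full = clf.predict(X_test)
    print(f"{clf.__class__.__name__} recall on full test set: "
          f"{recall_score(y_test, y_pred_full):.3f}")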