Question
Could someone help me rename all the variables and function names to make this code more unique? I keep getting a high plagiarism score on it.
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset
data_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
data = pd.read_csv(data_path)
# Preprocess the dataset
scaler = StandardScaler()
data['scaled_amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1, 1))
data['scaled_time'] = scaler.fit_transform(data['Time'].values.reshape(-1, 1))
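# Note: the same StandardScaler instance is refit on Time here, so afterwards it only
# remembers the Time statistics; separate scaler objects would be needed to transform
# unseen data consistently.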
data.drop(['Time', 'Amount'], axis=1, inplace=True)
# Split the dataset into train and test sets
X = data.drop('Class', axis=1)
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
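# Note: the grid search and plots further down run on the undersampled split created
# next; this split of the full, imbalanced data is kept aside.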
# Create the undersampled dataset
fraud_indices = np.array(data[data.Class == 1].index)
normal_indices = np.array(data[data.Class == 0].index)
undersample_size = len(fraud_indices)
random_normal_indices = np.random.choice(normal_indices, undersample_size, replace=False)
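# This draws one normal transaction for every fraud case (a balanced 1:1 sample);
# without a fixed NumPy seed the chosen normal rows differ between runs.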
random_normal_indices = np.array(random_normal_indices)
undersampled_indices = np.concatenate([fraud_indices, random_normal_indices])
undersampled_data = data.iloc[undersampled_indices, :]
X_undersampled = undersampled_data.drop('Class', axis=1)
y_undersampled = undersampled_data['Class']
# Split the undersampled dataset into train and test sets
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(X_undersampled, y_undersampled, test_size=0.3, random_state=0)
def print_gridsearch_scores(clf, param, x_train, y_train):
    grid_clf = GridSearchCV(clf, param, scoring='recall', cv=5)
    grid_clf.fit(x_train, y_train)
    print(f"Best parameters: {grid_clf.best_params_}")
    print(f"Best score: {grid_clf.best_score_}")
    return grid_clf.best_params_
def plot_confusion_matrix(cm, title):
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='d', linewidths=.5)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title(title)
def predict_plot_test(clf, x_train, y_train, x_test, y_test):
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, f"Confusion Matrix for {clf.__class__.__name__}")
def plot_recall_for_threshold(clf, x_train, y_train, x_test, y_test, thresholds):
    clf.fit(x_train, y_train)
    y_pred_proba = clf.predict_proba(x_test)[:, 1]
    recalls = []
    for t in thresholds:
        y_pred = (y_pred_proba >= t).astype(int)
        recalls.append(recall_score(y_test, y_pred))
    plt.plot(thresholds, recalls)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    plt.title(f"Recall for different thresholds for {clf.__class__.__name__}")
def plot_precision_recall(clf, x_train, y_train, x_test, y_test):
    clf.fit(x_train, y_train)
    y_pred_proba = clf.predict_proba(x_test)[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    plt.plot(recall, precision)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall curve for {clf.__class__.__name__} (AUC = {pr_auc:0.2f})")
# Parameters for classifiers
knn_params = {'n_neighbors': [1, 2, 3, 4, 5]}
dt_params = {'max_leaf_nodes': [10, 15, 20, 25, 30]}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {'gamma': [0.001, 0.01, 0.1, 1, 10], 'C': [0.01, 0.1, 1, 10, 100]}
# Initialize classifiers
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
svc = SVC(random_state=0, probability=True)
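# probability=True is what lets SVC expose predict_proba for the threshold and
# precision-recall plots; it adds internal cross-validation and slows training down.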
# Perform the tasks for each classifier
classifiers = [
    (knn, knn_params),
    (dt, dt_params),
    (rf, rf_params),
    (svc, svc_params)
]
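# Probability cutoffs to sweep when plotting recall against the decision threshold.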
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for clf, params in classifiers:
    best_params = print_gridsearch_scores(clf, params, X_train_undersample, y_train_undersample)
    clf.set_params(**best_params)
    plt.figure()
    predict_plot_test(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample)
    plt.figure()
    plot_recall_for_threshold(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample, thresholds)
    plt.figure()
    plot_precision_recall(clf, X_train_undersample, y_train_undersample, X_test_undersample, y_test_undersample)
plt.show()
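The full-data split (X_train, X_test, y_train, y_test) created near the top is never touched by the loop above. As a rough sketch, and assuming that loop has already fitted each classifier with its best parameters on the undersampled data, it could be used to check how the models behave on the untouched, imbalanced test set:

# Sketch (not part of the original script): recall of each tuned model on the
# full, imbalanced test split, to see how the undersampling-trained models
# generalise beyond the balanced sample.
for clf, _ in classifiers:
    y_pred_full = clf.predict(X_test)
    print(f"{clf.__class__.__name__} recall on full test set: "
          f"{recall_score(y_test, y_pred_full):.3f}")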