Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Could someone help me change all the variable names and function names and make this code unique, since I get a high plagiarism score? import pandas

Could someone help me change all the variable names and function names and make this code unique, since I get a high plagiarism score?

 

import pandas as pd

import numpy as np

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc

import matplotlib.pyplot as plt

import seaborn as sns


 

# Load the dataset: read the transactions CSV into a DataFrame.
data_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
data = pd.read_csv(data_path)

# Preprocess the dataset: add standardised copies of 'Amount' and 'Time',
# then remove the raw columns. The single scaler object is refit for each
# column, so after this section it holds the statistics of 'Time'.
scaler = StandardScaler()
data['scaled_amount'] = scaler.fit_transform(data[['Amount']].to_numpy())
data['scaled_time'] = scaler.fit_transform(data[['Time']].to_numpy())
data.drop(columns=['Time', 'Amount'], inplace=True)

# Split the dataset into train and test sets (70 % / 30 %, fixed seed).
# 'Class' is the target column; every other column is a feature.
y = data['Class']
X = data.drop(columns='Class')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)


 

# Create the undersampled dataset
#
# Every row with Class == 1 is kept, and an equally sized random sample
# (without replacement) of the Class == 0 rows is added, so the resulting
# frame contains the same number of rows from each class.
fraud_indices = np.array(data[data.Class == 1].index)
normal_indices = np.array(data[data.Class == 0].index)
undersample_size = len(fraud_indices)

# Draw from a seeded generator so the sample is the same on every run;
# every other random step in this script already uses random_state=0.
_undersample_rng = np.random.RandomState(0)
random_normal_indices = _undersample_rng.choice(
    normal_indices, undersample_size, replace=False
)
undersampled_indices = np.concatenate([fraud_indices, random_normal_indices])

# The arrays above hold index *labels* (taken from data.index), so select
# with label-based .loc. Positional .iloc only matches when the index is
# the default 0..n-1 range.
undersampled_data = data.loc[undersampled_indices, :]
X_undersampled = undersampled_data.drop('Class', axis=1)
y_undersampled = undersampled_data['Class']

# Split the undersampled dataset into train and test sets (70 % / 30 %).
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(
    X_undersampled, y_undersampled, test_size=0.3, random_state=0
)

def print_gridsearch_scores(clf, param, x_train, y_train):
    """Grid-search *clf* over *param* and report the best configuration.

    The search uses 5-fold cross-validation and ranks candidates by recall.

    Args:
        clf: Estimator handed to ``GridSearchCV``.
        param: Parameter grid to search.
        x_train: Training features.
        y_train: Training labels.

    Returns:
        The ``best_params_`` found by the search.
    """
    search = GridSearchCV(estimator=clf, param_grid=param, scoring='recall', cv=5)
    search.fit(x_train, y_train)

    winning_params = search.best_params_
    print(f"Best parameters: {winning_params}")
    print(f"Best score: {search.best_score_}")
    return winning_params


 

def plot_confusion_matrix(cm, title):
    """Render confusion matrix *cm* as an annotated seaborn heatmap.

    Args:
        cm: Matrix of integer counts (cells are formatted with ``'d'``).
        title: Text used as the plot title.
    """
    heatmap_options = {
        "annot": True,
        "cmap": "YlGnBu",
        "fmt": 'd',
        "linewidths": .5,
    }
    sns.heatmap(cm, **heatmap_options)

    # Label the plot: title first, then the two axes.
    plt.title(title)
    plt.ylabel("True")
    plt.xlabel("Predicted")


 

def predict_plot_test(clf, x_train, y_train, x_test, y_test):
    """Fit *clf*, predict the test set and plot the confusion matrix.

    Args:
        clf: Classifier exposing ``fit`` and ``predict``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features to predict on.
        y_test: True test labels compared against the predictions.
    """
    clf.fit(x_train, y_train)
    predicted_labels = clf.predict(x_test)

    heading = f"Confusion Matrix for {type(clf).__name__}"
    plot_confusion_matrix(confusion_matrix(y_test, predicted_labels), heading)


 

def plot_recall_for_threshold(clf, x_train, y_train, x_test, y_test, thresholds):
    """Fit *clf* and plot test-set recall against each decision threshold.

    For every value in *thresholds*, a sample is predicted as class 1 when
    its second-column ``predict_proba`` score is at or above that value.

    Args:
        clf: Classifier exposing ``fit`` and ``predict_proba``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: True test labels.
        thresholds: Iterable of cut-off values, plotted on the x axis.
    """
    clf.fit(x_train, y_train)
    positive_scores = clf.predict_proba(x_test)[:, 1]

    recall_per_cutoff = [
        recall_score(y_test, (positive_scores >= cutoff).astype(int))
        for cutoff in thresholds
    ]

    plt.plot(thresholds, recall_per_cutoff)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    plt.title(f"Recall for different thresholds for {type(clf).__name__}")


 

def plot_precision_recall(clf, x_train, y_train, x_test, y_test):
    """Fit *clf* and plot its precision-recall curve on the test set.

    The area under the curve is computed with ``auc(recall, precision)``
    and shown in the title to two decimals.

    Args:
        clf: Classifier exposing ``fit`` and ``predict_proba``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: True test labels.
    """
    clf.fit(x_train, y_train)
    positive_scores = clf.predict_proba(x_test)[:, 1]

    # The third return value (the thresholds) is not needed here.
    curve = precision_recall_curve(y_test, positive_scores)
    precision_values, recall_values = curve[0], curve[1]
    area = auc(recall_values, precision_values)

    plt.plot(recall_values, precision_values)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall curve for {type(clf).__name__} (AUC = {area:0.2f})")


 

# Hyper-parameter grids searched for each classifier.
knn_params = {'n_neighbors': list(range(1, 6))}
dt_params = {'max_leaf_nodes': list(range(10, 31, 5))}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {
    'gamma': [0.001, 0.01, 0.1, 1, 10],
    'C': [0.01, 0.1, 1, 10, 100],
}

# Classifier instances. SVC is created with probability=True so that
# predict_proba is available for the threshold and precision-recall plots.
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
svc = SVC(random_state=0, probability=True)

# Each classifier paired with the grid it is tuned over.
classifiers = list(zip(
    (knn, dt, rf, svc),
    (knn_params, dt_params, rf_params, svc_params),
))

# Decision thresholds 0.1, 0.2, ..., 0.9 used for the recall plot.
thresholds = [step / 10 for step in range(1, 10)]


 

# The undersampled train/test split, in the argument order the plotting
# helpers expect: (x_train, y_train, x_test, y_test).
_undersample_split = (
    X_train_undersample,
    y_train_undersample,
    X_test_undersample,
    y_test_undersample,
)

# For every classifier: tune it on the undersampled training data, apply
# the best parameters, then draw each diagnostic plot in its own figure.
for clf, params in classifiers:
    best_params = print_gridsearch_scores(
        clf, params, X_train_undersample, y_train_undersample
    )
    clf.set_params(**best_params)

    plt.figure()
    predict_plot_test(clf, *_undersample_split)

    plt.figure()
    plot_recall_for_threshold(clf, *_undersample_split, thresholds)

    plt.figure()
    plot_precision_recall(clf, *_undersample_split)

# Display all figures once, after the loop has finished.
plt.show()

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Global Strategy

Authors: Mike W. Peng

5th Edition

0357512367, 978-0357512364

More Books

Students also viewed these Programming questions

Question

3. Vary your pace and volume in speaking. Use silence for emphasis.

Answered: 1 week ago

Question

Discuss the various types of policies.

Answered: 1 week ago

Question

Briefly explain the various types of leadership.

Answered: 1 week ago

Question

Explain the need for and importance of co-ordination?

Answered: 1 week ago

Question

Explain the contribution of Peter F. Drucker to management.

Answered: 1 week ago

Question

2. How do we perceive middle-frequency sounds (100 to 4000 Hz)?

Answered: 1 week ago

Question

20. What is a feature detector?

Answered: 1 week ago