
Question

1 Approved Answer


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils import resample
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Load the dataset
df = pd.read_csv("dataset.csv")
# (A) Calculate the number of cases of manipulators versus non-manipulators in the dataset and draw a bar plot.
# Count the number of manipulators and non-manipulators
manipulator_counts = df['MANIPULATOR'].value_counts()
# Plot the bar plot
plt.bar(manipulator_counts.index, manipulator_counts.values)
plt.xlabel('Manipulator')
plt.ylabel('Count')
plt.title('Manipulator vs Non-Manipulator Counts')
plt.xticks([0,1],['Non-Manipulator', 'Manipulator'])
plt.show()
# (B) Create an 80:20 partition and find the number of positives in the test data.
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df.drop('MANIPULATOR', axis=1), df['MANIPULATOR'], test_size=0.2, random_state=42)
# Count the number of positives in the test data
positives_in_test = y_test.sum()
print("Number of positives in the test data:", positives_in_test)
# (C) Upsample the dataset to create a balanced dataset.
# Separate majority and minority classes
majority_class = df[df['MANIPULATOR']==0]
minority_class = df[df['MANIPULATOR']==1]
# Upsample minority class
minority_upsampled = resample(minority_class, replace=True, n_samples=len(majority_class), random_state=42)
# Combine majority class with upsampled minority class
balanced_df = pd.concat([majority_class, minority_upsampled])
# Check the class distribution in the balanced dataset
print(balanced_df['MANIPULATOR'].value_counts())
# (D) Build models using this balanced dataset.
# Define features and target variable
X_balanced = balanced_df.drop('MANIPULATOR', axis=1)
y_balanced = balanced_df['MANIPULATOR']
# Initialize models
models = {
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(max_iter=1000),  # raise max_iter so the solver converges
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "XGBoost": XGBClassifier()
}
# (E) Comment on which metric should be given preference for this dataset.
# Because manipulators are rare, accuracy is misleading: a model that always predicts
# "non-manipulator" still scores high accuracy while catching no manipulators.
# Prefer precision, recall, and F1-score on the positive (manipulator) class, and
# ROC AUC as a threshold-independent summary of ranking performance.
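# Quick illustration (a minimal sketch, assuming y_test from the part (B) split):
# a trivial majority-class baseline gets high accuracy but zero recall.
y_baseline = np.zeros_like(y_test)  # always predict "non-manipulator"
print("Baseline accuracy:", accuracy_score(y_test, y_baseline))  # high when positives are rare
print("Baseline recall:", recall_score(y_test, y_baseline))  # 0.0 -- no manipulator detected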
# (F) Finalize the model for each technique after hyperparameter tuning using GridSearchCV, based on the selected metric.
# First pass: empty grids, i.e. each model with its default hyperparameters as a baseline.
# Initialize results dictionary to store evaluation metrics
results = {}
# Loop through each model
for name, model in models.items():
    # An empty param_grid makes GridSearchCV fit the model once with its defaults
    grid_search = GridSearchCV(model, param_grid={}, scoring='f1')
    grid_search.fit(X_balanced, y_balanced)
    # Predict on the test data from the original 80:20 split
    # (caveat: X_balanced may contain upsampled copies of test rows, which leaks information;
    # the revisited loop below splits the balanced data before fitting)
    y_pred = grid_search.predict(X_test)
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    # Store evaluation metrics in the results dictionary
    results[name] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1, 'ROC AUC': roc_auc}
# (G) Compare the model performances with respect to the different evaluation metrics.
results_df = pd.DataFrame(results).T  # transpose so each row is a model, each column a metric
print(results_df)
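# Optional visual comparison (a sketch; assumes results_df from above, models as rows):
results_df.plot(kind='bar', figsize=(10, 5))
plt.ylabel('Score')
plt.title('Model Comparison Across Evaluation Metrics')
plt.tight_layout()
plt.show()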
# (H) Comment on the most important features for predicting the manipulators.
# Tree-based models such as Random Forest and XGBoost expose feature importance scores
# (the feature_importances_ attribute); ranking features by these scores, as sketched
# below, shows which variables drive the manipulator predictions.
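# A minimal sketch: fit a Random Forest on the balanced data and rank the columns
# of X_balanced by their importance scores.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_balanced, y_balanced)
importances = pd.Series(rf.feature_importances_, index=X_balanced.columns)
print(importances.sort_values(ascending=False))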
# (I) Downsample the dataset to create a balanced dataset.
# Downsampling mirrors the upsampling in (C): keep the minority class and sample the
# majority class down to the same size (see the sketch below).
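# A minimal sketch of the downsampling step, reusing resample() from part (C)
majority_downsampled = resample(majority_class, replace=False, n_samples=len(minority_class), random_state=42)
balanced_down_df = pd.concat([majority_downsampled, minority_class])
print(balanced_down_df['MANIPULATOR'].value_counts())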
# (F, revisited) Finalize the model for each technique after hyperparameter tuning using GridSearchCV, based on the selected metric.
# Split the balanced data into train and test sets so the held-out rows never enter training
X_train_balanced, X_test_balanced, y_train_balanced, y_test_balanced = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)
# Initialize results dictionary to store evaluation metrics
results_tuned = {}
# Loop through each model, using a model-specific grid where one is defined
for name, model in models.items():
    if name == "SVM":
        param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf', 'linear']}
    elif name == "Random Forest":
        param_grid = {'n_estimators': [100, 200, 300], 'max_features': ['sqrt', 'log2'], 'max_depth': [10, 20, 30, 40, 50]}
    else:
        param_grid = {}  # default hyperparameters for the remaining models
    grid_search = GridSearchCV(model, param_grid=param_grid, scoring='f1')
    grid_search.fit(X_train_balanced, y_train_balanced)
    print(name, "best params:", grid_search.best_params_)
    # Predict on the held-out portion of the balanced data
    y_pred = grid_search.predict(X_test_balanced)
    # Store evaluation metrics for the tuned model
    results_tuned[name] = {'Accuracy': accuracy_score(y_test_balanced, y_pred), 'Precision': precision_score(y_test_balanced, y_pred), 'Recall': recall_score(y_test_balanced, y_pred), 'F1 Score': f1_score(y_test_balanced, y_pred), 'ROC AUC': roc_auc_score(y_test_balanced, y_pred)}
print(pd.DataFrame(results_tuned).T)
