Answered step by step
Verified Expert Solution
Question
1 Approved Answer
# Third-party imports: data handling, plotting, resampling, models, metrics.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils import resample
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)
# Load the dataset.
# NOTE(review): the original filename was garbled to "datasetcsv" — assumed
# to be "dataset.csv"; confirm against the actual data file.
df = pd.read_csv("dataset.csv")

# (A) Count manipulators vs non-manipulators and draw a bar plot.
# value_counts() yields one row per class label with its frequency.
manipulator_counts = df["MANIPULATOR"].value_counts()

plt.bar(manipulator_counts.index.astype(str), manipulator_counts.values)
plt.xlabel("Manipulator")
plt.ylabel("Count")
plt.title("Manipulator vs Non-Manipulator Counts")
# Label the two class positions; assumes 0 = non-manipulator, 1 = manipulator.
plt.xticks([0, 1], ["Non-Manipulator", "Manipulator"])
plt.show()
# (B) Create a train/test partition and count the positives in the test data.
# NOTE(review): the split ratio and random seed were lost in transcription
# ("an : partition"); an 80:20 split with random_state=42 is assumed — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop("MANIPULATOR", axis=1),  # features: everything except the target
    df["MANIPULATOR"],               # target: manipulator flag (0/1)
    test_size=0.2,
    random_state=42,
)

# Positive class is encoded as 1, so the sum of labels is the positive count.
positives_in_test = y_test.sum()
print("Number of positives in the test data:", positives_in_test)
# (C) Upsample the minority class to create a balanced dataset.
# Assumes MANIPULATOR == 0 is the majority (non-manipulator) class — confirm
# against the value_counts output from part (A).
majority_class = df[df["MANIPULATOR"] == 0]
minority_class = df[df["MANIPULATOR"] == 1]

# Sample the minority class WITH replacement up to the majority-class size.
minority_upsampled = resample(
    minority_class,
    replace=True,
    n_samples=len(majority_class),
    random_state=42,  # seed assumed; original value lost in transcription
)

# Combine majority class with the upsampled minority class.
balanced_df = pd.concat([majority_class, minority_upsampled])

# Sanity check: both classes should now have equal counts.
print(balanced_df["MANIPULATOR"].value_counts())
# (D) Build models using the balanced dataset.
# Features and target drawn from the upsampled frame.
X_balanced = balanced_df.drop("MANIPULATOR", axis=1)
y_balanced = balanced_df["MANIPULATOR"]

# Registry of candidate classifiers, keyed by display name. Default
# hyperparameters here; tuning happens later via GridSearchCV.
models = {
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Adaboost": AdaBoostClassifier(),
    "Gradientboost": GradientBoostingClassifier(),
    "XGBoost": XGBClassifier(),
}

# (E) Metric preference: because the raw dataset is class-imbalanced,
# precision, recall, and F1 score are preferred over plain accuracy.
# ROC AUC is also a useful threshold-independent summary of performance.
# (F) Finalize each model with GridSearchCV hyperparameter tuning, scored on
# F1 (the metric selected in part E).
# NOTE(review): the original loop referenced an undefined `paramgrid`; explicit
# per-model grids are supplied here — adjust ranges to the actual assignment.
param_grids = {
    "Naive Bayes": {},
    "KNN": {"n_neighbors": [3, 5, 7]},
    "SVM": {"C": [0.1, 1, 10], "kernel": ["rbf", "linear"]},
    "Logistic Regression": {"C": [0.1, 1, 10]},
    "Random Forest": {"n_estimators": [100, 200], "max_depth": [None, 5, 10]},
    "Adaboost": {"n_estimators": [50, 100]},
    "Gradientboost": {"n_estimators": [100, 200]},
    "XGBoost": {"n_estimators": [100, 200], "max_depth": [3, 5]},
}

# Evaluation metrics per model, keyed by model name.
results = {}
for name, model in models.items():
    # Tune on the balanced training data, selecting by F1.
    grid_search = GridSearchCV(model, param_grid=param_grids[name], scoring="f1")
    grid_search.fit(X_balanced, y_balanced)

    # Predict on the (untouched, imbalanced) test split from part B.
    y_pred = grid_search.predict(X_test)

    # Store all evaluation metrics for this model.
    results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred),
        # ROC AUC from hard labels here; use decision scores for a finer curve.
        "ROC AUC": roc_auc_score(y_test, y_pred),
    }

# (G) Compare the model performances across the different evaluation metrics.
results_df = pd.DataFrame(results)
print(results_df)
# (H) Most important features: inspect `feature_importances_` on the fitted
# Random Forest / XGBoost estimators to rank predictors of manipulation.

# (I) Downsampling is skipped here, since the dataset was already balanced
# by upsampling in part (C).

# (F, repeated in the original transcript) Re-run tuning with inline per-model
# parameter grids. NOTE(review): the source was truncated mid-branch
# ("elif nam..."); the remaining branches below are a plausible reconstruction
# and must be checked against the original assignment.
results = {}
for name, model in models.items():
    if name == "SVM":
        param_grid = {
            "C": [0.1, 1, 10],
            "gamma": ["scale", "auto"],
            "kernel": ["rbf", "linear"],
        }
    elif name == "Random Forest":
        param_grid = {
            "n_estimators": [100, 200],
            "max_features": ["sqrt", "log2"],
            "max_depth": [None, 5, 10],
        }
    else:
        # Models without an explicit grid fall back to their defaults.
        param_grid = {}

    grid_search = GridSearchCV(model, param_grid=param_grid, scoring="f1")
    grid_search.fit(X_balanced, y_balanced)
The code is not perfect; I started getting an error after this point (the remainder of the script was cut off).
# Split the balanced data into train and test sets
Xtrainbalanced, Xt
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started