Question
I am working on this code for my project, but the accuracy is 0.9516013654843938. I need to improve the accuracy by using feature selection and pre-processing to get a higher result. Could you please modify the code so that I can achieve a better outcome?
#Importing Libraries (ANN)
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.models import Sequential
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the pipe-delimited malware dataset from the mounted Drive folder.
malData = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/MalwareData.csv",
    sep='|',
)
# Quick inspection of the frame: first rows, dimensions, summary statistics.
# These are bare expressions; their values are not printed explicitly.
malData.head()
malData.shape
malData.describe()
# Histogram of the 'legitimate' label column using 20 bins, drawn on an
# axes that fills the whole figure.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.hist(malData['legitimate'], bins=20)
plt.show()
# Data cleaning
# Set the target column aside before it is removed from the feature frame.
y = malData['legitimate']
# Remove the label and the two identifier columns (Name, md5) in one call,
# leaving only the feature columns in malData.
malData = malData.drop(columns=['legitimate', 'Name', 'md5'])
print("The Name and md5 variables are reomved successfully")
# Splitting the dataset into test and train (80% / 20%, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    malData, y, test_size=0.2, random_state=4
)
# Pre-processing: standardise every feature to zero mean / unit variance.
# The scaler is fitted on the training split only and then re-used on the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
# Wrap the scaled arrays back into DataFrames (same columns and index) so
# that code further down sees the same object type as before.
x_train = pd.DataFrame(
    scaler.fit_transform(x_train),
    columns=x_train.columns,
    index=x_train.index,
)
x_test = pd.DataFrame(
    scaler.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)
x_train.shape
# Build the feed-forward binary classifier: 32 -> 16 -> 8 -> 4 -> 1 units.
NNmodel = Sequential()
# Take the input width from the training data instead of hard-coding 54, so
# the model still builds when columns are added or removed (e.g. by feature
# selection).
NNmodel.add(Dense(32, input_dim=x_train.shape[1], activation="relu"))
NNmodel.add(Dense(16, activation="relu"))
NNmodel.add(Dense(8, activation="sigmoid"))
NNmodel.add(Dense(4, activation="relu"))
# Single sigmoid output unit, paired with the binary cross-entropy loss below.
NNmodel.add(Dense(1, activation="sigmoid"))
NNmodel.summary()
NNmodel.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)
# Fit the model: 5 epochs with a batch size of 32.
NNmodel.fit(x_train, y_train, batch_size=32, epochs=5)
# Accuracy on the training dataset: threshold the model outputs at 0.5 to
# obtain a flat list of 0/1 class labels.
trainPred = NNmodel.predict(x_train)
trainPred = (np.asarray(trainPred) >= 0.5).astype(int).ravel().tolist()
print(accuracy_score(y_train, trainPred))
# Evaluation on the test dataset.
# Keep the raw model outputs: ROC AUC needs continuous scores, while the
# remaining metrics need hard 0/1 labels.
y_score = np.asarray(NNmodel.predict(x_test)).ravel()
y_prediction = (y_score >= 0.5).astype(int).tolist()
# precision: tp / (tp + fp)
precision = precision_score(y_test, y_prediction)
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test, y_prediction)
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test, y_prediction)
print('F1 score: %f' % f1)
# kappa
kappa = cohen_kappa_score(y_test, y_prediction)
print('Cohens kappa: %f' % kappa)
# ROC AUC: computed from the predicted scores rather than the thresholded
# labels; thresholding first collapses the ROC curve to a single operating
# point and misreports the area.
auc = roc_auc_score(y_test, y_score)
print('ROC AUC: %f' % auc)
# confusion matrix
matrix = confusion_matrix(y_test, y_prediction)
print(matrix)
# Overall accuracy on the test dataset.
print(accuracy_score(y_test, y_prediction))
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started