Question
Hello, I hope you are doing well.
I am working on my project, which is ransomware attack detection using deep learning CNNs.
I got an accuracy of 0.93, which is not good enough. I have tried to improve it, but I gave up. Could you modify the feature selection or pre-processing in the code to get a higher result? Even for that result I have to wait a long time, because I increased the number of epochs. Could you also explain the part you add?
import os
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from keras.preprocessing.image import ImageDataGenerator #, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
print(os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages"))
# Define Constants
FAST_RUN = False
IMAGE_WIDTH=128 # maybe 256
IMAGE_HEIGHT=128 # maybe 256
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNELS=3 # maybe not needed
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
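# set_memory_growth makes TensorFlow allocate GPU memory on demand instead of
# reserving it all at start-up, which helps when other processes share the GPU.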
# Prepare Training Data
filenames = os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages")
categories = []
for filename in filenames:
    category = filename.split('l')[0]
    if category == 'image_benign_':
        categories.append(0)
    else:
        categories.append(1)
df = pd.DataFrame({
'filename': filenames,
'category': categories
})
print(df.head())
print(df.tail())
# In Colab the class balance can be plotted with df['category'].value_counts().plot.bar()
# See a sample image
# sample = random.choice(filenames)
# image = load_img(os.path.join(r"D:\Ransomware_Detection_using _CNN\MixImages", sample))
# plt.imshow(image)
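# The class balance is worth checking before judging accuracy; unlike .plot.bar(),
# value_counts() prints in any environment, not only Colab.
print(df['category'].value_counts())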
# Build Model
#===============================================================================================================
# Testing
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax')) # 2 outputs because we have benign and malware classes
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
# Callbacks
## Early stopping: to prevent overfitting, stop training when val_loss has not decreased for 10 epochs
earlystop = EarlyStopping(patience=10)
# Learning Rate Reduction
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',  # newer Keras reports 'val_accuracy', not 'val_acc'
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
callbacks = [earlystop, learning_rate_reduction]
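# Optional variant (commented out): monitoring validation loss and restoring the
# best weights at the end of training often generalizes a little better; both
# arguments are part of the standard Keras EarlyStopping API.
# earlystop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# callbacks = [earlystop, learning_rate_reduction]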
# Prepare data
df["category"] = df["category"].replace({0: 'benign', 1: 'malware'})
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
## Plot in Colab:
### train_df['category'].value_counts().plot.bar()
### validate_df['category'].value_counts().plot.bar()
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size = 512
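# Note: a batch of 512 RGB images at 128x128 is fairly large; if the GPU runs out
# of memory or epochs feel slow, a smaller batch (e.g. 64 or 128) is worth trying.
# batch_size = 64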
# Training Generator
train_datagen = ImageDataGenerator(
rotation_range=15,
rescale=1./255,
shear_range=0.1,
zoom_range=0.2,
horizontal_flip=True,
width_shift_range=0.1,
height_shift_range=0.1
)
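# These augmentations (rotation, shear, flips) were designed for natural photos;
# whether they are meaningful for byte-visualisation images of binaries is worth
# questioning, so comparing against a run with augmentation switched off is a
# reasonable check.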
train_generator = train_datagen.flow_from_dataframe(
train_df,
"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
# Validation Generator
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
validate_df,
"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImagess",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
# See how our generator works
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
example_df,
"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical'
)
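# A minimal sketch for inspecting the generator output above: it saves one
# augmented sample to disk instead of calling plt.show(), so it does not block.
X_sample, y_sample = next(iter(example_generator))
plt.imshow(X_sample[0])
plt.savefig('augmented_example.png')
plt.close()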
epochs = 30 # if FAST_RUN else 50
history = model.fit(
train_generator,
epochs=epochs,
validation_data=validation_generator,
validation_steps=total_validate//batch_size,
steps_per_epoch=total_train//batch_size,
callbacks=callbacks
)
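# Plot the training curves to see whether accuracy plateaus from under- or
# over-fitting; the key lookup covers both older ('acc') and newer ('accuracy')
# Keras history naming.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plt.figure()
plt.plot(history.history[acc_key], label='train accuracy')
plt.plot(history.history['val_' + acc_key], label='val accuracy')
plt.legend()
plt.savefig('training_curves.png')
plt.close()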
# Save model weights
model.save_weights("model.h5")
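# save_weights() stores only the layer weights; saving the full model
# (architecture plus weights) is also possible with the standard Keras call below.
# model.save('model_full.h5')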
test_filenames = os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages")
test_df = pd.DataFrame({
'filename': test_filenames
})
nb_samples = test_df.shape[0]
# Create Testing Generator
# flow_from_dataframe will report how many images it found
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
test_df,
"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
x_col='filename',
y_col=None,
class_mode=None,
target_size=IMAGE_SIZE,
batch_size=batch_size,
shuffle=False
)
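# Note: test_filenames is read from the same MixImages directory used for
# training, so these predictions are on images the model has already seen;
# a separate held-out folder would give a more realistic estimate.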
# Predict
predict = model.predict(test_generator, steps=int(np.ceil(nb_samples / batch_size)))
test_df['category'] = np.argmax(predict, axis=-1)
label_map = dict((v,k) for k,v in train_generator.class_indices.items())
test_df['category'] = test_df['category'].replace(label_map)
test_df['category'] = test_df['category'].replace({ 'malware': 1, 'benign': 0 })
# Submission
submission_df = test_df.copy()
submission_df['id'] = submission_df['filename'].str.split('.').str[0]
submission_df['label'] = submission_df['category']
submission_df.drop(['filename', 'category'], axis=1, inplace=True)
submission_df.to_csv('submission.csv', index=False)