Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Hello, I hope you are doing well, I am doing my project, which is Ransomware attack detection using Deep learning CNNs I got a result

Hello, I hope you are doing well,

I am working on my project, which is ransomware attack detection using deep-learning CNNs.

I got an accuracy of 0.93, which is not good enough. I have tried to improve it, but without success. Could you modify the code's feature selection or pre-processing to get a higher result? Even reaching that result takes a long time, because I increased the number of epochs. Could you also explain the parts you add?

import os

import numpy as np

import pandas as pd

import keras

import tensorflow as tf

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152

# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

from keras.preprocessing.image import ImageDataGenerator #, load_img

from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import random

from keras.models import Sequential

from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Sanity check: show what the image directory contains.
# The path is a raw string so the Windows backslashes stay literal; "\R" and
# "\M" are unrecognized escape sequences that Python 3.12+ warns about.
print(os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages"))

# Define Constants

# Only mentioned in a commented-out hint next to the epoch count.
FAST_RUN = False

# Square input resolution for the network (256 was considered as an alternative).
IMAGE_WIDTH = IMAGE_HEIGHT = 128
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# Number of colour channels; part of the model's input shape.
IMAGE_CHANNELS = 3

# Ask TensorFlow which GPUs it can see and print the list for diagnostics.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)

# Turn on memory growth for every listed GPU (not just the first one) so all
# devices share the same setting. On a CPU-only machine the list is empty and
# the loop body never runs.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Prepare Training Data
# Raw string: the backslashes in the Windows path must stay literal.
filenames = os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages")

# Derive a numeric label from each file name: 0 when the text before the
# first 'l' is exactly 'image_benign_', otherwise 1.
# NOTE(review): this presumably relies on benign files being named
# 'image_benign_l...'; confirm against the real file names, since any benign
# file that does not follow that pattern is labelled 1.
categories = [
    0 if filename.split('l')[0] == 'image_benign_' else 1
    for filename in filenames
]

# One row per image file: its name and its numeric label.
df = pd.DataFrame({
    'filename': filenames,
    'category': categories,
})

print(df.head())
print(df.tail())

# in collab it will work df['category'].value_counts().plot.bar()

# See sample image

# sample = random.choice(filenames)

# image = load_img("D:\Ransomware_Detection_using _CNN\MixImages"+sample)

# plt.imshow(image)

# in collab it will work df['category'].value_counts().plot.bar()

# Build Model
# Four convolutional stages (32, 64, 128, 256 filters), each followed by batch
# normalisation, 2x2 max pooling and 25% dropout, then a dense classifier head.
model = Sequential()

for stage_index, filter_count in enumerate((32, 64, 128, 256)):
    # Only the very first layer declares the input shape.
    first_layer_args = (
        {'input_shape': (IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)}
        if stage_index == 0
        else {}
    )
    model.add(Conv2D(filter_count, (3, 3), activation='relu', **first_layer_args))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# Two softmax outputs: one per class.
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

# Callbacks

# Early stop: to limit overfitting, end training once the monitored quantity
# (val_loss, the callback's default) has not improved for 10 epochs.
earlystop = EarlyStopping(patience=10)

# Learning rate reduction: halve the learning rate when validation accuracy
# has not improved for 2 epochs, never going below 1e-5.
# The model is compiled with metrics=['accuracy'], which TF2-era Keras logs as
# 'val_accuracy'. Under the old key 'val_acc' the callback cannot find its
# metric and never lowers the learning rate.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

callbacks = [earlystop, learning_rate_reduction]

# Prepare data
# Swap the numeric labels for class-name strings.
df["category"] = df["category"].replace({0: 'benign', 1: 'malware'})

# 80/20 train/validation split with a fixed seed; both parts are re-indexed
# from zero.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

# The class balance of each part can be plotted (e.g. in Colab) with:
#   train_df['category'].value_counts().plot.bar()
#   validate_df['category'].value_counts().plot.bar()

# Row counts of each part, plus the batch size shared by all generators.
total_train = len(train_df)
total_validate = len(validate_df)
batch_size = 512

# Training Generator
# Random augmentation applied to training images: rotation, shear, zoom,
# horizontal flip and shifts. Pixel values are multiplied by 1/255.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Stream batches of training images named in train_df from the image folder.
# Raw string: the backslashes in the Windows path must stay literal.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

# Validation Generator
# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# The validation file names come from the same folder listing as the training
# ones, so they must be read from that folder. The directory used to be
# misspelled 'MixImagess', which does not match the folder used everywhere
# else in the script.
# Raw string: the backslashes in the Windows path must stay literal.
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

# See how our generator works: pick one random training row and wrap it in a
# generator that uses the training augmentation settings.
# NOTE(review): example_generator is not used anywhere in the visible script;
# it looks like a leftover from a plotting cell.
example_df = train_df.sample(n=1).reset_index(drop=True)

# Raw string: the backslashes in the Windows path must stay literal.
example_generator = train_datagen.flow_from_dataframe(
    example_df,
    r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
)

epochs = 30  # an earlier variant read: ... if FAST_RUN else 50

# Train the model.
# Step counts come from the generators' own length (number of batches,
# including the final partial one). Floor-dividing the sample count by
# batch_size (512) dropped the last partial batch each epoch, and gave 0 steps
# whenever a split held fewer than batch_size images.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    steps_per_epoch=len(train_generator),
    callbacks=callbacks,
)

# Save Model (weights only, not the architecture).
model.save_weights("model.h5")

# Build the frame of files to predict on.
# NOTE(review): this lists the same folder the training and validation files
# were taken from, so the predictions are made on images the model has already
# seen and say nothing about generalisation. Point this at a held-out folder
# for a real test.
# Raw string: the backslashes in the Windows path must stay literal.
test_filenames = os.listdir(r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages")

test_df = pd.DataFrame({
    'filename': test_filenames,
})

nb_samples = test_df.shape[0]

# Create Testing Generator
# Images are only rescaled. No labels are supplied (class_mode=None) and
# shuffling is off, so the batch order follows the row order of test_df.
test_gen = ImageDataGenerator(rescale=1./255)

test_generator = test_gen.flow_from_dataframe(
    test_df,
    r"D:\RansomSecondApproach\Ransomware_Detection_using _CNN\MixImages",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

# Predict
# Enough steps to cover every sample, including the last partial batch.
# np.ceil returns a float, so cast to int before passing it as a step count.
predict = model.predict(test_generator, steps=int(np.ceil(nb_samples / batch_size)))

# Index of the highest-scoring output unit for each image.
test_df['category'] = np.argmax(predict, axis=-1)

# Output index -> class name, by inverting the training generator's mapping.
label_map = {index: name for name, index in train_generator.class_indices.items()}
test_df['category'] = test_df['category'].replace(label_map)

# Class name -> numeric label. The benign key used to be misspelled 'bengin',
# so benign rows were never converted and kept the string 'benign'.
test_df['category'] = test_df['category'].replace({'malware': 1, 'benign': 0})

# Submission
# Two-column result table: 'id' is the file name up to its first dot, and
# 'label' is the predicted category. Written as CSV without the index column.
submission_df = pd.DataFrame({
    'id': test_df['filename'].str.split('.').str[0],
    'label': test_df['category'],
})

submission_df.to_csv('submission.csv', index=False)

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Students also viewed these Databases questions