Answered step by step
Verified Expert Solution
Question
1 Approved Answer
#Data Visualization import seaborn as sns import matplotlib.pyplot as plt sns . barplot ( x = class, y = data [ class ]
#Data Visualization import seaborn as sns import matplotlib.pyplot as plt snsbarplotx"class", ydataclassindex, palette'mako', datamushroomdata #The number of poisonous mushrooms is almost twice the number of normal mushrooms. There is an imbalance data problem. #We will be using Matplotlib pyplot and Seaborn to plot our data. # from sklearn import preprocessing #Label encoding is used to convert categorical features to numerical values. def labelencodefitmushroomdata, columns: result mushroomdata.copy encoders for column in columns: encoder preprocessing.LabelEncoder resultcolumn encoder.fittransformresultcolumn encoderscolumn encoder return result, encoders # data encoders labelencodefitdatadata.columns datahead # def correlationmapmushroomdata, method: corr mushroomdata.corrmethod ix corr.sortvaluesclass ascendingFalseindex dfsortedbycorrelation mushroomdata.loc: ix corr dfsortedbycorrelation.corrmethod pltsubplotsfigsize with snsaxesstylewhite: # display a correlation heatmap ax snsheatmapcorr annotTrue pltshow # correlationmapdata method"spearman" #Gillsize has the highest correlation with class. It should be included to the model. #There some highly correlated variables such as gillcolor & ringtype, gillcolor & bruises, bruises & stalksurfacebelowring etc. These highly correlated variables ohuld be discarded from the model to obtain more accurate results. # y dataclass # contains only "class", target, variable. X datailoc:: # contains independent variable. 
# from sklearn.featureselection import SelectKBest import numpy as np def SelectKBestCustomizedmushroomdata, k scorefunc, target"class": Xmushroomdata.dropcolumnstarget ymushroomdatatarget nprandom.seed # for mutualinfo regression fs SelectKBestscorefuncscorefunc, kk fsfitX y mask fsgetsupport selectedfeatures feature for bool, feature in zipmask Xcolumns if bool return selectedfeatures # from sklearn.featureselection import mutualinfoclassif mutualinfoclassifX y randomstate # mutualinfoselection SelectKBestCustomizeddata mutualinfoclassif # mutualinfoselection # Xnew Xodor'gillsize', 'gillcolor', 'stalksurfaceabovering', 'stalksurfacebelowring', 'stalkcolorabovering', 'stalkcolorbelowring', 'ringtype', 'sporeprintcolor' # dataselectedfeatures dataodor 'gillsize', 'gillcolor', 'stalksurfaceabovering', 'stalksurfacebelowring', 'stalkcolorabovering', 'stalkcolorbelowring', 'ringtype', 'sporeprintcolor', 'class' # a # number of rows b # number of columns c # initialize plot counter fig pltfigurefigsize for i in dataselectedfeatures: pltsubplota b c #plttitle subplot: formati a b c pltxlabeli snsbarplotxi ydataselectedfeaturesiindex, palette'Setr hue"class", datadataselectedfeatures c c pltshow THE PYTHON CODE GIVEN ABOVE IS RELATED TO RANDOM FOREST CLASSIFICATION IN THE DATA SCIENCE COURSE. PLEASE INTERPRET THIS CODE AND PREPARE A REPORT ACCORDING TO THE SUBJECTS AND CODES.
#Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
sns.barplot(x="class", y=data["class"].index, palette='mako', data=mushroomdata)
#The number of poisonous mushrooms is almost twice the number of normal mushrooms. There is an imbalanced-data problem.
#We will be using Matplotlib pyplot and Seaborn to plot our data.
#
from sklearn import preprocessing
#Label encoding is used to convert categorical features to numerical values.
def labelencodefit(mushroomdata, columns):
    result = mushroomdata.copy()
    encoders = {}
    for column in columns:
        encoder = preprocessing.LabelEncoder()
        result[column] = encoder.fit_transform(result[column])
        encoders[column] = encoder
    return result, encoders
#
data, encoders = labelencodefit(data, data.columns)
data.head()
#
def correlationmap(mushroomdata, method):
    corr = mushroomdata.corr(method)
    ix = corr.sort_values('class', ascending=False).index
    dfsortedbycorrelation = mushroomdata.loc[:, ix]
    corr = dfsortedbycorrelation.corr(method)
pltsubplotsfigsize
    with sns.axes_style("white"):
        # display a correlation heatmap
        ax = sns.heatmap(corr, annot=True)
    plt.show()
#
correlationmap(data, method="spearman")
#Gillsize has the highest correlation with class. It should be included in the model.
#There are some highly correlated variables such as gillcolor & ringtype, gillcolor & bruises, bruises & stalksurfacebelowring, etc. These highly correlated variables should be discarded from the model to obtain more accurate results.
#
y = data["class"]  # contains only the target variable, "class".
X datailoc:: # contains independent variable.
#
from sklearn.feature_selection import SelectKBest
import numpy as np
def SelectKBestCustomized(mushroomdata, k, scorefunc, target="class"):
    X = mushroomdata.drop(columns=target)
    y = mushroomdata[target]
nprandom.seed # for mutualinfo regression
    fs = SelectKBest(score_func=scorefunc, k=k)
    fs.fit(X, y)
    mask = fs.get_support()
    selectedfeatures = [feature for bool, feature in zip(mask, X.columns) if bool]
    return selectedfeatures
#
from sklearn.feature_selection import mutual_info_classif
mutualinfoclassifX y randomstate
#
mutualinfoselection SelectKBestCustomizeddata mutualinfoclassif
#
mutualinfoselection
#
Xnew = X[['odor', 'gillsize',
          'gillcolor',
          'stalksurfaceabovering',
          'stalksurfacebelowring',
          'stalkcolorabovering',
          'stalkcolorbelowring',
          'ringtype',
          'sporeprintcolor']]
#
dataselectedfeatures = data[['odor',
                             'gillsize',
                             'gillcolor',
                             'stalksurfaceabovering',
                             'stalksurfacebelowring',
                             'stalkcolorabovering',
                             'stalkcolorbelowring',
                             'ringtype',
                             'sporeprintcolor',
                             'class']]
#
a # number of rows
b # number of columns
c # initialize plot counter
fig pltfigurefigsize
for i in dataselectedfeatures:
pltsubplota b c
#plttitle subplot: formati a b c
pltxlabeli
snsbarplotxi ydataselectedfeaturesiindex, palette'Setr hue"class", datadataselectedfeatures
c c
plt.show()

THE PYTHON CODE GIVEN ABOVE IS RELATED TO RANDOM FOREST CLASSIFICATION IN THE DATA SCIENCE COURSE.
PLEASE INTERPRET THIS CODE AND PREPARE A REPORT ACCORDING TO THE SUBJECTS AND CODES.
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started