Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

# Açıklayıcı dosya olan 'agaricus-lepiota.names' dosyasını okuyarak içeriğine bakıyoruz. file_path = 'agaricus-lepiota.names'

# Read the descriptive file 'agaricus-lepiota.names' so its contents can be inspected.
file_path = 'agaricus-lepiota.names'

# Read the whole file; the `with` block closes the handle when it exits.
with open(file_path, 'r') as file:
    names_content = file.read()

# Print only the first 10000 characters to keep the output short.
print(names_content[:10000])
#%%
import pandas as pd
# Read the data set and build a DataFrame from it.
data_file_path = 'agaricus-lepiota.data.csv'

# Labels handed to read_csv via `names`; they are applied to the columns positionally.
column_names = [
    'class',
    'cap-shape',
    'cap-surface',
    'cap-color',
    'bruises',
    'odor',
    'gill-attachment',
    'gill-spacing',
    'gill-size',
    'gill-color',
    'stalk-shape',
    'stalk-root',
    'stalk-surface-above-ring',
    'stalk-surface-below-ring',
    'stalk-color-above-ring',
    'stalk-color-below-ring',
    'veil-type',
    'veil-color',
    'ring-number',
    'ring-type',
    'spore-print-color',
    'population',
    'habitat',
]

# header=None: no row of the file is used as a header, so `names` supplies the labels.
mushroom_data = pd.read_csv(
    data_file_path,
    names=column_names,
    header=None,
)

# Show the first 5 rows to inspect the structure of the data.
mushroom_data.head()
#%%
# Print the summary table produced by DataFrame.describe().
describe_table = mushroom_data.describe()
print(describe_table)
#%%
# Print the per-column overview produced by DataFrame.info().
mushroom_data.info()
# All features are categorical but are stored with the 'object' type.
# They are converted to the 'category' type later in this script.
#%%
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Draw one donut chart per DataFrame column on a 5x5 grid of 'domain' (pie) cells.
n_rows = n_cols = 5
specs = [[{'type': 'domain'} for _ in range(n_cols)] for _ in range(n_rows)]
fig = make_subplots(rows=n_rows, cols=n_cols, specs=specs)

# Base offsets used to position the column-name annotation of each chart.
xx = -0.172
yy = 1.267
annotations = []

for position, col in enumerate(mushroom_data.columns):
    # Fill the grid column by column: the row advances first and wraps after
    # n_rows charts. Subplot row/column numbers are 1-based.
    grid_col, grid_row = divmod(position, n_rows)
    grid_row += 1
    grid_col += 1

    # Count each distinct value once, then express the counts as percentages.
    counts = mushroom_data[col].value_counts()
    shares = counts * 100 / counts.sum()

    fig.add_trace(
        go.Pie(labels=counts.index.tolist(), values=shares.tolist(), name=col),
        grid_row,
        grid_col,
    )
    annotations.append(
        dict(
            text=col,
            x=xx + (0.225 * grid_col),
            y=yy - (0.222 * grid_row),
            font_size=10,
            showarrow=False,
        )
    )

# Figure-level layout: title, hidden legend and the per-chart annotations.
fig.update_layout(
    title_text='Features',
    title_font_family="Arial",
    title_font_size=25,
    showlegend=False,
    annotations=annotations,
)
# hole=.4 turns each pie into a donut; slice text is hidden, details are shown on hover.
fig.update_traces(hole=.4, hoverinfo="label+percent+name", textinfo='none')
fig.show()
#%%
def make_categorical(mushroom_data: pd.DataFrame, columns) -> pd.DataFrame:
    """Convert the given columns of a DataFrame to the categorical dtype.

    The conversion happens in place: ``mushroom_data`` itself is modified and
    the very same object is returned.

    Args:
        mushroom_data: DataFrame whose columns are converted.
        columns: Iterable of column labels to convert.

    Returns:
        The DataFrame that was passed in, with every listed column replaced by
        a ``pd.Categorical`` built from its values.
    """
    for column in columns:
        mushroom_data[column] = pd.Categorical(mushroom_data[column])
    # Return only after every requested column has been converted.
    return mushroom_data
#%%
# Convert every column to the 'category' dtype. make_categorical modifies the
# DataFrame it receives and returns it, so `data` and `mushroom_data` refer to
# the same object.
columns = mushroom_data.columns
data = make_categorical(mushroom_data=mushroom_data, columns=columns)
#%%
# Print the column overview again, now after the conversion.
mushroom_data.info()
#%%
# (number of rows, number of columns)
mushroom_data.shape
# Author's note: there are 8124 observations and 23 columns in the mushroom dataset.
#%%
# Per column: does it contain any null (NaN) value?
mushroom_data.isna().any()
# Author's note: there is no missing value in the mushroom dataset.
# NOTE(review): this check only detects NaN. If the data file marks missing
# entries with a placeholder such as '?', they are not counted here - confirm
# against the dataset description.
#%%
# Inspect the data type of each column.
data_types = mushroom_data.dtypes
data_types
#%%
# Relative frequency of each 'habitat' value within each 'class' group,
# flattened into a table that has a 'frequency' column.
habitat_by_class = mushroom_data['habitat'].groupby(mushroom_data['class'])
habitat_share = habitat_by_class.value_counts(normalize=True)
mushroom_data_habitat = habitat_share.rename('frequency').to_frame().reset_index()
mushroom_data_habitat
#%%
from matplotlib import pyplot as plt
import seaborn as sns
# Bar plot of the habitat distribution, one bar colour per class.
# The 'frequency' column holds fractions (it was built with normalize=True), so
# it is scaled to percent for plotting to match the '(%)' axis label. The
# source table itself is left untouched.
habitat_pct = mushroom_data_habitat.assign(
    frequency=mushroom_data_habitat['frequency'] * 100
)
sns.barplot(data=habitat_pct, x='habitat', y='frequency', hue='class')
plt.xlabel('Habitat')
plt.ylabel('frequency (%)')
plt.title("Habitat Distribution: Edible Vs Poisonous");
#%%
# Relative frequency of each 'population' value within each 'class' group,
# flattened into a table that has a 'frequency' column.
population_by_class = mushroom_data['population'].groupby(mushroom_data['class'])
population_share = population_by_class.value_counts(normalize=True)
mushroom_data_popu = population_share.rename('frequency').to_frame().reset_index()
mushroom_data_popu
#%%
## Let`s create a bar plot to show the population: Edible Vs Poisonous
import plotly.express as px
# Grouped bar chart of the population distribution per class.
# 'frequency' holds fractions (built with normalize=True); scale it to percent
# for plotting so that the '(%)' axis label is accurate. The source table is
# left untouched.
population_pct = mushroom_data_popu.assign(
    frequency=mushroom_data_popu['frequency'] * 100
)
fig = px.bar(
    data_frame=population_pct,
    x='population',
    y='frequency',
    color='class',
    barmode='group',
    title='population: Edible Vs Poisonous',
)
fig.update_layout(xaxis_title='population', yaxis_title='frequency (%)')
#%%
# Mapping from the single-letter 'ring-number' codes to integers
# (presumably o = one, t = two, n = none - confirm against the dataset description).
ring_num = {
    'o': 1,
    't': 2,
    'n': 0,
}
ring_num

# 'ring-number' has the categorical dtype at this point (every column was
# converted by make_categorical earlier in the script). Series.replace on
# categorical data is deprecated in recent pandas, so the categories are renamed
# through the .cat accessor instead; codes missing from the mapping pass through.
ring_values = mushroom_data['ring-number'].cat.rename_categories(ring_num)

# Relative frequency of each ring count within each 'class' group, flattened
# into a table that has a 'frequency' column.
mushroom_data_ring = (
    ring_values.groupby(mushroom_data['class'])
    .value_counts(normalize=True)
    .rename('frequency')
    .to_frame()
    .reset_index()
)
mushroom_data_ring
#%%
# Grouped bar chart of the number of rings per class.
# 'frequency' holds fractions (built with normalize=True); scale it to percent
# for plotting so that the '(%)' axis label is accurate. The source table is
# left untouched.
ring_pct = mushroom_data_ring.assign(
    frequency=mushroom_data_ring['frequency'] * 100
)
fig = px.bar(
    data_frame=ring_pct,
    x='ring-number',
    y='frequency',
    color='class',
    barmode='group',
    title='Number of rings: Edible Vs Poisonous',
)
fig.update_layout(xaxis_title='number of rings', yaxis_title='frequency (%)')
# Page text that was fused onto the end of the statement above:
# THE PYTHON CODE GIVEN ABOVE IS RELATED TO RANDOM FOREST CLASSIFICATION IN THE DATA SCIENCE COURSE.
PLEASE INTERPRET THIS CODE AND PREPARE A REPORT ACCORDING TO THE SUBJECTS AND CODES.

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

More Books

Students also viewed these Databases questions