Answered step by step
Verified Expert Solution
Question
1 Approved Answer
User import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import
User
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from kmodes.kprototypes import KPrototypes
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.metrics import silhouettescore, calinskiharabaszscore, daviesbouldinscore
# Load the preprocessed dataset
filepath rC:UsersankitDownloadsAnkitprojAnkitprojClusteringAirTrafficPassengerStatistics.csv
data pdreadcsvfilepath
# Select the features for clustering
features Published Airline', 'GEO Region', 'Passenger Count', 'Adjusted Passenger Count', 'Year'
# Define the preprocessing for numerical and categorical features
numericalfeatures Passenger Count', 'Adjusted Passenger Count', 'Year'
categoricalfeatures Published Airline', 'GEO Region'
# Create a column transformer for preprocessing
preprocessor ColumnTransformer
transformers
num StandardScaler numericalfeatures
cat OneHotEncoder categoricalfeatures
remainder'passthrough'
# Convert categorical columns to onehot encoding
preprocessor ColumnTransformer
transformers
num StandardScaler numericalfeatures
cat OneHotEncoderdrop'first' categoricalfeatures # Use drop'first' to handle multicollinearity
remainder'passthrough'
# Modify preprocessing to handle categorical variables and sparse data
preprocessor ColumnTransformer
transformers
num StandardScaler numericalfeatures
cat OneHotEncoderdrop'first' categoricalfeatures # Adjust encoding method as needed
remainder'passthrough'
# Preprocess the data
try:
datapreprocessed preprocessor.fittransformdata
except ValueError as e:
printError during preprocessing:", e
# Handle the error, potentially by inspecting the data further or adjusting preprocessing steps
# Determine categorical feature indices for KPrototypes
catfeaturesindices listrangelennumericalfeatures lennumericalfeatures lencategoricalfeatures
# Perform KPrototypes clustering if data preprocessing was successful
if 'datapreprocessed' in locals:
# Perform KPrototypes clustering
kproto KPrototypesnclusters init'Cao', ninit verbose
clusterlabelskproto kproto.fitpredictdatapreprocessed, categoricalcatfeaturesindices
# Add cluster labels to the original dataframe
dataClusterKPrototypes' clusterlabelskproto
# Extract the cluster centroids
centroids kproto.clustercentroids
# Apply hierarchical clustering to the centroids
Z linkagecentroids method'ward'
# Plot dendrogram for the hierarchical clustering
pltfigurefigsize
dendrogramZ labelsfCluster i for i in rangelencentroids
plttitleHierarchical Clustering Dendrogram'
pltxlabelCluster
pltylabelDistance
pltshow
# Plot clusters in a scatter plot using original features
pltfigurefigsize
snsscatterplotx'Passenger Count', y'Adjusted Passenger Count', hue'ClusterKPrototypes', datadata, palette'viridis', alpha
pltxlabelPassenger Count'
pltylabelAdjusted Passenger Count'
plttitleHybrid Clustering: KPrototypes Hierarchical Clustering'
pltlegendtitle'Cluster'
pltshow
# Compute cluster evaluation metrics
# Silhouette Score
silhouetteavg silhouettescoredatapreprocessed, clusterlabelskproto
printfSilhouette Score: silhouetteavg:f
# CalinskiHarabasz Index
calinskiharabasz calinskiharabaszscoredatapreprocessed, clusterlabelskproto
printfCalinskiHarabasz Index: calinskiharabasz:f
# DaviesBouldin Index
daviesbouldin daviesbouldinscoredatapreprocessed, clusterlabelskproto
printfDaviesBouldin Index: daviesbouldin:f
Error during preprocessing: For a sparse output, all columns should be a numeric or convertible to a numeric.
my dataset columns value are index int
Activity Period int
Operating Airline object
Operating Airline IATA Code object
Published Airline object
Published Airline IATA Code object
GEO Summary object
GEO Region object
Activity Type Code object
Price Category Code object
Terminal object
Boarding Area object
Passenger Count int
Adjusted Activity Type Code object
Adjusted Passenger Count int
Year int
Month object
dtype: object
resolve the error, and provide error free code with the output.Kindly refr
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started