Answered step by step
Verified Expert Solution
Question
1 Approved Answer
User import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import
User import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from kmodes.kprototypes import KPrototypes from scipy.cluster.hierarchy import dendrogram, linkage from sklearn.metrics import silhouettescore, calinskiharabaszscore, daviesbouldinscore # Load the preprocessed dataset filepath rC:UsersankitDownloadsAnkitprojAnkitprojClusteringAirTrafficPassengerStatistics.csv data pdreadcsvfilepath # Select the features for clustering features Published Airline', 'GEO Region', 'Passenger Count', 'Adjusted Passenger Count', 'Year' # Define the preprocessing for numerical and categorical features numericalfeatures Passenger Count', 'Adjusted Passenger Count', 'Year' categoricalfeatures Published Airline', 'GEO Region' # Create a column transformer for preprocessing preprocessor ColumnTransformer transformers num StandardScaler numericalfeatures cat OneHotEncoder categoricalfeatures remainder'passthrough' # Convert categorical columns to onehot encoding preprocessor ColumnTransformer transformers num StandardScaler numericalfeatures cat OneHotEncoderdrop'first' categoricalfeatures # Use drop'first' to handle multicollinearity remainder'passthrough' # Modify preprocessing to handle categorical variables and sparse data preprocessor ColumnTransformer transformers num StandardScaler numericalfeatures cat OneHotEncoderdrop'first' categoricalfeatures # Adjust encoding method as needed remainder'passthrough' # Preprocess the data try: datapreprocessed preprocessor.fittransformdata except ValueError as e: printError during preprocessing:", e # Handle the error, potentially by inspecting the data further or adjusting preprocessing steps # Determine categorical feature indices for KPrototypes catfeaturesindices listrangelennumericalfeatures lennumericalfeatures lencategoricalfeatures # Perform KPrototypes clustering if 
data preprocessing was successful if 'datapreprocessed' in locals: # Perform KPrototypes clustering kproto KPrototypesnclusters init'Cao', ninit verbose clusterlabelskproto kproto.fitpredictdatapreprocessed, categoricalcatfeaturesindices # Add cluster labels to the original dataframe dataClusterKPrototypes' clusterlabelskproto # Extract the cluster centroids centroids kproto.clustercentroids # Apply hierarchical clustering to the centroids Z linkagecentroids method'ward' # Plot dendrogram for the hierarchical clustering pltfigurefigsize dendrogramZ labelsfCluster i for i in rangelencentroids plttitleHierarchical Clustering Dendrogram' pltxlabelCluster pltylabelDistance pltshow # Plot clusters in a scatter plot using original features pltfigurefigsize snsscatterplotx'Passenger Count', y'Adjusted Passenger Count', hue'ClusterKPrototypes', datadata, palette'viridis', alpha pltxlabelPassenger Count' pltylabelAdjusted Passenger Count' plttitleHybrid Clustering: KPrototypes Hierarchical Clustering' pltlegendtitle'Cluster' pltshow # Compute cluster evaluation metrics # Silhouette Score silhouetteavg silhouettescoredatapreprocessed, clusterlabelskproto printfSilhouette Score: silhouetteavg:f # CalinskiHarabasz Index calinskiharabasz calinskiharabaszscoredatapreprocessed, clusterlabelskproto printfCalinskiHarabasz Index: calinskiharabasz:f # DaviesBouldin Index daviesbouldin daviesbouldinscoredatapreprocessed, clusterlabelskproto printfDaviesBouldin Index: daviesbouldin:f Error during preprocessing: For a sparse output, all columns should be a numeric or convertible to a numeric. 
TypeError: kproto my dataset columns value are index int Activity Period int Operating Airline object Operating Airline IATA Code object Published Airline object Published Airline IATA Code object GEO Summary object GEO Region object Activity Type Code object Price Category Code object Terminal object Boarding Area object Passenger Count int Adjusted Activity Type Code object Adjusted Passenger Count int Year int Month object dtype: object resolve the error, and provide error free code with the output.Kindly refr
User
"""Hybrid clustering of air-traffic passenger statistics.

Clusters the rows with K-Prototypes (mixed numerical/categorical data), then
runs Ward hierarchical clustering on the numerical part of the resulting
centroids, plots the results, and prints three internal validation metrics.

Two representations of the same selected features are built:

* a dense, fully numeric matrix (scaled numerics + one-hot categoricals) used
  for the scikit-learn metrics, which require numeric input;
* a mixed matrix (scaled numerics + raw categorical labels) used for
  K-Prototypes, which needs one column per categorical feature.

The earlier failure ("For a sparse output, all columns should be a numeric or
convertible to a numeric") came from fitting the ColumnTransformer on the whole
frame with ``remainder='passthrough'``: the untouched object columns were
stacked next to sparse one-hot output. Only the selected features are
transformed here, and dense output is forced.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from kmodes.kprototypes import KPrototypes
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.compose import ColumnTransformer
from sklearn.metrics import (
    calinski_harabasz_score,
    davies_bouldin_score,
    silhouette_score,
)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# NOTE(review): the path separators and underscores were lost in the original
# paste; this is the most plausible reconstruction -- confirm before running.
FILE_PATH = (
    r"C:\Users\ankit\Downloads\Ankitproj\Ankitproj\Clustering"
    r"\Air_Traffic_Passenger_Statistics.csv"
)

NUMERICAL_FEATURES = ["Passenger Count", "Adjusted Passenger Count", "Year"]
CATEGORICAL_FEATURES = ["Published Airline", "GEO Region"]
# Numerical columns first: every positional index below relies on this order.
FEATURES = NUMERICAL_FEATURES + CATEGORICAL_FEATURES

CLUSTER_COLUMN = "Cluster_KPrototypes"

# NOTE(review): the numeric arguments below were stripped from the original
# paste; the values are placeholders to be tuned, not recovered settings.
N_CLUSTERS = 3
N_INIT = 5
VERBOSE = 1
RANDOM_STATE = 42
FIGURE_SIZE = (10, 6)
SCATTER_ALPHA = 0.6


def load_data(file_path: str) -> pd.DataFrame:
    """Read the CSV and verify that every clustering feature is present.

    Raises:
        KeyError: if any column listed in ``FEATURES`` is missing.
    """
    data = pd.read_csv(file_path)
    missing = [column for column in FEATURES if column not in data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")
    return data


def build_numeric_matrix(data: pd.DataFrame) -> np.ndarray:
    """Return a dense numeric matrix: scaled numerics, then one-hot categoricals."""
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), NUMERICAL_FEATURES),
            # drop='first' removes one redundant dummy column per feature.
            ("cat", OneHotEncoder(drop="first"), CATEGORICAL_FEATURES),
        ],
        # Default remainder='drop': unselected object columns must not be
        # passed through, which is what triggered the sparse-output error.
        # sparse_threshold=0 always yields a dense array.
        sparse_threshold=0,
    )
    return preprocessor.fit_transform(data[FEATURES])


def build_mixed_matrix(data: pd.DataFrame, numeric_matrix: np.ndarray) -> np.ndarray:
    """Return scaled numerics followed by the raw categorical labels.

    The result has dtype ``object`` so that stacking does not coerce the
    numerical columns to strings.
    """
    n_numeric = len(NUMERICAL_FEATURES)
    scaled_numeric = numeric_matrix[:, :n_numeric].astype(object)
    raw_categorical = data[CATEGORICAL_FEATURES].astype(str).to_numpy(dtype=object)
    return np.hstack([scaled_numeric, raw_categorical])


def plot_centroid_dendrogram(centroids) -> None:
    """Plot a Ward-linkage dendrogram of the centroids' numerical part."""
    # NOTE(review): assumes cluster_centroids_ is a single 2-D array with the
    # numerical columns first (recent kmodes releases) -- confirm for the
    # installed version. Ward linkage needs numeric input, so the categorical
    # part of each centroid is excluded.
    numeric_centroids = np.asarray(centroids)[:, : len(NUMERICAL_FEATURES)].astype(float)
    merge_tree = linkage(numeric_centroids, method="ward")

    plt.figure(figsize=FIGURE_SIZE)
    dendrogram(
        merge_tree,
        labels=[f"Cluster {i}" for i in range(len(numeric_centroids))],
    )
    plt.title("Hierarchical Clustering Dendrogram")
    plt.xlabel("Cluster")
    plt.ylabel("Distance")
    plt.show()


def plot_cluster_scatter(data: pd.DataFrame) -> None:
    """Scatter the original passenger counts, coloured by cluster label."""
    plt.figure(figsize=FIGURE_SIZE)
    sns.scatterplot(
        x="Passenger Count",
        y="Adjusted Passenger Count",
        hue=CLUSTER_COLUMN,
        data=data,
        palette="viridis",
        alpha=SCATTER_ALPHA,
    )
    plt.xlabel("Passenger Count")
    plt.ylabel("Adjusted Passenger Count")
    plt.title("Hybrid Clustering: K-Prototypes + Hierarchical Clustering")
    plt.legend(title="Cluster")
    plt.show()


def print_cluster_metrics(numeric_matrix: np.ndarray, labels) -> None:
    """Print silhouette, Calinski-Harabasz and Davies-Bouldin scores."""
    silhouette_avg = silhouette_score(numeric_matrix, labels)
    print(f"Silhouette Score: {silhouette_avg:.4f}")

    calinski_harabasz = calinski_harabasz_score(numeric_matrix, labels)
    print(f"Calinski-Harabasz Index: {calinski_harabasz:.4f}")

    davies_bouldin = davies_bouldin_score(numeric_matrix, labels)
    print(f"Davies-Bouldin Index: {davies_bouldin:.4f}")


def main() -> None:
    """Run the full load -> preprocess -> cluster -> plot -> evaluate flow."""
    data = load_data(FILE_PATH)

    numeric_matrix = build_numeric_matrix(data)
    mixed_matrix = build_mixed_matrix(data, numeric_matrix)

    # Categorical columns sit right after the numerical ones in mixed_matrix.
    n_numeric = len(NUMERICAL_FEATURES)
    cat_feature_indices = list(range(n_numeric, n_numeric + len(CATEGORICAL_FEATURES)))

    kproto = KPrototypes(
        n_clusters=N_CLUSTERS,
        init="Cao",
        n_init=N_INIT,
        verbose=VERBOSE,
        random_state=RANDOM_STATE,
    )
    cluster_labels = kproto.fit_predict(mixed_matrix, categorical=cat_feature_indices)

    # Attach the labels to the original frame so plots use unscaled values.
    data[CLUSTER_COLUMN] = cluster_labels

    plot_centroid_dendrogram(kproto.cluster_centroids_)
    plot_cluster_scatter(data)
    print_cluster_metrics(numeric_matrix, cluster_labels)


if __name__ == "__main__":
    main()
Error during preprocessing: For a sparse output, all columns should be a numeric or convertible to a numeric.
TypeError: kproto
My dataset's columns and their dtypes are:
index int
Activity Period int
Operating Airline object
Operating Airline IATA Code object
Published Airline object
Published Airline IATA Code object
GEO Summary object
GEO Region object
Activity Type Code object
Price Category Code object
Terminal object
Boarding Area object
Passenger Count int
Adjusted Activity Type Code object
Adjusted Passenger Count int
Year int
Month object
dtype: object
Resolve the error, and provide error-free code with the output. Kindly refr
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access with AI-Powered Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started