Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

User import pandas as pd import matplotlib.pyplot as plt from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.mixture import GaussianMixture from sklearn.cluster import

User
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
# Load the dataset
file_path = r"C:\Users\ankit\Downloads\Ankit_proj\Ankit_proj\Clustering\Air_Traffic_Passenger_Statistics.csv"
data = pd.read_csv(file_path)
# Define numerical and categorical features
numerical_features =['Passenger Count', 'Adjusted Passenger Count', 'Year']
categorical_features =['Published Airline', 'GEO Region']
# Perform one-hot encoding for categorical columns
data_encoded = pd.get_dummies(data, columns=categorical_features)
# Check if there are any missing values after encoding
missing_values = data_encoded.isnull().sum()
print("Missing values after encoding:
", missing_values)
# Define preprocessing steps for numerical and categorical features
preprocessor = ColumnTransformer(
transformers=[
('num', StandardScaler(), numerical_features),
('cat', OneHotEncoder(), categorical_features)
],
remainder='passthrough'
)
# Preprocess the data
try:
data_preprocessed = preprocessor.fit_transform(data)
except ValueError as e:
print("Error during preprocessing:", e)
print("Please check if all columns in the dataset are numeric or convertible to numeric.")
# If preprocessing is successful, proceed with clustering
if 'data_preprocessed' in locals():
# Perform GMM clustering
gmm = GaussianMixture(n_components=4)
gmm_clusters = gmm.fit_predict(data_preprocessed)
# Perform DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_clusters = dbscan.fit_predict(data_preprocessed)
# Plot the clusters
plt.figure(figsize=(12,6))
# GMM clusters
plt.subplot(1,2,1)
plt.scatter(data_preprocessed[:,0], data_preprocessed[:,1], c=gmm_clusters, cmap='viridis', alpha=0.5)
plt.title('GMM Clustering')
# DBSCAN clusters
plt.subplot(1,2,2)
plt.scatter(data_preprocessed[:,0], data_preprocessed[:,1], c=dbscan_clusters, cmap='viridis', alpha=0.5)
plt.title('DBSCAN Clustering')
plt.tight_layout()
plt.show()
Error during preprocessing: For a sparse output, all columns should be a numeric or convertible to a numeric.
Please check if all columns in the dataset are numeric or convertible to numeric.
my dataset columns value are index int
6
4
Activity Period int
6
4
Operating Airline object
Operating Airline IATA Code object
Published Airline object
Published Airline IATA Code object
GEO Summary object
GEO Region object
Activity Type Code object
Price Category Code object
Terminal object
Boarding Area object
Passenger Count int
6
4
Adjusted Activity Type Code object
Adjusted Passenger Count int
6
4
Year int
6
4
Month object
dtype: object
resolve the error, and provide error free code with the output.Kindly refrain my usng chatgpt as it also dontknwo the answer, nly it can be done manualy with the knowledge

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Students also viewed these Databases questions