Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

import pandas as pd import numpy as np #%matplotlib inline #import pandas_profiling import seaborn as sns from sklearn.metrics import roc_auc_score

import pandas as pd
import numpy as np
#%matplotlib inline
#import pandas_profiling
import seaborn as sns
from sklearn.metrics import roc_auc_score
# Load the review dataset; the CSV is expected in the current working directory.
amazonData = pd.read_csv("Amazon_Unlocked_Mobile.csv")

# Quick look at the raw data. These bare expressions only display output when
# they are the last statement of a notebook cell.
amazonData.head(5)
amazonData.describe()
amazonData.info()

# Plot to see how rating is distributed.
# The column is passed by keyword because recent seaborn releases interpret
# the first positional argument as `data`, not `x`.
sns.countplot(x="Rating", data=amazonData)

# Data cleaning: remove duplicated rows, then rows with any missing value.
# NOTE(review): keep=False discards *every* copy of a duplicated row, not just
# the extra ones -- confirm this is intended rather than keep="first".
amazonData = amazonData.drop_duplicates(keep=False).dropna()
rowcount = amazonData.shape[0]
rowcount

# relation between price and rating
#sns.regplot(x='Price',y='Rating',data=amazonData)

# Sanity check: every column should report zero nulls after dropna().
amazonData.isnull().sum()
amazonData["Reviews"].head(2)
# Positional access: the cleaning above removes rows, so index label 0 is not
# guaranteed to exist any more and `["Reviews"][0]` could raise KeyError.
amazonData["Reviews"].iloc[0]

# Working subset: review text and rating for the first 20000 cleaned rows.
# .copy() makes the subset independent of amazonData, so writing to it later
# cannot trigger chained-assignment warnings.
sentiData = amazonData[["Reviews", "Rating"]].iloc[:20000].copy()
sentiData.head(5)
# Positional access for the same reason as above (index labels have gaps).
sentiData["Reviews"].iloc[1]

# distinct products
amazonData["Product Name"].nunique()
# distinct brand
amazonData["Brand Name"].nunique()
def _rating_votes_pivot(frame: pd.DataFrame, index_col: str) -> pd.DataFrame:
    """Aggregate 'Rating' and 'Review Votes' for each value of *index_col*.

    For both columns the table holds the sum, the mean and the number of
    non-zero entries. A grand-total row labelled "All" is appended
    (``margins=True``). Rows are ordered by the non-zero 'Rating' count,
    largest first, so the "All" row normally ends up on top.

    Args:
        frame: Data containing 'Rating', 'Review Votes' and *index_col*.
        index_col: Column whose distinct values become the table's rows.

    Returns:
        The sorted pivot table with columns keyed as (aggregation, column),
        e.g. ``("count_nonzero", "Rating")``.
    """
    table = pd.pivot_table(
        frame,
        values=["Rating", "Review Votes"],
        index=[index_col],
        # String names replace np.sum / np.mean: they yield the same
        # "sum" / "mean" column labels without the pandas deprecation
        # warning for NumPy callables.
        aggfunc=["sum", "mean", np.count_nonzero],
        margins=True,
        fill_value=0,
    )
    table = table.sort_values(by=("count_nonzero", "Rating"), ascending=False)
    return table.fillna("")


def _top_rows(table: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the first *n* real rows of *table* with the index as a column.

    The "All" totals row added by ``margins=True`` is dropped first;
    otherwise it would occupy one of the top-*n* slots.
    """
    return table.drop(index="All", errors="ignore").head(n).reset_index()


# Top 10 brand names, ranked by number of (non-zero) ratings.
pivot1 = _rating_votes_pivot(amazonData, "Brand Name")
topmost_prods = _top_rows(pivot1)
topmost_prods

###### Top 10 products in Samsung
df_samsung = amazonData.loc[amazonData["Brand Name"] == "Samsung"]
pivot = _rating_votes_pivot(df_samsung, "Product Name")
topmost_prods_samsung = _top_rows(pivot)
topmost_prods_samsung

###### Top 10 products in apple
df_apple = amazonData.loc[amazonData["Brand Name"] == "Apple"]
pivot = _rating_votes_pivot(df_apple, "Product Name")
topmost_prods_apple = _top_rows(pivot)
topmost_prods_apple
#installing NLTK package

Step by Step Solution

This solution involves 3 steps

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

DB2 11 The Ultimate Database For Cloud Analytics And Mobile

Authors: John Campbell, Chris Crone, Gareth Jones, Surekha Parekh, Jay Yothers

1st Edition

1583474013, 978-1583474013

More Books

Students also viewed these Databases questions