Question
Fix this code so that it gives correct output: Conduct punctuation removal, stop word removal, case folding, lemmatization, and stemming on the documents. import pandas as ...
Fix this code so that it gives correct output:
Conduct punctuation removal, stop word removal, casefolding, lemmatization, and stemming on the documents.
"""Text preprocessing demo using NLTK.

Runs five stages over a small list of documents and prints the result of
each stage: punctuation removal, stop word removal, casefolding,
lemmatization, and stemming.
"""
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer

# Corpora needed at runtime: "wordnet" backs the lemmatizer and "stopwords"
# backs stopwords.words(); without the latter the stop word stage raises
# LookupError on a fresh NLTK installation.
nltk.download("wordnet")
nltk.download("stopwords")

sentences = [
    "Can we go to Disney??!!!!!! Let's go on a plane!",
    "The New England Patriots won the Super Bowl..",
    "I HATE going to school so early",
    "When will I be considered an adult?",
    "I want to go to A&M, Baylor, or the University of Texas.",
]

# --- Stage 1: punctuation removal -----------------------------------------
# The pattern must be r'\w+' (runs of word characters). The previous r'w+'
# matched only runs of the literal letter "w", so nearly every token was lost.
tokens = []
stop_words = []
tokenizer = RegexpTokenizer(r'\w+')
print("sentences after punctuation removal are :")
print(" ")
for sentence in sentences:
    words = tokenizer.tokenize(sentence)
    tokens.append(words)
    print(" ".join(words))
print(" ")

# --- Stage 2: stop word removal -------------------------------------------
# Load the stop word list once into a set instead of re-reading the corpus
# for every token. The tokens still carry their original case at this stage,
# so compare the casefolded form; the kept tokens are left untouched so that
# the casefold stage below still has visible work to do.
english_stop_words = set(stopwords.words('english'))
print("sentences after stop word removal are :")
print(" ")
for words in tokens:
    kept = [w for w in words if w.casefold() not in english_stop_words]
    stop_words.append(kept)
    print(" ".join(kept))
print(" ")

# --- Stage 3: casefolding --------------------------------------------------
print("sentences after casefold are :")
for doc in stop_words:
    # Slice assignment keeps the same inner list objects, updated in place.
    doc[:] = [word.casefold() for word in doc]
    print(" ".join(doc))
print(" ")

# --- Stage 4: lemmatization ------------------------------------------------
print("lemmatization:")
lemmatizer = WordNetLemmatizer()
for doc in stop_words:
    for j, word in enumerate(doc):
        lemma = lemmatizer.lemmatize(word)
        print(word, ":", lemma)
        doc[j] = lemma

# --- Stage 5: stemming -----------------------------------------------------
print(" ")
print("steming:")
ps = PorterStemmer()
for doc in stop_words:
    for j, word in enumerate(doc):
        stem = ps.stem(word)
        print(word, ":", stem)
        doc[j] = stem
print(" ")

# --- Final output after all of the stages above ----------------------------
print("final output:")
for doc in stop_words:
    print(" ".join(doc))
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started