# Do not change the code in this cell.
# Make sure you run this cell once if you are working on Colab or on a fresh installation of Anaconda.
import nltk
# Fetch the NLTK 'twitter_samples' corpus, which is read later in this file
# through nltk.corpus.twitter_samples.
nltk.download('twitter_samples')
# Fetch the 'punkt' tokenizer data; presumably required by word_tokenize --
# confirm against the installed NLTK version.
nltk.download('punkt')
# do not change the code in this cell
# make sure you run this cell
from nltk.corpus import twitter_samples
from nltk.tokenize import word_tokenize
import random
import math
def sample_sentences(corpus, sample_size):
    """Return ``sample_size`` items drawn at random from ``corpus``.

    Items are picked by sampling distinct index positions, so no position
    is chosen twice (equal values can still appear if ``corpus`` itself
    contains duplicates). The result is in the order the indices were drawn,
    not in corpus order.

    Args:
        corpus: An indexable collection supporting ``len()`` and integer
            subscripting (e.g. a list of sentences).
        sample_size: Number of items to draw.

    Returns:
        A new list with ``sample_size`` items taken from ``corpus``.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``sample_size``
            is negative or larger than ``len(corpus)``.
    """
    # Sample index positions rather than the items themselves; this only
    # requires ``corpus`` to support len() and indexing.
    ids = random.sample(range(len(corpus)), sample_size)
    return [corpus[i] for i in ids]
# Fix the RNG seed so the same tweets are sampled on every run.
random.seed(37)
# Draw 1000 tweets from the strings of the twitter_samples corpus.
tsample = sample_sentences(twitter_samples.strings(), 1000)
# Lower-case each sampled tweet and split it into a list of word tokens.
twittertokens = [word_tokenize(tweet.lower()) for tweet in tsample]
# Bare expression: shows the first five tokenized tweets when this is the
# last statement of a notebook cell; it has no effect in a plain script.
twittertokens[:5]
ii) What does it mean to say that a word is ambiguous with respect to part-of-speech? Give an example of a word in `tagged_tweets[0]` which could be said to be ambiguous with respect to part-of-speech. What information does an HMM part-of-speech tagger use to try to resolve the ambiguity?
Step by Step Solution
There are 3 Steps involved in it
Step: 1
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started