Question
Tweets Early sentiment analysis work6 included the collection of a set of tweets, some for learning a machine learning model for sentiment analysis, and some
Tweets: Early sentiment analysis work [6] included the collection of a set of tweets, some for learning a machine learning model for sentiment analysis, and some for evaluating how good that model is. We'll be using that same data; it includes the following information for each tweet [7]:
the gold polarity of the tweet (0 = negative, 2 = neutral, 4 = positive, = not given)
the id of the tweet (2087)
the date of the tweet (Sat May 16 23:58:44 UTC 2009)
You'll basically be implementing a simple keyword-based method for sentiment analysis of tweets, counting up the numbers of positive and negative words in a tweet to determine the predicted polarity of the tweet. (This differs from the gold polarity, which is what has been decided as the true polarity of the tweet; you're going to try to see how well you can predict it based on the content of the tweet.) T1: You will choose appropriate representations for the Tweet class. You may or may not choose to base it on other classes I've supplied (Vertex, VertexIDList). Material from weeks 9–11 of lectures will be particularly relevant in helping you decide. You'll need to write a constructor based on your chosen representation that instantiates an empty tweet.
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import org.apache.commons.csv.*;
import org.junit.Test;
public class TweetCollection {
// TODO: add appropriate data types
/**
 * Creates a tweet collection.
 * The body is still an empty stub: no fields are declared or initialised yet.
 */
public TweetCollection() {
// Constructor
// TODO
}
/*
* functions for accessing individual tweets
*/
/**
 * Looks up a tweet by its ID (intended contract; not yet implemented).
 *
 * @param ID the tweet ID to look up
 * @return currently always {@code null}, because the lookup is still a TODO
 */
public Tweet getTweetByID (String ID) {
// PRE: -
// POST: Returns the Tweet object with the given tweet ID
// TODO
return null;
}
/**
 * Reports how many tweets the collection holds (intended contract; not yet implemented).
 *
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Integer}, so unboxing the result throws until this is implemented
 */
public Integer numTweets() {
// PRE: -
// POST: Returns the number of tweets in this collection
// TODO
return null;
}
/*
* functions for accessing sentiment words
*/
/**
 * Returns the polarity of a word from the basic sentiment word list
 * (intended contract; not yet implemented).
 *
 * @param w the word to look up; must not be null per the precondition
 * @return currently always {@code null}, because the lookup is still a TODO
 */
public Polarity getBasicSentimentWordPolarity(String w) {
// PRE: w not null, basic sentiment words already read in from file
// POST: Returns polarity of w
// TODO
return null;
}
/**
 * Returns the polarity of a word from the finegrained sentiment word list
 * (intended contract; not yet implemented).
 *
 * @param w the word to look up; must not be null per the precondition
 * @return currently always {@code null}, because the lookup is still a TODO
 */
public Polarity getFinegrainedSentimentWordPolarity(String w) {
// PRE: w not null, finegrained sentiment words already read in from file
// POST: Returns polarity of w
// TODO
return null;
}
/**
 * Returns the strength of a word from the finegrained sentiment word list
 * (intended contract; not yet implemented).
 *
 * @param w the word to look up; must not be null per the precondition
 * @return currently always {@code null}, because the lookup is still a TODO
 */
public Strength getFinegrainedSentimentWordStrength(String w) {
// PRE: w not null, finegrained sentiment words already read in from file
// POST: Returns strength of w
// TODO
return null;
}
/*
* functions for reading in tweets
*
*/
public void ingestTweetsFromFile(String fInName) throws IOException {
// PRE: -
// POST: Reads tweets from .csv file, stores in data structure
// NOTES
// Data source, file format description at http://help.sentiment140.com/for-students
// Using apache csv reader: https://www.callicoder.com/java-read-write-csv-file-apache-commons-csv/
try (
Reader reader = Files.newBufferedReader(Paths.get(fInName));
CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
) {
Iterable
for (CSVRecord csvRecord : csvRecords) {
// Accessing Values by Column Index
Tweet tw = new Tweet(csvRecord.get(0), // gold polarity
csvRecord.get(1), // ID
csvRecord.get(2), // date
csvRecord.get(4), // user
csvRecord.get(5)); // text
// TODO: insert tweet tw into appropriate data type
}
}
}
/*
* functions for sentiment words
*/
/**
 * Reads the basic sentiment word list from a file (intended contract).
 * The body is still an empty stub, so nothing is read or stored yet.
 *
 * @param fInName path of the word-list file
 * @throws IOException declared for the eventual file read; never thrown by the current stub
 */
public void importBasicSentimentWordsFromFile (String fInName) throws IOException {
// PRE: -
// POST: Read in and store basic sentiment words in appropriate data type
// TODO
}
/**
 * Reads the finegrained sentiment word list from a file (intended contract).
 * The body is still an empty stub, so nothing is read or stored yet.
 *
 * @param fInName path of the word-list file
 * @throws IOException declared for the eventual file read; never thrown by the current stub
 */
public void importFinegrainedSentimentWordsFromFile (String fInName) throws IOException {
// PRE: -
// POST: Read in and store finegrained sentiment words in appropriate data type
// TODO
}
/**
 * Tests whether a word is in the basic sentiment word list
 * (intended contract; not yet implemented).
 *
 * @param w the word to test
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Boolean}, so using the result in a condition throws until implemented
 */
public Boolean isBasicSentWord (String w) {
// PRE: Basic sentiment words have been read in and stored
// POST: Returns true if w is a basic sentiment word, false otherwise
// TODO
return null;
}
/**
 * Tests whether a word is in the finegrained sentiment word list
 * (intended contract; not yet implemented).
 *
 * @param w the word to test
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Boolean}, so using the result in a condition throws until implemented
 */
public Boolean isFinegrainedSentWord (String w) {
// PRE: Finegrained sentiment words have been read in and stored
// POST: Returns true if w is a finegrained sentiment word, false otherwise
// TODO
return null;
}
/**
 * Annotates every tweet in the collection with a sentiment predicted from
 * the basic word list (intended contract). The body is still an empty stub.
 */
public void predictTweetSentimentFromBasicWordlist () {
// PRE: Finegrained word sentiment already imported
// NOTE(review): the PRE names the finegrained list, but this method is the
// basic-word-list variant; presumably a copy-paste slip, confirm.
// POST: For all tweets in collection, tweet annotated with predicted sentiment
// based on count of sentiment words in sentWords
// TODO
}
/**
 * Annotates every tweet with a sentiment predicted from the finegrained
 * word list (intended contract). The body is still an empty stub, so neither
 * parameter is used yet.
 *
 * @param strongWeight presumably the weight given to strong sentiment words; TODO confirm
 * @param weakWeight presumably the weight given to weak sentiment words; TODO confirm
 */
public void predictTweetSentimentFromFinegrainedWordlist (Integer strongWeight, Integer weakWeight) {
// PRE: Finegrained word sentiment already imported
// POST: For all tweets in v, tweet annotated with predicted sentiment
// based on count of sentiment words in sentWords
// TODO
}
/*
* functions for inverse index
*
*/
public Map
// PRE: -
// POST: Read in and returned contents of file as inverse index
// invIndex has words w as key, IDs of tweets that contain w as value
// TODO
return null;
}
/*
* functions for graph construction
*/
public void constructSharedWordGraph(Map
// PRE: invIndex has words w as key, IDs of tweets that contain w as value
// POST: Graph constructed, with tweets as vertices,
// and edges between them if they share a word
// TODO
}
/**
 * Reports the number of connected components (intended contract; not yet implemented).
 *
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Integer}, so unboxing the result throws until this is implemented
 */
public Integer numConnectedComponents() {
// PRE: -
// POST: Returns the number of connected components
// TODO
return null;
}
/**
 * Partitions the graph into connected components by annotating it
 * (intended contract). The body is still an empty stub.
 */
public void annotateConnectedComponents() {
// PRE: -
// POST: Annotates graph so that it is partitioned into components
// TODO
}
/**
 * Counts the labels of a given polarity within the component that contains
 * a given tweet (intended contract; not yet implemented).
 *
 * @param ID ID of a tweet; must be a valid tweet per the precondition
 * @param p the polarity whose labels are counted
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Integer}, so unboxing the result throws until this is implemented
 */
public Integer componentSentLabelCount(String ID, Polarity p) {
// PRE: Graph components are identified, ID is a valid tweet
// POST: Returns count of labels corresponding to Polarity p in component containing ID
// TODO
return null;
}
/**
 * Labels the tweets in one component with a predicted polarity
 * (intended contract). The body is still an empty stub.
 *
 * @param ID ID of a tweet in the graph; identifies the component to label
 * @param p the predicted polarity to assign
 * @param keepPred if true, only tweets whose predicted polarity is None are
 *        relabelled; otherwise every tweet in the component is
 */
public void propagateLabelAcrossComponent(String ID, Polarity p, Boolean keepPred) {
// PRE: ID is a tweet id in the graph
// POST: Labels tweets in component with predicted polarity p
// (if keepPred == T, only tweets w pred polarity None; otherwise all tweets)
// TODO
}
/**
 * Labels the tweets of every component with that component's majority
 * sentiment (intended contract). The body is still an empty stub.
 *
 * @param keepPred if true, only tweets with predicted label None are
 *        relabelled; otherwise every tweet in the component is
 */
public void propagateMajorityLabelAcrossComponents(Boolean keepPred) {
// PRE: Components are identified
// POST: Tweets in each component are labelled with the majority sentiment for that component
// Majority label is defined as whichever of POS or NEG has the larger count;
// if POS and NEG are both zero, majority label is NONE
// otherwise, majority label is NEUT
// NOTE(review): "otherwise" presumably means a non-zero POS/NEG tie; confirm.
// If keepPred is True, only tweets with predicted label None are labelled in this way
// otherwise, all tweets in the component are labelled in this way
// TODO
}
/*
* functions for evaluation
*/
/**
 * Computes the accuracy of the labelling (intended contract; not yet implemented).
 *
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Double}, so unboxing the result throws until this is implemented
 */
public Double accuracy () {
// PRE: -
// POST: Calculates and returns accuracy of labelling
// TODO
return null;
}
/**
 * Computes the coverage of the labelling (intended contract; not yet implemented).
 *
 * @return currently always {@code null}; the return type is the boxed
 *         {@code Double}, so unboxing the result throws until this is implemented
 */
public Double coverage () {
// PRE: -
// POST: Calculates and returns coverage of labelling
// TODO
return null;
}
/**
 * Program entry point. The body is empty, so running the class does nothing.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
}
}
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started