Question
The Code import java.io.File; import java.util.Scanner; import java.util.Map.Entry; import java.util.AbstractMap; import java.util.Arrays; import java.util.Comparator; public class WordCountSort { public static void selectionSort( String [] data
The Code
import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.Arrays;
import java.util.Comparator;
public class WordCountSort {
public static void selectionSort(String[] data) {
int n = data.length;
for (int i = 0; i
int minIndex = i;
for (int j = i + 1; j
if (data[minIndex].compareTo(data[j])
minIndex = j;
}
}
if (i != minIndex)
swap(data, minIndex, i);
}
}
public static void insertionSort(String[] data) {
int n = data.length;
for (int k = 1; k
String cur = data[k];
int j = k;
while (j > 0 && data[j - 1].compareTo(cur) > 0) {
data[j] = data[j - 1];
j--;
}
data[j] = cur;
}
}
/** Merge two strings. See the textbook for explanation. **/
public static void merge(String[] S1, String[] S2, String[] S) {
int i = 0, j = 0;
while (i + j
if (j == S2.length || (i S1[i].compareTo(S2[j])
S[i + j] = S1[i++];
else
S[i + j] = S2[j++];
}
}
public static void mergeSort(String[] S) {
int n = S.length;
if (n
return;
int mid = n / 2;
// partition the string into two strings
String[] S1 = Arrays.copyOfRange(S, 0, mid);
String[] S2 = Arrays.copyOfRange(S, mid, n);
mergeSort(S1);
mergeSort(S2);
merge(S1, S2, S);
}
private static void swap(String[] array, int i, int j) {
String tmp = array[i];
array[i] = array[j];
array[j] = tmp;
}
public static EntryString, Integer> count_ARRAY_SORT(String[] tokens, String sortMethod) {
int CAPACITY = 1000000;
String[] words = new String[CAPACITY];
int[] counts = new int[CAPACITY];
if (sortMethod.equals("SELECT"))
selectionSort(tokens);
else if (sortMethod.equals("INSERT"))
insertionSort(tokens);
else if (sortMethod.equals("MERGE"))
mergeSort(tokens);
else if (sortMethod.equals("JAVA"))
Arrays.sort(tokens);
else if (sortMethod.equals("QUICK"))
quickSortInPlace(tokens); //This method needs to be created
else
System.out.println(sortMethod + " sorting method does not exist.");
int j = 0, k = 0;
int len = tokens.length;
while (j
int duplicates = 1;
while (j
j++;
duplicates++;
}
words[k] = tokens[j];
counts[k] = duplicates;
j++;
k++;
}
/** get the max count **/
int maxCount = 0;
String maxWord = "";
for (int i = 0; i
if (counts[i] > maxCount) {
maxWord = words[i];
maxCount = counts[i];
}
}
return new AbstractMap.SimpleEntryString, Integer>(maxWord, maxCount);
}
static String[] readText(String PATH) throws Exception {
Scanner doc = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");
// tokenize text. any characters other than English letters(a-z and A-Z
// ) are delimiters.
int length = 0;
while (doc.hasNext()) {
doc.next();
length++;
}
String[] tokens = new String[length];
Scanner s = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");
length = 0;
while (s.hasNext()) {
tokens[length] = s.next().toLowerCase();
length++;
}
doc.close();
return tokens;
}
public static void main(String[] args) throws Exception {
String PATH = "/PATH/dblp";
String[] METHODS = { "MERGE", "INSERT", "SELECT" };
String[] DATASETS = { "200", "500", "1k", "5k", "10k", "100k", "1m", "" }; //Files containing that many lines of text
String[] tokens;
// run the experiments on different data sets
for (int j = 0; j
// run the experiments using different methods
System.out.println("Data is " + DATASETS[j]);
for (int i = 0; i
tokens = readText(PATH + DATASETS[j] + ".txt");
long startTime = System.currentTimeMillis();
EntryString, Integer> entry = count_ARRAY_SORT(tokens, METHODS[i]);
long endTime = System.currentTimeMillis();
String time = String.format("%12d", endTime - startTime);
System.out.println(METHODS[i] + " method\t time=" + time + ". Most popular word is " + entry.getKey()
+ ":" + entry.getValue());
}
}
}
}
Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblp Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblpStep by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started