Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

The Code import java.io.File; import java.util.Scanner; import java.util.Map.Entry; import java.util.AbstractMap; import java.util.Arrays; import java.util.Comparator; public class WordCountSort { public static void selectionSort( String [] data

image text in transcribedimage text in transcribed

The Code

import java.io.File;

import java.util.Scanner;

import java.util.Map.Entry;

import java.util.AbstractMap;

import java.util.Arrays;

import java.util.Comparator;

public class WordCountSort {

public static void selectionSort(String[] data) {

int n = data.length;

for (int i = 0; i

int minIndex = i;

for (int j = i + 1; j

if (data[minIndex].compareTo(data[j])

minIndex = j;

}

}

if (i != minIndex)

swap(data, minIndex, i);

}

}

public static void insertionSort(String[] data) {

int n = data.length;

for (int k = 1; k

String cur = data[k];

int j = k;

while (j > 0 && data[j - 1].compareTo(cur) > 0) {

data[j] = data[j - 1];

j--;

}

data[j] = cur;

}

}

/** Merge two strings. See the textbook for explanation. **/

public static void merge(String[] S1, String[] S2, String[] S) {

int i = 0, j = 0;

while (i + j

if (j == S2.length || (i S1[i].compareTo(S2[j])

S[i + j] = S1[i++];

else

S[i + j] = S2[j++];

}

}

public static void mergeSort(String[] S) {

int n = S.length;

if (n

return;

int mid = n / 2;

// partition the string into two strings

String[] S1 = Arrays.copyOfRange(S, 0, mid);

String[] S2 = Arrays.copyOfRange(S, mid, n);

mergeSort(S1);

mergeSort(S2);

merge(S1, S2, S);

}

private static void swap(String[] array, int i, int j) {

String tmp = array[i];

array[i] = array[j];

array[j] = tmp;

}

public static EntryString, Integer> count_ARRAY_SORT(String[] tokens, String sortMethod) {

int CAPACITY = 1000000;

String[] words = new String[CAPACITY];

int[] counts = new int[CAPACITY];

if (sortMethod.equals("SELECT"))

selectionSort(tokens);

else if (sortMethod.equals("INSERT"))

insertionSort(tokens);

else if (sortMethod.equals("MERGE"))

mergeSort(tokens);

else if (sortMethod.equals("JAVA"))

Arrays.sort(tokens);

else if (sortMethod.equals("QUICK"))

quickSortInPlace(tokens); //This method needs to be created

else

System.out.println(sortMethod + " sorting method does not exist.");

int j = 0, k = 0;

int len = tokens.length;

while (j

int duplicates = 1;

while (j

j++;

duplicates++;

}

words[k] = tokens[j];

counts[k] = duplicates;

j++;

k++;

}

/** get the max count **/

int maxCount = 0;

String maxWord = "";

for (int i = 0; i

if (counts[i] > maxCount) {

maxWord = words[i];

maxCount = counts[i];

}

}

return new AbstractMap.SimpleEntryString, Integer>(maxWord, maxCount);

}

static String[] readText(String PATH) throws Exception {

Scanner doc = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");

// tokenize text. any characters other than English letters(a-z and A-Z

// ) are delimiters.

int length = 0;

while (doc.hasNext()) {

doc.next();

length++;

}

String[] tokens = new String[length];

Scanner s = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");

length = 0;

while (s.hasNext()) {

tokens[length] = s.next().toLowerCase();

length++;

}

doc.close();

return tokens;

}

public static void main(String[] args) throws Exception {

String PATH = "/PATH/dblp";

String[] METHODS = { "MERGE", "INSERT", "SELECT" };

String[] DATASETS = { "200", "500", "1k", "5k", "10k", "100k", "1m", "" }; //Files containing that many lines of text

String[] tokens;

// run the experiments on different data sets

for (int j = 0; j

// run the experiments using different methods

System.out.println("Data is " + DATASETS[j]);

for (int i = 0; i

tokens = readText(PATH + DATASETS[j] + ".txt");

long startTime = System.currentTimeMillis();

EntryString, Integer> entry = count_ARRAY_SORT(tokens, METHODS[i]);

long endTime = System.currentTimeMillis();

String time = String.format("%12d", endTime - startTime);

System.out.println(METHODS[i] + " method\t time=" + time + ". Most popular word is " + entry.getKey()

+ ":" + entry.getValue());

}

}

}

}

Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblp Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblp

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

More Books

Students also viewed these Databases questions

Question

Question What is a secular trust?

Answered: 1 week ago