Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

You are to write an efficient program that will read a dictionary of 100,000 words, and then check a document of 2,000,000 words. The program

You are to write an efficient program that will read a dictionary of 100,000 words, and then check a document of 2,000,000 words. The program should insert into an array the position of each misspelled word. I have provided the driver program SpellRunner.cpp, and the necessary class stubs in speller.h, and speller.cpp. You may add any classes and/or code you wish to speller.cpp, and speller.h. You may also use, and alter any Weiss files. CreateDocs.out creates files used for testing. Doc-3-4-1 and Dict-3-4-1 are two testing files for small file. ______________________________________________________________________________________________

SpellRunner.cpp

#include #include #include #include #include #include "CPUTimer.h" #include "mynew.h" #include "speller.h"

// uncomment line below for DOS/Windows OS. // #define DOS

#ifdef DOS #include #else #include #endif

extern int maxRAM; extern int currentRAM;

using namespace std;

char* readDictionary(char *filename, char **dictionary, int dictSize) { int fileSize; char *s; struct stat statbuf;

stat(filename, &statbuf); fileSize = statbuf.st_size; s = new char[fileSize + 1]; ifstream inf(filename); inf.read(s, fileSize); inf.close(); s[fileSize] = '\0'; dictionary[0] = strtok(s, " ");

for(int i = 1; i < dictSize; i++) dictionary[i] = strtok(NULL, " ");

return s; } // readDictionary

char* readDocument(char **document, int dictSize, int docSize, int seed) { char *s, filename[80]; int fileSize; struct stat statbuf; sprintf(filename, "Doc-%d-%d-%d.txt", dictSize, docSize, seed); stat(filename, &statbuf); fileSize = statbuf.st_size; s = new char[fileSize + 100]; ifstream inf(filename); inf.read(s, fileSize); inf.close(); s[fileSize] = '\0';

document[0] = strtok(s, " ");

for(int i = 1; i < docSize; i++) document[i] = strtok(NULL, " ");

return s; } // readDocument()

void readWrongs(int *wrongs, int dictSize, int docSize, int seed, int &wrongCount) { char filename[80]; wrongCount = 0;

sprintf(filename, "Wrongs-%d-%d-%d.txt", dictSize, docSize, seed); ifstream inf(filename); while(inf >> wrongs[wrongCount++]);

wrongCount--; } // readWrongs()

void checkAnswers(int wrongs[], int wrongCount, int misspelled[], int misspelledCount) { for(int i = 0; i < wrongCount && i < misspelledCount; i++) if(wrongs[i] < misspelled[i]) { cout << "Speller missed misspelled word # " << wrongs[i] << endl; return; } else if(wrongs[i] > misspelled[i]) { cout << "Speller thought correctly spelled word # " << misspelled[i] << " was wrong "; return; }

if(wrongCount != misspelledCount) cout << "Speller found " << misspelledCount << " misspelled words when " << " there were really " << wrongCount << " misspelled words. "; } // checkAnswers

void cleanup(char **dictionary, char *docFilePtr, char **document, int *wrongs, int *misspelled) { delete [] dictionary; delete [] docFilePtr; delete [] document; delete [] wrongs; delete [] misspelled; } // cleanup()

int main(int argc, char* argv[]) { char line[80], **dictionary, **document, *dictFilePtr, *docFilePtr; int *wrongs, *misspelled, misspelledCount = 0, seed, dictSize, docSize, wrongCount, tempMaxRAM, tempCurrentRAM; strcpy(line, argv[1]); strtok(line, "-"); dictSize = atoi(strtok(NULL, "-")); docSize = atoi(strtok(NULL, "-")); seed = atoi(strtok(NULL, ".")); dictionary = new char*[dictSize + 3]; dictFilePtr = readDictionary(argv[1], dictionary, dictSize); document = new char*[docSize + 3]; docFilePtr = readDocument(document, dictSize, docSize, seed); wrongs = new int[docSize]; readWrongs(wrongs, dictSize, docSize, seed, wrongCount); misspelled = new int[docSize]; CPUTimer ct; maxRAM = currentRAM = 0; Speller *speller = new Speller(dictionary, dictSize); tempMaxRAM = maxRAM; tempCurrentRAM = currentRAM; delete [] dictFilePtr; maxRAM = tempMaxRAM; currentRAM = tempCurrentRAM; speller->check(document, docSize, misspelled, &misspelledCount); cout << "CPU Time: " << ct.cur_CPUTime() << " Real RAM: " << maxRAM << endl; checkAnswers(wrongs, wrongCount, misspelled, misspelledCount); cleanup(dictionary, docFilePtr, document, wrongs, misspelled); delete speller; return 0; } // main() ______________________________________________________________________________________________ Speller.h

#ifndef SPELLER_H #define SPELLER_H

#include "mynew.h" #include "QuadraticProbing.h" #include

class Speller { public: QuadraticHashTable *hashDict; Speller(char *dictionary[], int dictSize); ~Speller();

void check(char *document[], int docSize, int misspelled[], int *misspelledCount);

}; // class Speller #endif ______________________________________________________________________________________________ Speller.cpp #include "speller.h" Speller::Speller(char *dictionary[], int dictSize) { hashDict = new QuadraticHashTable (-1, dictSize);

for(int i = 0; i < dictSize; i++) hashDict->insert(dictionary[i]); } // Speller()

void Speller::check(char *document[], int docSize, int misspelled[], int *misspelledCount) { *misspelledCount = 0;

//Check document array using dictionary that you stored in constructor for(int i = 0; i < docSize; i++) { if(!(hashDict->find(document[i]))) { ++(*misspelledCount); misspelled[i] = 1; } else { misspelled[i] = 0; } }

//Store the array index of the misspelled word in the corresponding array and increments the count of misspelled words

} // check()

Speller::~Speller() { } // ~Speller() _________________________________________________________________________________________ QuadraticProbing.cpp

#include "QuadraticProbing.h"

/** * Internal method to test if a positive number is prime. * Not an efficient algorithm. */ template bool QuadraticHashTable::isPrime( int n ) const { if( n == 2 || n == 3 ) return true;

if( n == 1 || n % 2 == 0 ) return false;

for( int i = 3; i * i <= n; i += 2 ) if( n % i == 0 ) return false;

return true; }

/** * Internal method to return a prime number at least as large as n. * Assumes n > 0. */ template int QuadraticHashTable::nextPrime( int n ) const { if( n % 2 == 0 ) n++;

for( ; !isPrime( n ); n += 2 ) ;

return n; }

/** * Construct the hash table. */ template QuadraticHashTable::QuadraticHashTable( const HashedObj & notFound, int size ) : array( nextPrime( size ) ), ITEM_NOT_FOUND( notFound ) { makeEmpty( ); }

/** * Insert item x into the hash table. If the item is * already present, then do nothing. */ template void QuadraticHashTable::insert( const HashedObj & x ) { // Insert x as active int currentPos = findPos( x ); if( isActive( currentPos ) ) return; array[ currentPos ] = HashEntry( x, ACTIVE );

// Rehash; see Section 5.5 if( ++currentSize > array.size( ) / 2 ) rehash( ); }

/** * Expand the hash table. */ template void QuadraticHashTable::rehash( ) { vector oldArray = array;

// Create new double-sized, empty table array.resize( nextPrime( 2 * oldArray.size( ) ) ); for( int j = 0; j < array.size( ); j++ ) array[ j ].info = EMPTY;

// Copy table over currentSize = 0; for( int i = 0; i < oldArray.size( ); i++ ) if( oldArray[ i ].info == ACTIVE ) insert( oldArray[ i ].element ); }

/** * Method that performs quadratic probing resolution. * Return the position where the search for x terminates. */ template int QuadraticHashTable::findPos( const HashedObj & x ) const { /* 1*/ int collisionNum = 0; /* 2*/ int currentPos = hash( x, array.size( ) );

/* 3*/ while( array[ currentPos ].info != EMPTY && array[ currentPos ].element != x ) { /* 4*/ currentPos += 2 * ++collisionNum - 1; // Compute ith probe /* 5*/ if( currentPos >= array.size( ) ) /* 6*/ currentPos -= array.size( ); }

/* 7*/ return currentPos; }

/** * Remove item x from the hash table. */ template void QuadraticHashTable::remove( const HashedObj & x ) { int currentPos = findPos( x ); if( isActive( currentPos ) ) array[ currentPos ].info = DELETED; }

/** * Find item x in the hash table. * Return the matching item, or ITEM_NOT_FOUND, if not found. */ template const HashedObj & QuadraticHashTable::find( const HashedObj & x ) const { int currentPos = findPos( x ); return isActive( currentPos ) ? array[ currentPos ].element : ITEM_NOT_FOUND; // Above mentions only if active, if it is return array element, else return not found }

/** * Make the hash table logically empty. */ template void QuadraticHashTable::makeEmpty( ) { currentSize = 0; for( int i = 0; i < array.size( ); i++ ) array[ i ].info = EMPTY; }

/** * Deep copy. */ template const QuadraticHashTable & QuadraticHashTable:: operator=( const QuadraticHashTable & rhs ) { if( this != &rhs ) { array = rhs.array; currentSize = rhs.currentSize; } return *this; }

/** * Return true if currentPos exists and is active. */ template bool QuadraticHashTable::isActive( int currentPos ) const { return array[ currentPos ].info == ACTIVE; }

/** * A hash routine for string objects. */ template int QuadraticHashTable::hash( const string & key, int tableSize ) const { int hashVal = 0;

for( int i = 0; i < key.length( ); i++ ) hashVal = 37 * hashVal + key[ i ];

hashVal %= tableSize; if( hashVal < 0 ) hashVal += tableSize;

return hashVal; }

/** * A hash routine for ints. */ template int QuadraticHashTable::hash( int key, int tableSize ) const { if( key < 0 ) key = -key; return key % tableSize; } _________________________________________________________________________________________ QuadraticProbing.h

#ifndef _QUADRATIC_PROBING_H_ #define _QUADRATIC_PROBING_H_

#include "vector.h" #include "mystring.h"

// QuadraticProbing Hash table class // // CONSTRUCTION: an initialization for ITEM_NOT_FOUND // and an approximate initial size or default of 101 // // ******************PUBLIC OPERATIONS********************* // void insert( x ) --> Insert x // void remove( x ) --> Remove x // Hashable find( x ) --> Return item that matches x // void makeEmpty( ) --> Remove all items // int hash( String str, int tableSize ) // --> Static method to hash strings

class QuadraticHashTable { public: explicit QuadraticHashTable( const HashedObj & notFound, int size = 101 ); QuadraticHashTable( const QuadraticHashTable & rhs ) : ITEM_NOT_FOUND( rhs.ITEM_NOT_FOUND ), array( rhs.array ), currentSize( rhs.currentSize ) { }

const HashedObj & find( const HashedObj & x ) const;

void makeEmpty( ); void insert( const HashedObj & x ); void remove( const HashedObj & x );

const QuadraticHashTable & operator=( const QuadraticHashTable & rhs );

enum EntryType { ACTIVE, EMPTY, DELETED }; private: struct HashEntry { HashedObj element; EntryType info;

HashEntry( const HashedObj & e = HashedObj( ), EntryType i = EMPTY ) : element( e ), info( i ) { } };

vector array; int currentSize; const HashedObj ITEM_NOT_FOUND; bool isPrime( int n ) const; int nextPrime( int n ) const; bool isActive( int currentPos ) const; char* findPos( const HashedObj & x ) const; char* hash( const string & key, int tableSize ) const; int hash( int key, int tableSize ) const; void rehash( ); };

#include "QuadraticProbing.cpp" #endif _________________________________________________________________________________________ Dict-3-4-1.txt (Dictionary)

luov vwucoxynjm lsrwwvov _________________________________________________________________________________________

Doc-3-4-1.txt (Checking for misspell words)

lsrwwvov vwuboxynjm luov vwucoxynj

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image_2

Step: 3

blur-text-image_3

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

More Books

Students also viewed these Databases questions