Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Implement the pattern-matching algorithm shown in the pseudo-code listing below. The 'slist' object in the pseudo-code is a map data structure. Most listings of this

Implement the pattern-matching algorithm shown in the pseudo-code listing below. The 'slist' object in the pseudo-code is a map data structure. Most listings of this algorithm's use arrays, however, a map is much more space efficient.

Your map data structure is your dynamic-array, converted to a map that takes pairs as data members. Also, the map is implemented as a hash-table. The midterm contained a hash-table section that handled unique-string combinations. For this map, design your hashing function to handle 'unique strings' of length 1 thru 8.

This algorithm expects unique strings of length 1 (the lower ascii table). Test your implementation using a range of text strings and patterns. For testing purposes, assume the strings are unique and you do not need to check for uniqueness.

A second test would be to test unique strings of length > 1. Note that a unique string is a string that contains unique characters not present in other strings. For example: , , are unique strings of length 3, while ,, are not unique.

See algorithms below for pseudo-code. Also, the uploaded sample code for Rabin-Karp (RabinKarp.cpp), has a bug in the implementation: sometimes it finds a match, and sometimes it doesn't. It should always find it, as it's a character-by-character crawler. Part 2 of this assignment is to fix bug, so it finds it in all cases.

Pseudo Code:

Algorithm Position(c, s, t) 1 for o = len(t)+1 to 1 dec o 2 f = true 3 for i = 0 to len(s) step i 4 it = o - len(s) - 1 + i 5 if it >= 0 and s[i] != t[it] 6 f = false 7 it = o - len(s) - 1 8 if f == true and (it<=0 or t[it-1] ne c) 9 return len(t) - o + 1 10 return -1 Algorithm BadShift(p) 1 for x = 0 to len(p)-1 step 1 2 slist[p[x]] = len(p) - x - 1 Algorithm GoodShift(k) 1 b = "" 2 for i = 0 to len(k) step 1 3 slist[len(b)] = Position(k[len(k) - 1 - i],b,k) 4 b = k[len(k) - 1 - i] + b 5 return slist Algorithm Search(t,p) 1 g = GoodShift(p) 2 b = BadShift(p) 3 while i < len(t) - len(p) + 1 4 j = len(p) 5 while j > 0 && p[j-1] == t[i+j-1] 6 j -= 1 7 if j > 0 8 bs = b[t[i+j-1]] 9 gs = g[len(p)-j] 10 if bs > gs 11 i += bs 12 else 13 i += gs 14 else 15 return i 16 return -1 Algorithm Test() 1 target = "block of text ..." 2 pattern = "some pattern" 3 Search(target,pattern) 

Rabin Karp.cpp:

#include

#include

#include

#include

#include

#include

#include "ConsoleColor.h"

using namespace std;

// the number of characters in input alphabet

#define ascii 128 // lower ascii

// utility

void Output(string stext, int startindex)

{

for (int i = 0; i < stext.length(); i++)

{

if (i < startindex)

cout << yellow << stext[i];

else

cout << red << stext[i];

}

cout << white << endl;

}

// A linear approach: Seach character by character

void BruteForce(string pattern, string text)

{

int ncompares = 0;

for (int i = 0; i < text.length(); i++)

{

Output(text, i);

int j = 0;

for (; j < pattern.length(); j++)

{

ncompares++;

if (text[i + j] != pattern[j])

break;

}

if (j == pattern.length())

{

cout << cyan << "Pattern found at index " <<

i << endl;

}

}

cout << white << "BruteForce compares = " << ncompares << endl;

}

// Optimized BF: skips some characters when match is found

void BFOptimized(string pattern, string text)

{

int ncompares = 0;

int M = pattern.length()-1;

for (int i = 0; i < text.length(); i++)

{

Output(text, i);

ncompares++;

if (text[i] == pattern[0])

{

ncompares++;

if ((i + M) > text.length())

break;

if (text[i + M] == pattern[M])

{

bool bfound = true;

for (int j = 1; j < M; j++)

{

ncompares++;

if (text[i + j] !=

pattern[j])

{

bfound = false;

break;

}

}

if (bfound)

{

cout << cyan << "Pattern

found at index " << i << endl;

i += M;

}

}

}

}

cout << white << "BFOptimized compares = " << ncompares << endl;

}

// Optimized BF Skip: skips based first character match

void BFSkip(string pattern, string text)

{

int ncompares = 0;

int i = 0;

while (i < text.length())

{

Output(text, i);

ncompares++;

if (text[i] == pattern[0])

{

ncompares++;

int bskip = i;

if (i + pattern.length() > text.length())

break;

if (text[i + pattern.length()] ==

pattern[pattern.length()-1])

{

bool bfound = true;

for (int j = 1; j < pattern.length();

j++)

{

ncompares++;

if (text[i + j] !=

pattern[j])

{

bfound = false;

i++;

break;

}

}

if (bfound)

{

cout << cyan << "Pattern

found at index " << i << endl;

i += pattern.length();

}

}

else

i = bskip + pattern.length();

}

else

i++;

}

cout << white << "BFSkip compares = " << ncompares << endl;

}

void computeTF(string pattern, vector>& TF)

{

int state = 0;

cout << cyan << "Table" << endl;

for (int state = 0; state <= pattern.length(); ++state)

{

TF[state][pattern[state]] = state + 1;

cout << cyan << "{" << state << "," <<

TF[state][pattern[state]] << "}" << endl;

}

}

void FiniteAutomata(string pattern, string target)

{

vector> TF(pattern.length() + 1);

computeTF(pattern, TF);

int ncompares = 0;

int state = 0;

for (int i = 0; i < target.length(); i++)

{

Output(target, i);

state = TF[state][target[i]];

ncompares++;

if (state == pattern.length())

cout << cyan << "Pattern found at index " <<

i - pattern.length() + 1 << endl;

}

cout << white << "Finite Automata compares = " << ncompares << endl;

}

void RabinKarp(string pattern, string text, int prime)

{

int ncompares = 0;

int jndex = 0;

int phash = 0; // hash value for pattern

int thash = 0; // hash value for txt

int hash = 1;

// The value of h would be "pow(d, M-1)%q"

hash = int(pow(ascii, pattern.length() - 1)) % prime;

if (hash < 0)

hash = hash + prime;

// Calculate the hash value of pattern and first window of text

for (int index = 0; index < pattern.length(); index++)

{

phash = (ascii * phash + pattern[index]) % prime;

thash = (ascii * thash + text[index]) % prime;

}

cout << cyan << "hash = " << hash << '\t';

cout << cyan << "phash = " << phash << '\t';

cout << cyan << "thash = " << thash << endl;

for (int index = 0; index <= text.length() - pattern.length();

index++)

{

ncompares++;

Output(text, index);

if (phash == thash)

{

for (jndex = 0; jndex < pattern.length();

jndex++)

{

if (text[index + jndex] !=

pattern[jndex])

break;

}

if (jndex == pattern.length())

cout << white << "Pattern found at

index " << index << endl;

}

if (index < (text.length() - pattern.length()))

{

thash = (ascii * (thash - text[index] * hash)

+ text[index + pattern.length()]) % prime;

if (thash < 0)

thash = (thash + prime);

cout << cyan << "text hash " << thash <<

endl;

}

}

cout << white << "RabinKarp compares = " << ncompares << endl;

}

void preKMP(string pattern, vector& prefix)

{

int m = pattern.length();

prefix[0] = -1;

for (int i = 1; i

{

int p = prefix[i - 1];

while (p >= 0)

{

if (pattern[p] == pattern[i - 1])

break;

else

p = prefix[p];

}

prefix[i] = p + 1;

}

for (int i = 0; i

cout << cyan << "index " << i << " skip " << prefix[i] <<

endl;

}

void KMP(string pattern, string target)

{

vector prefix(pattern.length());

preKMP(pattern, prefix);

int index = 0;

int pat = 0;

int ncompares = 0;

while (index < target.length())

{

Output(target, index);

if (pat == -1)

{

index++;

pat = 0;

}

else if (target[index] == pattern[pat])

{

index++;

pat++;

if (pat == pattern.length())

{

cout << cyan << "Found index at " <<

pat - pattern.length() << endl;

pat = 0;

}

}

else

{

pat = prefix[pat];

ncompares++;

}

}

cout << white << "KMP compares " << ncompares << endl;

}

void main()

{

//string text =

"ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCGCCCTGCCCCTG

GAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGAC

TTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCACACAGTCCTCATAGGAGAGGAAGCT

CGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAA

CTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGAC

CTGAA";

//string pattern = "ACACAGT";

//cout << "Text length " << text.length() << endl;

//cout << purple << "BruteForce ========== ";

//BruteForce(pattern, text);

//cout << purple << "BFOptimized ========== ";

//BFOptimized(pattern, text);

//cout << purple << "BFSkip ========== ";

//BFSkip(pattern, text);

//cout << purple << "FiniteAutomata ========== ";

//FiniteAutomata(pattern, text);

//cout << purple << "RabinKarp ========== ";

//string rtext = ",<12250 Stevens Creek

Blvd>,,<95014>,<864-5300>";

//string rpattern = "Cupertino";

//string rtext = "ACATACGACACAGT";

//string rpattern = "CAGT";

//int prime = 157; // A prime number

//RabinKarp(rpattern, rtext, prime);

string ktext =

"ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCGCCCTGCCCCTG

GAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGAC

TTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCACACAGTCCTCATAGGAGAGGAAGCT

CGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAA

CTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGAC

CTGAA";

string kpattern = "ACACAGT";

cout << purple << "KMP ========== ";

KMP(kpattern, ktext);

}

ConsoleColor.h:

#pragma once

#include

#include

using namespace std;

enum COLOR

{

// Text foreground colors

// Standard text colors

GRAY_TEXT = 8,

BLUE_TEXT,

GREEN_TEXT,

CYAN_TEXT,

RED_TEXT,

MAGENTA_TEXT,

YELLOW_TEXT,

WHITE_TEXT,

// Faded text colors

BLACK_TEXT = 0,

BLUE_FADE_TEXT,

GREEN_FADE_TEXT,

CYAN_FADE_TEXT,

RED_FADE_TEXT,

MAGENTA_FADE_TEXT,

YELLOW_FADE_TEXT,

WHITE_FADE_TEXT,

// Standard text background color

GRAY_BACKGROUND = GRAY_TEXT << 4,

BLUE_BACKGROUND = BLUE_TEXT << 4,

GREEN_BACKGROUND = GREEN_TEXT << 4,

CYAN_BACKGROUND = CYAN_TEXT << 4,

RED_BACKGROUND = RED_TEXT << 4,

MAGENTA_BACKGROUND = MAGENTA_TEXT << 4,

YELLOW_BACKGROUND = YELLOW_TEXT << 4,

WHITE_BACKGROUND = WHITE_TEXT << 4,

// Faded text background color

BLACK_BACKGROUND = BLACK_TEXT << 4,

BLUE_FADE_BACKGROUND = BLUE_FADE_TEXT << 4,

GREEN_FADE_BACKGROUND = GREEN_FADE_TEXT << 4,

CYAN_FADE_BACKGROUND = CYAN_FADE_TEXT << 4,

RED_FADE_BACKGROUND = RED_FADE_TEXT << 4,

MAGENTA_FADE_BACKGROUND = MAGENTA_FADE_TEXT << 4,

YELLOW_FADE_BACKGROUND = YELLOW_FADE_TEXT << 4,

WHITE_FADE_BACKGROUND = WHITE_FADE_TEXT << 4

};

inline ostream& purple(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, MAGENTA_TEXT);

return s;

}

inline ostream& cyan(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, CYAN_TEXT);

return s;

}

inline ostream& backblue(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, CYAN_TEXT | BLUE_BACKGROUND);

return s;

}

inline ostream& blue(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, BLUE_TEXT);

return s;

}

inline ostream& red(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, RED_TEXT);

return s;

}

inline ostream& green(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, GREEN_TEXT);

return s;

}

inline ostream& yellow(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, YELLOW_TEXT);

return s;

}

inline ostream& white(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, WHITE_TEXT);

return s;

}

inline ostream& backwhite(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, WHITE_FADE_BACKGROUND);

return s;

}

// experimental

inline ostream& orange(ostream &s)

{

HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute(hStdout, YELLOW_FADE_TEXT);

return s;

}

struct color

{

color(WORD attribute) : m_color(attribute) { };

WORD m_color;

ostream& operator()(ostream& os)

{

return os;

}

};

template

basic_ostream<_Elem,_Traits>& operator<<(basic_ostream<_Elem,_Traits>& s,

color& c)

{

HANDLE hStdout=GetStdHandle(STD_OUTPUT_HANDLE);

SetConsoleTextAttribute( hStdout, c.m_color );

return s;

}

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Database Fundamentals Study Guide

Authors: Dr. Sergio Pisano

1st Edition

B09K1WW84J, 979-8985115307

More Books

Students also viewed these Databases questions