Question
after the header, each line of the database file rebase210.txt contains the name of a restriction enzyme and possible DNA sites the enzyme may cut
after the header, each line of the database file rebase210.txt contains the name of a restriction enzyme and possible DNA sites the enzyme may cut (cut location is indicated by a ) in the following format: enzyme_acronym/recognition_sequence//recognition_sequence//
For instance the first few lines of rebase210.txt are: AanI/TTA'TAA// AarI/CACCTGCNNNN'NNNN/'NNNNNNNNGCAGGTG// AasI/GACNNNN'NNGTC// AatII/GACGT'C// AbsI/CC'TCGAGG// AccI/GT'MKAC// AccII/CG'CG// AccIII/T'CCGGA// Acc16I/TGC'GCA// Acc36I/ACCTGCNNNN'NNNN/'NNNNNNNNGCAGGT//
That means that each line contains one enzyme acronym associated with one or more recognition sequences. For example on line 2:The enzyme acronym AarI corresponds to the two recognition sequences CACCTGCNNNN'NNNN and 'NNNNNNNNGCAGGTG.
Part (a) You will create a parser to read in this database and construct an AVL tree. For each line of the database and for each recognition sequence in that line, you will create a new SequenceMap object that contains the recognition sequence as its recognition_sequence_ and the enzyme acronym as the only string of its enzyme_acronyms_, and you will insert this object into the tree. This is explained with the following pseudo code:
Tree
while (GetNextLineFromDatabaseFile(db_line)) {
// Get the first part of the line:
string an_enz_acro = GetEnzymeAcronym(db_line); string a_reco_seq; while (GetNextRegocnitionSequence(db_line, a_rego_seq){ SequenceMap new_sequence_map(a_reco_seq, an_enz_acro); a_tree.insert(new_sequence_map); } }
In the case that the new_sequence_map.recognition_sequence_ equals the recognition_sequence_ of a node X in the tree, then the search trees insert() function will call the X.Merge(new_sequence_map) function of the existing element. This will have the effect of updating the enzyme_acronym_ of X. Note, that this will be part of the functionality of the insert() function. The Merge() will only be called in case of duplicates as described above. Otherwise, no Merge() is required and the new_sequence_map will be inserted into the tree. To implement the above, write a test program named query_tree which will use your parser to create a search tree and then allow the user to query it using a recognition sequence. If that sequence exists in the tree then this routine should print all the corresponding enzymes that correspond to that recognition sequence. Your programs should run from the terminal as follows: The user should enter THREE strings (supposed to be recognition sequences) for instance:
CC'TCGAGG
TTA'TAA
TC'C
Your program should print in the standard output their associated enzyme acronyms. In the above example the output will be
AbsI
AanI PsiI
Not Found
I will test it with a file containing three strings and run your code like that: To make things easier the following is the already written SequenceMap class to which this part is related with
#ifndef SEQUENCEMAP_H #define SEQUENCEMAP_H
class SequenceMap { public: SequenceMap() = default; SequenceMap(const SequenceMap &rhs) = default; SequenceMap& operator=(const SequenceMap &rhs) = default; SequenceMap(SequenceMap &&rhs) = default; SequenceMap& operator=(SequenceMap &&rhs) = default; ~SequenceMap() = default; SequenceMap(const string &a_rec_seq, const string &an_enz_acro) { recognition_sequence_ = a_rec_seq; enzyme_acronyms_.push_back(an_enz_acro); } bool operator<(const SequenceMap &rhs) const { return (recognition_sequence_ < rhs.recognition_sequence_); } friend std::ostream &operator<<(std::ostream &out, const SequenceMap &a_SequenceMap) { out << a_SequenceMap.recognition_sequence_ << " "; for (int i = 0; i < a_SequenceMap.enzyme_acronyms_.size(); ++i) { out << a_SequenceMap.enzyme_acronyms_[i] << " "; } return out; } void Merge(const SequenceMap &other_sequence) { for (int i = 0; i < other_sequence.enzyme_acronyms_.size(); ++i) { enzyme_acronyms_.push_back(other_sequence.enzyme_acronyms_[i]); } } private: string recognition_sequence_ ; vector
#endif
And here is the avl_tree.h to whcih you have to insert.
#ifndef AVL_TREE_H #define AVL_TREE_H
template
AvlTree( AvlTree && rhs ) : root{ rhs.root } { rhs.root = nullptr; } ~AvlTree( ) { makeEmpty( ); }
/** * Deep copy. */ AvlTree & operator=( const AvlTree & rhs ) { AvlTree copy = rhs; std::swap( *this, copy ); return *this; } /** * Move. */ AvlTree & operator=( AvlTree && rhs ) { std::swap( root, rhs.root ); return *this; } /** * Find the smallest item in the tree. */ const Comparable & findMin( ) const { if( isEmpty( ) ) throw UnderflowException{ }; return findMin( root )->element; }
/** * Find the largest item in the tree. */ const Comparable & findMax( ) const { if( isEmpty( ) ) throw UnderflowException{ }; return findMax( root )->element; }
/** * Returns true if x is found in the tree. */ bool contains( const Comparable & x ) const { return contains( x, root ); } void insert( const Comparable & x ) { insert( x, root ); } /** * Insert x into the tree; duplicates are ignored. */ void insert( Comparable && x ) { insert( std::move( x ), root ); }
private: struct AvlNode { Comparable element; AvlNode *left; AvlNode *right; int height;
AvlNode( const Comparable & ele, AvlNode *lt, AvlNode *rt, int h = 0 ) : element{ ele }, left{ lt }, right{ rt }, height{ h } { } AvlNode( Comparable && ele, AvlNode *lt, AvlNode *rt, int h = 0 ) : element{ std::move( ele ) }, left{ lt }, right{ rt }, height{ h } { } };
AvlNode *root; /** * Internal method to insert into a subtree. */ void insert( const Comparable & x, AvlNode * & t ) { if( t == nullptr ) t = new AvlNode{ x, nullptr, nullptr }; else if( x < t->element ) insert( x, t->left ); else if( t->element < x ) insert( x, t->right ); balance( t ); }
/** * Internal method to insert into a subtree. */ void insert( Comparable && x, AvlNode * & t ) { if( t == nullptr ) t = new AvlNode{ std::move( x ), nullptr, nullptr }; else if( x < t->element ) insert( std::move( x ), t->left ); else if( t->element < x ) insert( std::move( x ), t->right ); balance( t ); } static const int ALLOWED_IMBALANCE = 1;
// Assume t is balanced or within one of being balanced void balance( AvlNode * & t ) { if( t == nullptr ) return; if( height( t->left ) - height( t->right ) > ALLOWED_IMBALANCE ) { if( height( t->left->left ) >= height( t->left->right ) ) rotateWithLeftChild( t ); else doubleWithLeftChild( t ); } else if( height( t->right ) - height( t->left ) > ALLOWED_IMBALANCE ) { if( height( t->right->right ) >= height( t->right->left ) ) rotateWithRightChild( t ); else doubleWithRightChild( t ); } t->height = max( height( t->left ), height( t->right ) ) + 1; } /** * Internal method to find the smallest item in a subtree t. */ AvlNode * findMin( AvlNode *t ) const { if( t == nullptr ) return nullptr; if( t->left == nullptr ) return t; return findMin( t->left ); } AvlNode * findMax( AvlNode *t ) const { if( t != nullptr ) while( t->right != nullptr ) t = t->right; return t; }
/** * Internal method to test if an item is in a subtree. */ bool contains( const Comparable & x, AvlNode *t ) const { if( t == nullptr ) return false; else if( x < t->element ) return contains( x, t->left ); else if( t->element < x ) return contains( x, t->right ); else return true; // Match } int height( AvlNode *t ) const { return t == nullptr ? -1 : t->height; } void rotateWithLeftChild( AvlNode * & k2 ) { AvlNode *k1 = k2->left; k2->left = k1->right; k1->right = k2; k2->height = max( height( k2->left ), height( k2->right ) ) + 1; k1->height = max( height( k1->left ), k2->height ) + 1; k2 = k1; } void doubleWithLeftChild( AvlNode * & k3 ) { rotateWithRightChild( k3->left ); rotateWithLeftChild( k3 ); } };
#endif
//
#include "avl_tree.h" #include "sequence_map.h" #include
namespace {
// @db_filename: an input filename. // @a_tree: an input tree of the type TreeType. It is assumed to be // empty. template
// Get the first part of the line:
string an_enz_acro = GetEnzymeAcronym(db_line); string a_reco_seq; while (GetNextRegocnitionSequence(db_line, a_rego_seq){ SequenceMap new_sequence_map(a_reco_seq, an_enz_acro); a_tree.insert(new_sequence_map); } // End second while.
}
// End first while. // Code for running Part2(a) // You can use public functions of TreeType. For example: a_tree.insert(10); a_tree.printTree(); }
} // namespace
int main(int argc, char **argv) { if (argc != 2) { cout << "Usage: " << argv[0] << "
Thank you so much. pretty much most of the code is written but please help me with the parser and insertion. thank you. Plz make any needed changes to the existing code.
Plz just make sure that the input and expected output are expected as indicated above.
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started