Question
Modify the attached sample code to accept valid sentences based upon the newly defined grammar above. The parser should also reject invalid sentences with a descriptive error message. Note that the program should still accept a filename from the command line as illustrated in the example; please, no hard-coded file paths in the source.
noun ::= (dog | cat | rat)!
article ::= (a | the)!
adj ::= (furry | fast | lazy | sneaky)!
verb ::= (loves | hates | eats)!
adverb ::= (quickly | silently)!
addl_verb ::= (chases | stalks)!
addl_noun ::= (house | tree)!
conj ::= (and | or)!
prep ::= (with | around | up)!
public class SyntaxAnalyzer {
private LexicalAnalyzer lexer; // The lexer which will provide the tokens
    /**
     * The constructor simply stores the lexer that will provide the tokens.
     */
    public SyntaxAnalyzer(LexicalAnalyzer lexer) {
        this.lexer = lexer;
    }

    /**
     * Begin analyzing...
     */
    public void analyze() throws ParseException {
        parseSentence(0);
    }

    // This method implements the BNF rule for a sentence from Section 2.2.
    // <sentence> ::= <noun_phrase> <verb> <noun_phrase>
    protected void parseSentence(int treeDepth) throws ParseException {
        log("<sentence>", treeDepth++);

        NounPhrase(treeDepth);
        Verb(treeDepth);
        NounPhrase(treeDepth);
    }

    // This method implements the BNF rule for a noun phrase from Section 2.2.
    // <noun_phrase> ::= <article> <noun>
    protected void NounPhrase(int treeDepth) throws ParseException {
        log("<noun_phrase>", treeDepth++);

        Article(treeDepth);
        Noun(treeDepth);
    }

    // This method implements the BNF rule for a verb from Section 2.2.
    // <verb> ::= loves | hates | eats
    protected void Verb(int treeDepth) throws ParseException {
        log("<verb> = " + lexer.lexemeBuffer, treeDepth);

        if (TOKEN.VERB != lexer.curToken) {
            String msg = "A verb was expected when '" + lexer.lexemeBuffer + "' was found.";
            throw new ParseException(msg);
        }
        lexer.parseNextToken();
    }

    // This method implements the BNF rule for a noun from Section 2.2.
    // <noun> ::= dog | cat | rat
    protected void Noun(int treeDepth) throws ParseException {
        log("<noun> = " + lexer.lexemeBuffer, treeDepth);

        if (TOKEN.NOUN != lexer.curToken) {
            String msg = "A noun was expected when '" + lexer.lexemeBuffer + "' was found.";
            throw new ParseException(msg);
        }
        lexer.parseNextToken();
    }

    // This method implements the BNF rule for an article from Section 2.2.
    // <article> ::= a | the
    protected void Article(int treeDepth) throws ParseException {
        log("<article> = " + lexer.lexemeBuffer, treeDepth);

        if (TOKEN.ARTICLE != lexer.curToken) {
            String msg = "An article was expected when '" + lexer.lexemeBuffer + "' was found.";
            throw new ParseException(msg);
        }
        lexer.parseNextToken();
    }

    // Indent by the current tree depth and print the parse-tree message.
    private void log(String msg, int treeDepth) {
        for (int i = 0; i < treeDepth; i++) {
            System.out.print(" ");
        }
        System.out.println(msg);
    }
}
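Extending this parser for the new grammar mostly means adding one check-and-advance method per new terminal category, mirroring Verb/Noun/Article above, and then calling those methods from whatever productions the new grammar defines. A minimal sketch for one such method, assuming a corresponding ADJECTIVE constant is added to the TOKEN enum (it is not in the attached code):

    // Sketch only - one possible addition inside SyntaxAnalyzer.
    // <adjective> ::= furry | fast | lazy | sneaky
    protected void Adjective(int treeDepth) throws ParseException {
        log("<adjective> = " + lexer.lexemeBuffer, treeDepth);

        // TOKEN.ADJECTIVE is an assumed new enum constant (see the TOKEN sketch below).
        if (TOKEN.ADJECTIVE != lexer.curToken) {
            String msg = "An adjective was expected when '" + lexer.lexemeBuffer + "' was found.";
            throw new ParseException(msg);
        }
        lexer.parseNextToken();
    }

The same pattern covers adverbs, prepositions, conjunctions, and the additional nouns and verbs.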
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public enum TOKEN {
ARTICLE("a", "the"), // a list of articles NOUN("dog", "cat", "rat"), // a list of nouns VERB("loves", "hates", "eats"), // a list of verbs UNKNOWN(); // keep our lexemes "type-safe"! // // The lexemes under this token private List lexemeList;
// Construct the token with the list of lexems private TOKEN(String... tokenStrings) { lexemeList = new ArrayList<>(tokenStrings.length); lexemeList.addAll(Arrays.asList(tokenStrings)); }
// Gets a token from a lexeme public static TOKEN fromLexeme(String str) { // Search through the lexemes looking for a match. for (TOKEN t : TOKEN.values()) { if (t.lexemeList.contains(str)) { return t; } }
// If nothing matches then return UNKNOWN. return UNKNOWN; } }
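The terminal lists from the new grammar map directly onto this enum; each new category simply becomes another constant with its lexemes. A sketch of the extra constants, to be inserted before UNKNOWN() above (the constant names are illustrative, not part of the attached code):

    // Sketch only - additional TOKEN constants for the new grammar.
    ADJECTIVE("furry", "fast", "lazy", "sneaky"),
    ADVERB("quickly", "silently"),
    ADDL_VERB("chases", "stalks"),
    ADDL_NOUN("house", "tree"),
    CONJUNCTION("and", "or"),
    PREPOSITION("with", "around", "up"),

No change is needed in fromLexeme(), since it already iterates over TOKEN.values().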
public class LexicalAnalyzer {
    private String sourceLine;
    private char nextChar;
    private int curPosition;
    protected TOKEN curToken;
    protected StringBuilder lexemeBuffer;

    /**
     * The main driver of this class. This method takes a "program", in this
     * case a single line of text in the form of a sentence, and gets the first
     * lexeme/token.
     */
    public void start(String line) throws ParseException {
        sourceLine = line;
        curPosition = 0;

        getChar();
        parseNextToken();
    }
    /**
     * This method does a character-by-character analysis to get the next token
     * and set it as the current token. This simple lexical analyzer does not
     * differentiate between letters, digits and other special characters - it
     * simply looks for characters, spaces and end-of-line characters to
     * determine the relevant tokens.
     */
    public void parseNextToken() throws ParseException {
        resetLexemeBuffer();

        // Ignore spaces and add the first character to the token
        getNextNonBlank();
        addChar();
        getChar();

        // Continue gathering characters for the token until a space or the
        // end of the line is reached.
        while ((nextChar != ' ') && (nextChar != '\n')) {
            addChar();
            getChar();
        }

        // Convert the gathered characters into a String
        String lexeme = lexemeBuffer.toString();

        // Set the new token
        this.curToken = TOKEN.fromLexeme(lexeme);
    }

    /**
     * This method gets the next character from the "program" string.
     */
    private void getChar() {
        if (curPosition < sourceLine.length()) {
            nextChar = sourceLine.charAt(curPosition);
            curPosition++;
        } else {
            nextChar = '\n'; // Signal the end of the line.
        }
    }

    /**
     * A (trivial) helper method to determine if the current character is a space.
     */
    private boolean isSpace(char c) {
        return (c == ' ');
    }

    /**
     * A helper method to get the next non-blank character.
     */
    private void getNextNonBlank() {
        while (isSpace(nextChar)) {
            getChar();
        }
    }

    /**
     * This method adds the current character to the token after checking to
     * make sure that the length of the token isn't too long - a lexical error
     * in this case.
     */
    private void addChar() throws ParseException {
        if (lexemeBuffer.length() <= 98) {
            lexemeBuffer.append(nextChar);
        } else {
            throw new ParseException("LEXICAL ERROR: The found lexeme is too long!");

            /* Code to skip lexical errors instead of throwing an error...
               Good for debugging, but problematic for (hopefully) obvious reasons. */
            // System.out.println("LEXICAL ERROR: The found lexeme is too long! -- Skipping");
            // resetLexemeBuffer();
            //
            // if (!isSpace(nextChar)) {
            //     while (!isSpace(nextChar)) {
            //         getChar();
            //     }
            // }
            //
            // getNextNonBlank();
            // addChar();
        }
    }

    /**
     * Simple method to reset the lexeme buffer.
     */
    private void resetLexemeBuffer() {
        lexemeBuffer = new StringBuilder();
    }
}
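The classes above throw and catch a ParseException that exposes getErrMsg(), but that class is not included in the attached code. A minimal version consistent with the way it is used here (a one-argument constructor plus a getErrMsg() accessor) could look like the following; the actual class in the sample project may differ:

public class ParseException extends Exception {

    private final String errMsg; // the descriptive error message

    public ParseException(String errMsg) {
        super(errMsg);
        this.errMsg = errMsg;
    }

    // Used by Compiler.processFile(...) when reporting a rejected sentence.
    public String getErrMsg() {
        return errMsg;
    }
}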
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Compiler {
    /**
     * It is assumed that the first argument provided is the name of the source
     * file that is to be "compiled".
     */
    public static void main(String[] args) throws IOException {
        // Debugging override - keep this commented out so that the filename
        // really does come from the command line (no hard-coded file paths).
        // args = new String[]{"grammar_sheier1.txt"};

        if (args.length < 1) {
            System.out.println("Need a filename!");
        } else {
            // Java 7 "try-with-resources" to create the file input buffer.
            try (BufferedReader br = new BufferedReader(new FileReader(args[0]))) {
                // Create the new lexer.
                LexicalAnalyzer lexer = new LexicalAnalyzer();

                // Start lexing and parsing.
                processFile(lexer, br);
            }
        }
    }
    /**
     * Reads each line of the input file and invokes the lexer and parser for each.
     */
    static void processFile(LexicalAnalyzer lexer, BufferedReader br) throws IOException {
        String sourceLine;

        // Read each line in the source file to be compiled as a unique sentence
        // to check against the grammar.
        while ((sourceLine = br.readLine()) != null) {
            // Ignore empty lines and comments.
            if (sourceLine.trim().length() <= 0) {
                continue;
            }
            if (sourceLine.trim().startsWith("#")) {
                System.out.println("Comment: " + sourceLine.substring(1).trim());
                continue;
            }

            // Create a new syntax analyzer over the provided lexer.
            SyntaxAnalyzer parser = new SyntaxAnalyzer(lexer);

            // Parse the given sentence against the given grammar. We assume that
            // the sentence production, <sentence>, is the start state.
            try {
                // Start the lexer...
                lexer.start(sourceLine);

                // Start the parser...
                parser.analyze();

                // No exceptions, so we must be good!
                System.out.printf("The sentence '%s' follows the BNF grammar.%n", sourceLine);
            } catch (ParseException error) {
                // If a syntax error was found, print that the sentence does not
                // follow the grammar.
                System.out.printf("SYNTAX ERROR while processing: '%s'%n", sourceLine);
                System.out.printf("ERROR MSG: %s%n", error.getErrMsg());
            }

            System.out.println("-----------------------------------------------------------");
        }
    }
}