Question
Rose, a local software engineer, was assigned to fix the code. She is known for her good design and coding skills. While she is usually
Rose, a local software engineer, was assigned to fix the code. She is known for her good design and coding skills. While she is usually very rational, she has decided that it is not worth fixing Jack Hackers code below. Shed rather rewrite it from scratch! Rose is right the code below doesnt completely work, nor is it particularly well written or efficient. For Part 1 of this assignment, identify any 10 problems with this code that make it bad code.
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// File: cnnCrawler.java
//
// This code looks at the CNN website and follows some links to get info on articles that I want more
// info on.
// All output is written in the working directory to: cnnCrawlerOutput.txt
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
import gnu.regexp.*;
import java.net.*;
import java.io.*;
public class cnnCrawler{
public static void main(String[] args)
{
StringBuffer basePage = new StringBuffer();
// Connect to CNN and get the document
basePage = getBasePageContents("http://www.cnn.com");
// Look at the area of interest (The "MORE FROM CNN" section)
basePage = initialIsolateBasePageContents(basePage);
// Pull all of the URLs out
basePage = getInfo(basePage, " ]*|/b>]*");
basePage = getInfo(basePage, "\"/[^(\")]*");
basePage = getInfo(basePage,"\"[^&]*");
// Go to the URLs and pull out the information of interest and
// write to file.
goToURLs(basePage);
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: getBasePageContents
//
// This method opens a connection to the webpage we are interested in and stores
// all of the text on the page
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer getBasePageContents(String myURL){
try{
// Set base document to CNN, open connection,
// and copy the source text into a buffer
URL cnnBaseDoc = new URL(myURL);
cnnBaseDoc.openConnection();
BufferedReader cnnBaseBuffer = new BufferedReader(
new InputStreamReader(
cnnBaseDoc.openStream()));
String cnnBaseInputLine;
StringBuffer tempDocument = new StringBuffer();
while ((cnnBaseInputLine = cnnBaseBuffer.readLine()) != null){
tempDocument.append(cnnBaseInputLine);
}
cnnBaseBuffer.close();
return(tempDocument);
}
catch(MalformedURLException e) {
System.out.println("Unable to create URL object");
return(null);
}
catch(IOException e){
System.out.println("Unable to open URL");
return(null);
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: initialIsolateBasePageContents
//
// This method isolates us to store only the section we are interest in --
// the "MORE FROM CNN" section
//
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer initialIsolateBasePageContents(StringBuffer basePage){
try{
RE document = new RE(basePage);
// Define the left and right isolators
String sLeft = new String("MORE FROM CNN[//w//W]*");
RE leftCntxt = new RE(sLeft);
RE rightCntxt= new RE(">SPORTS");
StringBuffer sLIsolator = new StringBuffer("");
int iLIsolatorIndex = 0;
RE regLIsolator = new RE(leftCntxt);
REMatch ctxtLMatch = regLIsolator.getMatch(basePage);
sLIsolator.append(ctxtLMatch.toString());
iLIsolatorIndex = ctxtLMatch.getStartIndex();
// Find the Right Isolator
StringBuffer sRIsolator = new StringBuffer();
RE regRIsolator = new RE(rightCntxt);
int iRIsolatorIndex = 0;
REMatch ctxtRMatch = regRIsolator.getMatch(basePage);
sRIsolator.append(ctxtRMatch.toString());
iRIsolatorIndex = ctxtRMatch.getStartIndex();
basePage.delete(iRIsolatorIndex, basePage.length());
basePage.delete(0, iLIsolatorIndex);
return(basePage);
}
catch(REException e){
System.out.println("RE Exception");
return(null);
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: getInfo
//
// This method applies the specified regular expression to the string passed in
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer getInfo(StringBuffer textToSearch, String regExp){
try{
StringBuffer sIsolated = new StringBuffer("");
int iLIsolatorIndex = 0;
String sLeft = new String(regExp);
RE leftCntxt = new RE(sLeft);
RE regLIsolator = new RE(leftCntxt);
REMatchEnumeration ctxtLMatch = regLIsolator.getMatchEnumeration(textToSearch);
while (ctxtLMatch.hasMoreMatches()){
sIsolated.append(ctxtLMatch.nextMatch().toString());
sIsolated.append(" ");
}
return(sIsolated);
}
catch(REException e){
System.out.println("RE Exception");
return(null);
}
}
public static void goToURLs(StringBuffer textToSearch)
{
try{
StringBuffer interestingDoc = new StringBuffer("");
StringBuffer sInfoForFile = new StringBuffer("");
int numPage=0;
FileOutputStream fCnnOut;
PrintStream pCnnOut;
String sLeft = new String("/[^\"]*");
RE leftCntxt = new RE(sLeft);
String sIsolated = new String();
int iLIsolatorIndex = 0;
RE regLIsolator = new RE(leftCntxt);
REMatchEnumeration ctxtLMatch = regLIsolator.getMatchEnumeration(textToSearch);
fCnnOut = new FileOutputStream("cnnCrawlerOutput.txt");
pCnnOut = new PrintStream(fCnnOut);
while (ctxtLMatch.hasMoreMatches())
{
numPage++;
sIsolated = "http://www.cnn.com";
sIsolated += (ctxtLMatch.nextMatch().toString());
interestingDoc = connectToURLs(sIsolated);
sInfoForFile = getDocInfo(interestingDoc, sIsolated, numPage);
pCnnOut.println (sInfoForFile);
}
pCnnOut.close();
System.out.println("You may view the output in file: cnnCrawlerOutput.txt.");
}
catch(REException e){
System.out.println("RE Exception");
}
catch (Exception e)
{
System.out.println ("Error writing file.");
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: connectToURLs
// This method opens a URL and returns the text of the page
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer connectToURLs(String urlText){
try{
URL cnnBaseDoc = new URL(urlText);
cnnBaseDoc.openConnection();
BufferedReader cnnBaseBuffer = new BufferedReader(
new InputStreamReader(
cnnBaseDoc.openStream()));
String cnnBaseInputLine;
StringBuffer tempDocument = new StringBuffer();
while ((cnnBaseInputLine = cnnBaseBuffer.readLine()) != null){
tempDocument.append(cnnBaseInputLine);
}
cnnBaseBuffer.close();
return(tempDocument);
}
catch(MalformedURLException e) {
System.out.println("Unable to create URL object");
return(null);
}
catch(IOException e){
System.out.println("Unable to open URL");
return(null);
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: getDocInfo
//
// This method returns the interesting information that we were asked to parse out
// including: Date, Place, Headline, URL, and First paragraph.
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer getDocInfo(StringBuffer doc, String URL, int ID){
StringBuffer importantInfoToReturn = new StringBuffer("");
StringBuffer Headline = new StringBuffer("");
StringBuffer Date = new StringBuffer("");
StringBuffer Place = new StringBuffer("");
StringBuffer FirstParagraph = new StringBuffer("");
URL = URL.substring(0, (URL.length()-1));
Date.append(getInfo(doc, "name=\"DATE\" content=\"[^>]*"));
if(Date.length() > 0){
Date.delete(0,21);
Date.delete((Date.length()-1), Date.length());
}
else{
Date.append("No date Reported.");
}
Place.append(getInfo(doc, "
[^(
)]*|
[^-]*"));
if(Place.length() > 0){
Place.delete(0,6);
}
else{
Place.append("No location Reported.");
}
Headline.append(getInfo(doc, "
if(Headline.length() > 0){
Headline.delete(0,17);
Headline.delete((Headline.length()-1), Headline.length());
}
else{
Headline.append("No headline Reported.");
}
FirstParagraph.append(getInfo(doc, "DESCRIPTION\" content=[^>]*"));
if(FirstParagraph.length() > 0){
FirstParagraph.delete(0, 22);
FirstParagraph.delete(FirstParagraph.length()-1, FirstParagraph.length());
}
importantInfoToReturn.append(" ");
importantInfoToReturn.append((ID + " | "));
importantInfoToReturn.append((Headline + " | "));
importantInfoToReturn.append((URL + " | "));
importantInfoToReturn.append((Date + " | "));
importantInfoToReturn.append((Place + " | "));
importantInfoToReturn.append((FirstParagraph));
return(importantInfoToReturn);
}
}
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started