Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

package index; import java.io.IOException; import java.io.Reader; import java.util.List; import java.util.Set; import comparators.TfIdfComparator; import documents.DocumentId; /** * A simplified document indexer and search engine. * *

package index;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;

import comparators.TfIdfComparator;
import documents.DocumentId;

/**
 * A simplified document indexer and search engine.
 *
 * <p>Documents are added to the engine one-by-one, and uniquely identified by a DocumentId.
 *
 * <p>Documents are internally represented as "terms", which are lowercased versions of each
 * word in the document. Queries for terms are also made on the lowercased version of the
 * term. Terms are therefore case-insensitive.
 *
 * <p>Lookups for documents can be done by term, and the most relevant document(s) to a
 * specific term (as computed by tf-idf) can also be retrieved.
 *
 * <p>Background topics: inverted indexes, and tf-idf (term frequency / inverse document
 * frequency) weighting.
 *
 * <p>Instances are mutable and perform no synchronization; confine an instance to one thread
 * or guard it externally.
 *
 * @author Marc Liberatore
 */
public class SearchEngine {

    /**
     * Separator between words: any run of non-word characters.
     *
     * NOTE(review): the class contract does not define what a "word" is. This splits on
     * {@code \W+} (Unicode-aware, so accented letters stay inside a word; identical to plain
     * {@code \W+} for ASCII text). Confirm against the project's tests.
     */
    private static final Pattern NON_WORD =
            Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Term counts for every added document, in insertion order.
     *
     * Assumes DocumentId implements equals/hashCode consistently, since it is used as a
     * hash key -- TODO confirm.
     */
    private final Map<DocumentId, Map<String, Integer>> termCountsByDocument =
            new LinkedHashMap<>();

    /** Inverted index: lowercased term to the ids of the documents that contain it. */
    private final Map<String, Set<DocumentId>> postingsByTerm = new HashMap<>();

    /**
     * Inserts a document into the search engine for later analysis and retrieval.
     *
     * <p>The document is uniquely identified by a documentId; attempts to re-insert the same
     * document are ignored (the reader is not consumed in that case).
     *
     * <p>The document is supplied as a Reader, which is read to exhaustion. Only the term
     * counts are retained, not the raw text. The reader is not closed; it remains owned by
     * the caller.
     *
     * <p>If reading fails, the engine is left unchanged: nothing is indexed for the document.
     *
     * @param documentId the unique identifier of the document; must not be null
     * @param reader the source of the document's text; must not be null
     * @throws IOException iff the reader throws an exception
     * @throws NullPointerException if documentId or reader is null
     */
    public void addDocument(DocumentId documentId, Reader reader) throws IOException {
        Objects.requireNonNull(documentId, "documentId");
        Objects.requireNonNull(reader, "reader");
        if (termCountsByDocument.containsKey(documentId)) {
            return;
        }

        // Tally into a local map first so an IOException cannot leave a half-indexed document.
        Map<String, Integer> counts = new HashMap<>();
        for (String word : NON_WORD.split(readFully(reader))) {
            if (word.isEmpty()) {
                continue; // split() yields an empty token for empty or delimiter-led text
            }
            String term = normalize(word);
            Integer seen = counts.get(term);
            counts.put(term, seen == null ? 1 : seen + 1);
        }

        termCountsByDocument.put(documentId, counts);
        for (String term : counts.keySet()) {
            Set<DocumentId> postings = postingsByTerm.get(term);
            if (postings == null) {
                postings = new LinkedHashSet<>();
                postingsByTerm.put(term, postings);
            }
            postings.add(documentId);
        }
    }

    /**
     * Returns the set of DocumentIds contained within the search engine that contain a given
     * term.
     *
     * <p>The result is an independent snapshot: it is never null, is empty when no document
     * contains the term, and is not affected by documents added later.
     *
     * @param term the term to look up, matched case-insensitively; must not be null
     * @return the set of DocumentIds that contain a given term
     * @throws NullPointerException if term is null
     */
    public Set<DocumentId> indexLookup(String term) {
        Set<DocumentId> postings = postingsByTerm.get(normalize(term));
        if (postings == null) {
            return new LinkedHashSet<>();
        }
        return new LinkedHashSet<>(postings);
    }

    /**
     * Returns the term frequency of a term in a particular document.
     *
     * <p>The term frequency is number of times the term appears in a document.
     *
     * @param documentId the document to inspect
     * @param term the term to count, matched case-insensitively; must not be null
     * @return the term frequency of a term in a particular document (0 if it never appears)
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     * @throws NullPointerException if term is null
     */
    public int termFrequency(DocumentId documentId, String term) throws IllegalArgumentException {
        Map<String, Integer> counts = termCountsByDocument.get(documentId);
        if (counts == null) {
            throw new IllegalArgumentException("document has not been added: " + documentId);
        }
        Integer count = counts.get(normalize(term));
        return count == null ? 0 : count;
    }

    /**
     * Returns the inverse document frequency of a term across all documents in the index.
     *
     * <p>For our purposes, IDF is defined as log ((1 + N) / (1 + M)) where N is the number of
     * documents in total, and M is the number of documents where the term appears. The
     * logarithm is the natural logarithm ({@link Math#log(double)}).
     *
     * @param term the term to score, matched case-insensitively; must not be null
     * @return the inverse document frequency of term; never negative because M never exceeds N
     * @throws NullPointerException if term is null
     */
    public double inverseDocumentFrequency(String term) {
        Set<DocumentId> postings = postingsByTerm.get(normalize(term));
        // Doubles on purpose: integer division would truncate the ratio before the log.
        double totalDocuments = termCountsByDocument.size();
        double matchingDocuments = postings == null ? 0 : postings.size();
        return Math.log((1 + totalDocuments) / (1 + matchingDocuments));
    }

    /**
     * Returns the tfidf score of a particular term for a particular document.
     *
     * <p>tfidf is the product of term frequency and inverse document frequency for the given
     * term and document.
     *
     * @param documentId the document to score
     * @param term the term to score, matched case-insensitively; must not be null
     * @return the tfidf of the term/document
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     * @throws NullPointerException if term is null
     */
    public double tfIdf(DocumentId documentId, String term) throws IllegalArgumentException {
        return termFrequency(documentId, term) * inverseDocumentFrequency(term);
    }

    /**
     * Returns a sorted list of documents, most relevant to least relevant, for the given term.
     *
     * <p>A document with a larger tfidf score is more relevant than a document with a lower
     * tfidf score. Each document in the returned list contains the term. Documents with equal
     * scores keep the order in which they were added to the engine.
     *
     * <p>NOTE(review): this file imports comparators.TfIdfComparator, presumably intended for
     * this ordering. Its API is not visible here, so an inline comparator is used; switch to
     * it if its tie-breaking rule is the required one.
     *
     * @param term the term to rank by, matched case-insensitively; must not be null
     * @param max the maximum number of documents to return (fewer may be returned); values
     *            of zero or less yield an empty list
     * @return a list of documents sorted in descending order by tfidf; never null
     * @throws NullPointerException if term is null
     */
    public List<DocumentId> relevanceLookup(String term, int max) {
        String key = normalize(term);
        Set<DocumentId> postings = postingsByTerm.get(key);
        if (postings == null || max <= 0) {
            return new ArrayList<>();
        }

        // Score each candidate once up front instead of recomputing inside the comparator.
        double idf = inverseDocumentFrequency(key);
        final Map<DocumentId, Double> scores = new HashMap<>();
        for (DocumentId id : postings) {
            scores.put(id, termCountsByDocument.get(id).get(key) * idf);
        }

        List<DocumentId> ranked = new ArrayList<>(postings);
        // Collections.sort is stable, so equal scores stay in insertion order.
        Collections.sort(ranked, new Comparator<DocumentId>() {
            @Override
            public int compare(DocumentId left, DocumentId right) {
                return Double.compare(scores.get(right), scores.get(left));
            }
        });
        return new ArrayList<>(ranked.subList(0, Math.min(max, ranked.size())));
    }

    /** Lowercases a word or query locale-independently so indexing and lookup agree. */
    private static String normalize(String term) {
        return Objects.requireNonNull(term, "term").toLowerCase(Locale.ROOT);
    }

    /** Drains the reader into a string, letting any IOException propagate to the caller. */
    private static String readFully(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[8192];
        int read;
        while ((read = reader.read(buffer)) != -1) {
            text.append(buffer, 0, read);
        }
        return text.toString();
    }

}

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Spatial Databases A Tour

Authors: Shashi Shekhar, Sanjay Chawla

1st Edition

0130174807, 978-0130174802

More Books

Students also viewed these Databases questions