Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

The code is in Python ranking.py import json import typing def term_count(query: str, document: str) -> int: count = 0 query_terms = query.lower().split() document_terms =

The code is in Python

ranking.py

import json

import typing

def term_count(query: str, document: str) -> int:
    """
    Count how many times the terms of the query occur in the document.

    Both strings are lower-cased and split on whitespace. Every occurrence of a
    query term in the document adds one to the result, and a term that is
    repeated in the query is counted once per repetition.

    :param query: The text a user searched for.
    :param document: A candidate document.
    :return: The number of (query term, document term) pairs that are equal.
    """
    # Imported locally so the function does not depend on a module-level import.
    from collections import Counter

    query_terms = query.lower().split()
    # Tally the document once so that each query term becomes a dictionary
    # lookup instead of another full scan over the document terms.
    document_term_counts = Counter(document.lower().split())
    # A Counter returns 0 for terms it has not seen, so absent terms add nothing.
    return sum(document_term_counts[term] for term in query_terms)

def boolean_term_count(query: str, document: str) -> int:
    """
    Count the query terms that appear at least once in the document.

    Both strings are lower-cased and split on whitespace. Each query term
    contributes at most one to the result no matter how often it occurs in the
    document; a term repeated in the query is counted once per repetition.

    :param query: The text a user searched for.
    :param document: A candidate document.
    :return: The number of query terms that are present in the document.
    """
    query_terms = query.lower().split()
    # Build the set once: membership tests are then constant time instead of
    # a linear scan of the document for every query term.
    document_terms = set(document.lower().split())
    return sum(1 for term in query_terms if term in document_terms)

def search(query: str, documents: typing.List[str]) -> typing.List[str]:
    """
    Rank all documents by their term count for the query.

    :param query: The text to search for.
    :param documents: A list of strings representing the documents to rank.
    :return: Every document, ordered from the highest to the lowest term count.
        Documents with equal counts keep their relative input order.
    """
    # Pair each document with its score so the two stay together while sorting.
    scored = [(term_count(query=query, document=text), text) for text in documents]
    # The sort is stable, also with reverse=True, so ties preserve input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [text for _, text in scored]

def run_search(
    data_path: str = r'C:\Users\Alex\Documents\DePaul\datasets\wiki_small\wiki_small.json',
) -> None:
    """
    Interactive search loop over the documents stored in a JSON file.

    The file must contain a JSON list of records, each with an 'init_text'
    entry. The user is prompted for queries and the result of search() is
    printed for each one; entering an empty query ends the loop.

    :param data_path: Location of the JSON data file. Defaults to the path that
        was previously hard-coded in this function.
    """
    # Decode explicitly as UTF-8: without an encoding argument open() falls back
    # to the platform default, which is not UTF-8 on every system.
    with open(data_path, encoding='utf-8') as fp:
        data = json.load(fp)
    documents = [record['init_text'] for record in data]
    query = input("Please enter a query:")
    while query:
        print(search(query, documents))
        query = input("Please enter a query:")

matching.py

import typing

def search(query: str, documents: typing.List[str]) -> typing.List[str]:
    """
    Naive search: keep only the documents that match the query.

    :param query: The text to search for.
    :param documents: A list of strings representing documents that we are searching over.
    :return: The documents for which boolean_term_match is true, in input order.
    """
    # Equivalent to looping over the documents and appending every match to a
    # result list.
    return [doc for doc in documents if boolean_term_match(query=query, document=doc)]

def string_match(query: str, document: str) -> bool:
    """
    Substring matching: a document matches when it contains the query verbatim.

    The comparison is done on the strings exactly as given (no lower-casing and
    no splitting into terms).

    :param query: The text a user searched for.
    :param document: A candidate document.
    :return: True if the query occurs as a substring of the document, False otherwise.
    """
    is_substring = query in document
    return is_substring

def boolean_term_match(query: str, document: str) -> bool:
    """
    Boolean matching function.

    Both strings are lower-cased and split on whitespace before comparison.

    :param query: The text a user searched for.
    :param document: A candidate document.
    :return: True if all terms in the query are also in the document and False
        otherwise. A query without any terms matches every document.
    """
    query_terms: typing.List[str] = query.lower().split()
    # Build the set once so each membership test is constant time rather than
    # a linear scan over the document terms.
    document_terms: typing.Set[str] = set(document.lower().split())
    return all(term in document_terms for term in query_terms)

indexing_process.py

import json

import typing

class Document(typing.NamedTuple):
    """A document as read from a source: an identifier together with its text."""

    # Identifier of the document.
    doc_id: str
    # Text of the document.
    text: str

class DocumentCollection:
    """An ordered, in-memory collection of Document records."""

    def __init__(self):
        """Start with an empty collection."""
        self.docs: typing.List[Document] = []

    def add_document(self, doc: Document):
        """
        Append a document to the end of the collection.

        :param doc: The document to add.
        """
        self.docs.append(doc)

    def get_all_docs(self) -> typing.List[Document]:
        """
        :return: The internal list of documents (not a copy), in insertion order.
        """
        return self.docs

class TransformedDocument(typing.NamedTuple):
    """A document after transformation: its identifier and its list of tokens."""

    # Identifier of the document.
    doc_id: str
    # Tokens of the document.
    tokens: typing.List[str]

class TransformedDocumentCollection:
    """
    An ordered, in-memory collection of TransformedDocument records that can be
    saved to and loaded from a JSON file.

    The file layout is ``{"docs": [{"doc_id": ..., "tokens": [...]}, ...]}``.
    """

    def __init__(self):
        """Start with an empty collection."""
        self.docs: typing.List[TransformedDocument] = []

    def add_document(self, doc: TransformedDocument):
        """
        Append a transformed document to the end of the collection.

        :param doc: The transformed document to add.
        """
        self.docs.append(doc)

    def get_all_docs(self) -> typing.List[TransformedDocument]:
        """
        Accessor mirroring DocumentCollection.get_all_docs.

        :return: The internal list of transformed documents (not a copy).
        """
        return self.docs

    def write(self, path: str):
        """
        Serialize the collection to a JSON file.

        :param path: Location of the file to create or overwrite.
        """
        json_data = {'docs': [td._asdict() for td in self.docs]}
        # Explicit encoding so the file does not depend on the platform default.
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(obj=json_data, fp=fp)

    @staticmethod
    def read(path: str) -> 'TransformedDocumentCollection':
        """
        Load a collection from a JSON file in the layout produced by write().

        :param path: Location of the file to read.
        :return: A new collection holding one TransformedDocument per record.
        """
        out = TransformedDocumentCollection()
        # Explicit encoding so decoding does not depend on the platform default.
        with open(path, encoding='utf-8') as fp:
            collection_dict = json.load(fp)
        doc_records = collection_dict['docs']
        for record in doc_records:
            doc = TransformedDocument(doc_id=record['doc_id'], tokens=record['tokens'])
            out.add_document(doc)
        return out

class Index:
    """Placeholder for the search index; it has no attributes or methods yet."""

class WikiSource:
    """Document source that reads records from a JSON data file."""

    DEFAULT_PATH = r'C:\Users\Alex\Documents\DePaul\datasets\wiki_small\wiki_small.json'

    def read_documents(self, data_file_path: str = DEFAULT_PATH) -> DocumentCollection:
        """
        Read all records of the data file into a DocumentCollection.

        The file must contain a JSON list of records, each with an 'id' and an
        'init_text' entry; these become the doc_id and text of a Document.

        :param data_file_path: Location of the JSON data file.
        :return: A collection with one Document per record, in file order.
        """
        # Decode explicitly as UTF-8: without an encoding argument open() falls
        # back to the platform default, which is not UTF-8 on every system.
        with open(data_file_path, encoding='utf-8') as fp:
            doc_records = json.load(fp)
        doc_collection = DocumentCollection()
        for record in doc_records:
            doc = Document(doc_id=record['id'], text=record['init_text'])
            doc_collection.add_document(doc)
        return doc_collection

def tokenize(document_text: str) -> typing.List[str]:
    """
    Split a text into lower-cased, whitespace-separated tokens.

    :param document_text: The text to tokenize.
    :return: The tokens in their order of appearance.
    """
    lowered = document_text.lower()
    tokens = lowered.split()
    return tokens

def transform_documents(document_collection: DocumentCollection) -> TransformedDocumentCollection:
    """
    Tokenize every document of a collection.

    :param document_collection: The documents to transform.
    :return: A new collection with one TransformedDocument per input document,
        carrying the same doc_id and the tokens of its text, in input order.
    """
    source_docs = document_collection.get_all_docs()
    transformed = TransformedDocumentCollection()
    for source_doc in source_docs:
        transformed.add_document(
            TransformedDocument(doc_id=source_doc.doc_id, tokens=tokenize(source_doc.text))
        )
    return transformed

def create_index(transformed_documents):
    """
    Placeholder for index construction; it is not implemented yet.

    :param transformed_documents: The transformed documents to index (currently unused).
    :return: None.
    """
    return None

def indexing_process(document_source: WikiSource) -> typing.Tuple[DocumentCollection, Index]:
    """
    Run the indexing pipeline: read the documents, transform them, build the index.

    :param document_source: The source to read the documents from.
    :return: A pair of the document collection that was read and the value
        returned by create_index for the transformed documents.
    """
    document_collection = document_source.read_documents()
    transformed_documents = transform_documents(document_collection)
    # NOTE: persisting the transformed documents via
    # transformed_documents.write(path=...) is currently disabled.
    index = create_index(transformed_documents)
    return (document_collection, index)

Problem 1: Please describe the main difference between the search implementation in ranking.py and in matching.py. What would you do if you were asked to produce just the 10 best results for a query in each implementation?

Please add all of the following to the indexing_process.py file we have worked on during the lectures

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

More Books

Students also viewed these Databases questions

Question

a. Describe the encounter. What made it intercultural?

Answered: 1 week ago