Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

I need to make a text analyzer class using the following methods, and I must comment the code. Here is part 1, completed: import requests, re

I need to make a text analyzer class using the following methods, and I must comment the code.

Here is part 1 completed.

 

import requests, re

from bs4 import BeautifulSoup

from collections import Counter

import statistics as stats

import string


 

#I must make my class here


 

import operator

import matplotlib.pyplot as plt; plt.rcdefaults()


 

class TextAnalyzer:
    """Load text from a URL, a text file, or a raw string and normalize it.

    Instance attributes (all set by ``__init__``):
        _src_type: One of "url", "path" or "text".
        _orig_content: The text exactly as it was loaded from the source.
        _content: The preprocessed text (see ``_preprocess``).
    """

    def __init__(self, src, src_type="discover"):
        """Load ``src`` and store both the raw and the preprocessed text.

        Args:
            src: A URL, a path to a text file, or the text itself.
            src_type: "url", "path", "text", or "discover" (the default).
                With "discover" the type is guessed from ``src``: a value
                starting with "http" is a URL, a value ending with ".txt"
                is a file path, and anything else is treated as raw text.

        Raises:
            ValueError: If ``src_type`` is not one of the supported values.
        """
        self._src_type = None
        self._content = None
        self._orig_content = None

        # Determine src_type if the caller did not specify it.
        if src_type == "discover":
            if src.startswith("http"):
                src_type = "url"
            elif src.endswith(".txt"):
                src_type = "path"
            else:
                src_type = "text"

        self._src_type = src_type

        # Load the original content based on the source type.
        if src_type == "url":
            response = requests.get(src)
            self._orig_content = response.text
        elif src_type == "path":
            # Explicit encoding so the result does not depend on the
            # platform's default locale.
            with open(src, "r", encoding="utf-8") as f:
                self._orig_content = f.read()
        elif src_type == "text":
            self._orig_content = src
        else:
            # Fail here with a clear message instead of letting
            # _preprocess(None) raise an unrelated AttributeError.
            raise ValueError(
                f"Unsupported src_type {src_type!r}; "
                "expected 'discover', 'url', 'path' or 'text'"
            )

        # Keep the original untouched; analysis works on the cleaned copy.
        self._content = self._preprocess(self._orig_content)

    def _preprocess(self, text):
        """Return ``text`` without punctuation, whitespace-collapsed, lowercased.

        Args:
            text: The string to normalize.

        Returns:
            The normalized string. Leading and trailing whitespace is reduced
            to a single space rather than stripped.
        """
        # Remove every ASCII punctuation character.
        text = text.translate(str.maketrans("", "", string.punctuation))
        # Collapse each run of whitespace (spaces, tabs, newlines) to one space.
        text = re.sub(r"\s+", " ", text)
        # Convert to lowercase.
        return text.lower()

 

 

This is what I need to do for part 2.

 

set_content_to_tag(self, tag, tag_id=None)

Changes _content to the text within a specific element of an HTML document.

Keyword arguments:

  • tag (str) - Tag to read
  • tag_id (str) - ID of tag to read

It's possible the HTML does not contain the tag being searched. I should use exception handling to catch any errors.

 

This is what I should do for part 3.

reset_content(self)

Resets _content to full text that was originally loaded. Useful after a call to set_content_to_tag().

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Income Tax Fundamentals 2013

Authors: Gerald E. Whittenburg, Martha Altus Buller, Steven L Gill

31st Edition

1111972516, 978-1285586618, 1285586611, 978-1285613109, 978-1111972516

More Books

Students also viewed these Programming questions