Question
I need to make text analyzer class using the following methods. I must comment on code. Here is part 1 completed. import requests, re
I need to write a text analyzer class that provides the following methods, and I must comment the code.
Here is part 1, completed:
import requests, re
from bs4 import BeautifulSoup
from collections import Counter
import statistics as stats
import string
#I must make my class here
import operator
import matplotlib.pyplot as plt; plt.rcdefaults()
class TextAnalyzer:
    """Load text from a URL, a text file, or a literal string and normalize it.

    The raw text is kept in ``_orig_content``; a normalized copy (punctuation
    removed, whitespace collapsed, lowercased) is kept in ``_content``.
    """

    def __init__(self, src, src_type="discover"):
        """Load ``src`` and store both its raw and its preprocessed text.

        Keyword arguments:
        - src (str) - A URL, a path to a text file, or the text itself
        - src_type (str) - One of "url", "path", "text", or "discover"
          (the default), which infers the type from ``src``

        Raises:
        - ValueError - if ``src_type`` is not one of the values listed above
        """
        # kind of source that was loaded: "url", "path" or "text"
        self._src_type = None
        # preprocessed text
        self._content = None
        # text exactly as it was loaded
        self._orig_content = None

        # determine src_type if not specified
        if src_type == "discover":
            if src.startswith("http"):
                src_type = "url"
            elif src.endswith(".txt"):
                src_type = "path"
            else:
                src_type = "text"
        self._src_type = src_type

        # load content based on src_type
        if src_type == "url":
            response = requests.get(src)
            self._orig_content = response.text
        elif src_type == "path":
            with open(src, "r") as f:
                self._orig_content = f.read()
        elif src_type == "text":
            self._orig_content = src
        else:
            # Fail here with a clear message; otherwise _orig_content stays
            # None and _preprocess dies with an unrelated AttributeError.
            raise ValueError(
                f"Unknown src_type {src_type!r}; expected 'discover', "
                "'url', 'path' or 'text'"
            )

        # preprocess content
        self._content = self._preprocess(self._orig_content)

    def _preprocess(self, text):
        """Return ``text`` normalized for analysis.

        Keyword arguments:
        - text (str) - Text to normalize

        Returns the text with every character in ``string.punctuation``
        removed, each run of whitespace replaced by a single space, and all
        letters lowercased. Leading/trailing whitespace is collapsed to one
        space, not stripped.
        """
        # remove punctuation
        text = text.translate(str.maketrans("", "", string.punctuation))
        # collapse each run of whitespace (spaces, tabs, newlines) to one space
        text = re.sub(r"\s+", " ", text)
        # convert to lowercase
        text = text.lower()
        return text
This is what I need to do for part 2:
set_content_to_tag(self, tag, tag_id=None)
Changes _content to the text within a specific element of an HTML document.
Keyword arguments:
- tag (str) - Tag to read
- tag_id (str) - ID of tag to read
It's possible that the HTML does not contain the tag being searched for, so I should use exception handling to catch any errors.
This is what I should do for part 3:
reset_content(self)
Resets _content to full text that was originally loaded. Useful after a call to set_content_to_tag().
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started