Answered step by step
Verified Expert Solution
Question
1 Approved Answer
Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, ...
Start with iris7_explore_7models.py, and perform the following:
1. For the decision trees classifier:
a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree depth generate DOT/gv and PNG files. In addition, for each tree depth show the training time and prediction time.
b. Find the optimal maximum-depth hyperparameter, i.e. the value that produces the highest precision.
c. Show the code running under Python 3 along with correct output.
d. Show program output with no command line options and the output for each of the following command line options: --help, --version, --sample, --eval and --summaryonly
e. Compare the results between the original code and the modified code.
2. For the SVM classifier:
a. Find the optimal hyperparameters that produce the highest precision.
b. Show the training time and prediction time.
c. Show the code running under Python 3 along with correct output.
d. Show program output with no command line options and the output for each of the following command line options: --help, --version, --sample, --eval and --summaryonly
e. Compare the results between the original code and the modified code.
3. Refactor and improve the following functions:
a. def _main()
b. def _train(alg, algName, X_train, Y_train, X_test, Y_test)
c. def _predict(alg, algName, X_train, Y_train, X_test, Y_test)
d. def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test)
e. Make the --help, --version and --sample command line options display only the help message, version information and data samples, respectively. These options should no longer perform training or prediction on the dataset.
**iris7_explore_7models.py code below**
# iris7_explore_7models.py
# Load system libraries
import sys
import datetime
import random
# Load ML libraries
import pandas
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC as SVMClassifier
from scipy.spatial import distance
def _main():
    """Run the iris exploration end to end.

    Stages, in order: optional help (exits), optional version report,
    dataset download, optional sampling/visualisation, train/test split,
    10-fold cross-validation of the scikit-learn models (skipped with
    --summaryonly), training and prediction for all seven classifiers,
    and finally the accuracy summary table.

    Which optional stages run, and how much progress output is printed,
    is decided by the command-line flags read through the ``_showing*``
    helpers.
    """
    if _showingHelp():
        _showHelp()
        # sys.exit rather than the site-injected exit(), which is meant for
        # interactive use and is absent when site is disabled (python -S).
        sys.exit(0)

    if _showingVersions():
        _showVersions()

    verbose = not _showingSummaryOnly()

    # load dataset
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Loading data")
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    # url = "file:////iris.csv"
    columns = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pandas.read_csv(url, names=columns)

    if _showingSamples():
        _sampleData(dataset)
        _visualizeData(dataset, pyplot, scatter_matrix)

    # split data into train/test datasets
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Splitting data into training and test sets")
    array = dataset.values
    X = array[:, 0:4]   # the four measurement columns
    Y = array[:, 4]     # the class label column
    test_size = 0.20
    seed = 7
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    if verbose:
        # cross-validate fresh, untrained instances of the sklearn models
        scoring = 'accuracy'
        candidates = [
            ('Logistic Regression', LogisticRegression()),
            ('KNN Neighbors', KNeighborsClassifier()),
            ('Support Vector', SVMClassifier()),
            ('DecisionTree', DecisionTreeClassifier()),
            ('Random Forest', RandomForestClassifier()),
        ]
        results = []
        labels = []
        msg = ""
        for name, model in candidates:
            print(datetime.datetime.now(), "explore_iris_7: Evaluate model %s" % name)
            # random_state is only accepted together with shuffle=True;
            # current scikit-learn raises ValueError otherwise.
            kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring)
            results.append(cv_results)
            labels.append(name)
            msg += "%20s: \t\t%f\t(%f)" % (name, cv_results.mean(), cv_results.std()) + " "
        print(msg)
        print()

        if _showingEval():
            # compare algorithms
            fig = pyplot.figure()
            fig.suptitle('Compare algorithms')
            ax = fig.add_subplot(111)
            pyplot.boxplot(results)
            ax.set_xticklabels(labels)
            pyplot.show()

    # make predictions on test dataset
    # Each entry: (name used in the summary table,
    #              name used in the training/prediction log, classifier).
    # The two names differ for some models; both are kept so the program's
    # output text is unchanged.
    classifiers = [
        ('Logistic Regression', "Logistic Regression", LogisticRegression()),
        ('KNN Neighbors', "KNeighbors Classifier", KNeighborsClassifier()),
        ('Support Vector', "Support Vector", SVMClassifier()),
        ('DecisionTree', "Decision Tree Classifier", DecisionTreeClassifier()),
        ('Random Forest', "Random Forest", RandomForestClassifier()),
        ('My Random', "My Random", myRNDClassifier()),
        ('My KNN', "My KNN", myKNNClassifier()),
    ]
    for _, logName, classifier in classifiers:
        _train(classifier, logName, X_train, Y_train, X_test, Y_test)
        _predict(classifier, logName, X_train, Y_train, X_test, Y_test)

    models = [(summaryName, classifier) for summaryName, _, classifier in classifiers]
    _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test)
#####################################################
# My random classifier
class myRNDClassifier:
    """Baseline classifier that answers with a randomly drawn training label."""

    def fit(self, X_train, y_train):
        """Store the training features and labels; no model is learned."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list with one label per row of ``X_test``.

        Each label is drawn with ``random.choice`` from the stored training
        labels; the contents of the rows themselves are not inspected.
        """
        return [random.choice(self.y_train) for _ in X_test]
#####################################################
# My KNN K =1 classifier
class myKNNClassifier:
    """Nearest-neighbour classifier with K fixed at 1."""

    def fit(self, X_train, y_train):
        """Store the training features and labels for later lookups."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list holding the label of the closest training row for each test row."""
        return [self.closest(sample) for sample in X_test]

    def closest(self, row):
        """Return the training label whose feature row has the smallest ``euc`` distance to ``row``.

        The scan is linear over the stored training rows; on ties the
        earliest training row wins because only a strictly smaller
        distance replaces the current best.
        """
        samples = self.X_train
        winner = 0
        shortest = euc(row, samples[0])
        for position in range(1, len(samples)):
            gap = euc(row, samples[position])
            if gap < shortest:
                winner, shortest = position, gap
        return self.y_train[winner]
def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b`` via ``scipy.spatial.distance``."""
    separation = distance.euclidean(a, b)
    return separation
#####################################################
# training and prediction functions
def _train(alg, algName, X_train, Y_train, X_test, Y_test):
    """Fit ``alg`` on the training split, logging timestamps around the fit.

    The "Begin"/"End" lines are suppressed when --summaryonly is active.
    ``X_test`` and ``Y_test`` are accepted but not used here.
    """
    def _log(stage):
        # Re-check the flag at each log point, as every print is gated separately.
        if not _showingSummaryOnly():
            print(datetime.datetime.now(), stage, algName)

    _log("Begin training: ")
    alg.fit(X_train, Y_train)
    _log("End training: ")
def _predict(alg, algName, X_train, Y_train, X_test, Y_test):
    """Predict on ``X_test`` with ``alg`` and report the results.

    The prediction always runs. Unless --summaryonly is active, the
    function also prints begin/end timestamps, the accuracy score, the
    confusion matrix and the classification report against ``Y_test``.
    ``X_train`` and ``Y_train`` are accepted but not used here.
    """
    quiet = _showingSummaryOnly

    if not quiet():
        print(datetime.datetime.now(), "Begin prediction: ", algName)
    predictions = alg.predict(X_test)

    # Nothing below has any effect besides printing, so leave early when silenced.
    if quiet():
        return

    print(datetime.datetime.now(), "End prediction: ", algName)
    print("%s: accuracy_score=%0.2f" % (algName, accuracy_score(Y_test, predictions)))
    print(confusion_matrix(Y_test, predictions))
    print(classification_report(Y_test, predictions))
def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test):
    """Print a table with one accuracy score per fitted model.

    ``models`` is an iterable of ``(name, model)`` pairs; each model is
    asked to predict ``X_test`` and is scored against ``Y_test``.
    ``X_train`` and ``Y_train`` are accepted but not used here.
    """
    print("Algorithm\t\tAccuracy Score")
    for label, estimator in models:
        score = accuracy_score(Y_test, estimator.predict(X_test))
        print("%20s\t\t%0.2f" % (label, score))
#####################################################
# data sampling and visualization functions
def _sampleData(dataset):
    """Print a textual overview of ``dataset``.

    Output, in order: the frame's shape, its first 20 rows, the
    ``describe()`` statistics, then the row counts grouped by 'class'
    and by 'sepal-length'.
    """
    # shape, leading records and descriptive statistics
    dimensions = dataset.shape
    print(dimensions)
    leading_rows = dataset.head(20)
    print(leading_rows)
    statistics = dataset.describe()
    print(statistics)

    # distribution of rows per class and per sepal-length value
    for column in ('class', 'sepal-length'):
        print(dataset.groupby(column).size())
def _visualizeData(dataset, pyplot, scatter_matrix):
    """Draw three figures for ``dataset``, calling ``pyplot.show()`` after each.

    In order: box-and-whisker plots (2x2 subplots with independent
    axes), histograms, and a scatter-plot matrix produced by the
    ``scatter_matrix`` callable that was passed in.
    """
    renderers = (
        # box and whisker plots
        lambda: dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False),
        # histograms
        lambda: dataset.hist(),
        # scatter plot matrix
        lambda: scatter_matrix(dataset),
    )
    for render in renderers:
        render()
        pyplot.show()
#####################################################
# helper functions
def _parseArgumets(arg):
    """Return True when ``arg`` was passed on the command line, else False.

    Only ``sys.argv[1:]`` is searched, so the program name in
    ``sys.argv[0]`` never matches. The comparison is an exact string
    match. A real bool is returned in both cases (never an implicit
    ``None``), which still works for callers that only test truthiness.
    """
    return arg in sys.argv[1:]
def _showingHelp():
    """Report whether --help was given on the command line."""
    help_requested = _parseArgumets("--help")
    return help_requested
def _showingSummaryOnly():
    """Report whether --summaryonly was given on the command line."""
    summary_only = _parseArgumets("--summaryonly")
    return summary_only
def _showingVersions():
    """Report whether version info should be shown.

    --summaryonly takes precedence: when it is present the answer is
    False regardless of --version.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--version")
def _showingSamples():
    """Report whether data samples should be shown.

    --summaryonly takes precedence: when it is present the answer is
    False regardless of --sample.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--sample")
def _showingEval():
    """Report whether the algorithm-comparison plot should be shown.

    --summaryonly takes precedence: when it is present the answer is
    False regardless of --eval.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--eval")
def _showHelp():
    """Print the usage line followed by a one-line description of each option."""
    help_lines = (
        "iris7_explore_7models: syntax iris7_explore_7models --version --sample --eval --summaryonly",
        "--help: show this help message",
        "--version: show version info for Python runtime and ML libraries",
        "--sample: show sample data",
        "--eval: show evaluation of algorithms",
        "--summaryonly: show only a summary of algorithms and their accuracy scores",
    )
    for line in help_lines:
        print(line)
def _showVersions():
    """Print the Python runtime version, then the version of each ML library.

    Libraries are imported one at a time, immediately before their line
    is printed, in the order scipy, numpy, matplotlib, pandas, sklearn.
    """
    import importlib
    import sys

    # check versions of Python runtime and ML libraries
    print('Python: {}'.format(sys.version))

    # (output template, module to import) in reporting order
    reports = (
        ('scipy: {}', 'scipy'),
        ('numpy: {}', 'numpy'),
        ('matplotlib: {}', 'matplotlib'),
        ('pandas: {}', 'pandas'),
        ('sklearn: {}', 'sklearn'),
    )
    for template, module_name in reports:
        library = importlib.import_module(module_name)
        print(template.format(library.__version__))
# Unguarded entry point: _main() runs whenever this module is loaded,
# whether it is executed as a script or imported.
_main()
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started