Question


How do you implement the learn and classify functions in a decision tree class?

import csv
import numpy as np  # http://www.numpy.org
import ast
from datetime import datetime
from math import log, floor, ceil


class Utility(object):

    # This method computes entropy for information gain
    def entropy(self, class_y):
        entropy = 0
        ### Implement your code here
        #############################################
        # probabilities of the 0 and 1 classes
        p_0 = len([y for y in class_y if y == 0]) / len(class_y)
        p_1 = len([y for y in class_y if y == 1]) / len(class_y)
        # if either probability is 0 the node is pure; return 0 to avoid log(0)
        if p_0 == 0 or p_1 == 0:
            entropy = 0
            return entropy
        # binary entropy from the two class probabilities
        entropy = -p_0 * np.log2(p_0) - p_1 * np.log2(p_1)
        #############################################
        return entropy

    def partition_classes(self, X, y, split_attribute, split_val):
        X_left = []
        X_right = []
        y_left = []
        y_right = []
        ### Implement your code here
        #############################################
        # split the arrays on split_attribute using np.where
        X = np.array(X)
        y = np.array(y)
        X_left = list(X[np.where(X[:, split_attribute] <= split_val)])
        y_left = list(y[np.where(X[:, split_attribute] <= split_val)])
        X_right = list(X[np.where(X[:, split_attribute] > split_val)])
        y_right = list(y[np.where(X[:, split_attribute] > split_val)])
        #############################################
        return (X_left, X_right, y_left, y_right)

    def information_gain(self, previous_y, current_y):
        info_gain = 0
        ### Implement your code here
        #############################################
        # entropy before the split
        entropy_previous = self.entropy(previous_y)
        # entropy of each child partition (0 for an empty partition)
        entropy_after = [self.entropy(y) if len(y) > 0 else 0 for y in current_y]
        # weight of each partition
        probs = [len(y) / len(previous_y) for y in current_y]
        # weighted entropy after the split
        entropy_after = sum([entropy_after[i] * probs[i] for i in range(len(probs))])
        info_gain = entropy_previous - entropy_after
        #############################################
        return info_gain

    def best_split(self, X, y):
        split_attribute = 0
        split_value = 0
        X_left, X_right, y_left, y_right = [], [], [], []
        #############################################
        num_attributes = len(X[0])
        # evaluate a random subset of half the attributes
        chosen_attributes = np.random.choice(num_attributes, num_attributes // 2, replace=False)
        max_info_gain = -float("Inf")
        for attribute in chosen_attributes:
            for row_idx in range(len(X)):
                X_left_check, X_right_check, y_left_check, y_right_check = \
                    self.partition_classes(X, y, attribute, X[row_idx][attribute])
                current_y = [y_left_check, y_right_check]
                info_gain = self.information_gain(y, current_y)
                # keep the split with the highest information gain so far
                if max_info_gain < info_gain:
                    max_info_gain = info_gain
                    split_attribute = attribute
                    split_value = X[row_idx][attribute]
                    X_left, X_right = X_left_check, X_right_check
                    y_left, y_right = y_left_check, y_right_check
        return X_left, X_right, y_left, y_right, split_attribute, split_value
        #############################################


class DecisionTree(object):
    def __init__(self, max_depth):
        # Initializing the tree as an empty dictionary or list, as preferred
        self.tree = {}
        self.max_depth = max_depth

    def learn(self, X, y, par_node={}, depth=0):
        # TODO: Train the decision tree (self.tree) using the sample X and labels y
        # You will have to make use of the functions in the Utility class to train the tree
        # Use the function best_split in the Utility class to get the best split and
        # the data corresponding to the left and right child nodes

        # One possible way of implementing the tree:
        # Each node in self.tree could be in the form of a dictionary:
        #   https://docs.python.org/2/library/stdtypes.html#mapping-types-dict
        # For example, a non-leaf node with two children can have a 'left' key and a
        # 'right' key. You can add more keys which might help in classification
        # (e.g. split attribute and split value)
        ### Implement your code here
        #############################################
        pass
        #############################################

    def classify(self, record):
        # TODO: classify the record using self.tree and return the predicted label
        ### Implement your code here
        #############################################
        pass
        #############################################
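The learn and classify stubs are the part the question asks about. The following is only a minimal sketch of one common approach, reusing the Utility class above and the node-as-dictionary layout suggested in the skeleton's comments; the 'label' leaf key and the majority_label helper are illustrative names of my own, not part of the assignment.

# A minimal sketch, assuming the Utility class defined above.
# The 'label' leaf key and the majority_label helper are my own choices.

def majority_label(y):
    # most frequent class label in y (ties broken arbitrarily)
    y = list(y)
    return max(set(y), key=y.count)

class DecisionTree(object):
    def __init__(self, max_depth):
        self.tree = {}
        self.max_depth = max_depth
        self.utility = Utility()

    def learn(self, X, y, par_node={}, depth=0):
        # par_node is kept only to match the given signature
        if len(y) == 0:
            node = {'label': 0}
        elif len(set(list(y))) == 1 or depth >= self.max_depth:
            # pure node or maximum depth reached: make a leaf
            node = {'label': majority_label(y)}
        else:
            # find the best split and the data for the two children
            X_left, X_right, y_left, y_right, split_attribute, split_value = \
                self.utility.best_split(X, y)
            if len(y_left) == 0 or len(y_right) == 0:
                # the split put everything on one side; fall back to a majority vote
                node = {'label': majority_label(y)}
            else:
                # internal node: store the split and recurse into both children
                node = {
                    'split_attribute': split_attribute,
                    'split_value': split_value,
                    'left': self.learn(X_left, y_left, depth=depth + 1),
                    'right': self.learn(X_right, y_right, depth=depth + 1),
                }
        if depth == 0:
            self.tree = node  # root of the finished tree
        return node

    def classify(self, record):
        # walk from the root to a leaf, retracing the stored splits
        node = self.tree
        while 'label' not in node:
            if record[node['split_attribute']] <= node['split_value']:
                node = node['left']
            else:
                node = node['right']
        return node['label']

With this node layout, classify simply repeats the comparison that best_split stored at each internal node, so a prediction costs one comparison per level of the tree.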

Here is the sample data.

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1
5,116,74,0,0,25.6,0.201,30,0
3,78,50,32,88,31.0,0.248,26,1
10,115,0,0,0,35.3,0.134,29,0
2,197,70,45,543,30.5,0.158,53,1
8,125,96,0,0,0.0,0.232,54,1
4,110,92,0,0,37.6,0.191,30,0
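For a quick end-to-end check with the sketched DecisionTree above, a driver along these lines parses the comma-separated rows, trains on the first eight columns, and predicts the final 0/1 column. The in-memory rows string, the max_depth=5 setting, and training on the full sample are illustrative assumptions only:

# Illustrative driver (assumed, not part of the original question).
rows = """6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1
5,116,74,0,0,25.6,0.201,30,0""".splitlines()

X, y = [], []
for row in rows:
    values = [float(v) for v in row.split(",")]
    X.append(values[:-1])      # first 8 columns are the features
    y.append(int(values[-1]))  # last column is the 0/1 class label

tree = DecisionTree(max_depth=5)
tree.learn(X, y)
print(tree.classify([6, 148, 72, 35, 0, 33.6, 0.627, 50]))  # predicted label for the first row

Note that best_split samples a random subset of attributes, so repeated runs can build different trees and the printed label is not guaranteed to be identical across runs.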
