Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

from math import sqrt from matplotlib import pyplot as plot from random import seed from random import randrange from csv import reader # Step 2

from math import sqrt
from matplotlib import pyplot as plot
from random import seed
from random import randrange
from csv import reader
# Step 2: Load the csv file
def load_csv(filename, skip=False):
    """Read a CSV file into a list of rows.

    Args:
        filename: Path of the CSV file to read.
        skip: When true, the first row (typically a header) is discarded.

    Returns:
        A list of rows; each row is a list of the raw string fields.
        Blank lines in the file are not included.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends for file objects handed to reader().
    with open(filename, 'r', newline='') as file:
        csv_reader = reader(file)
        if skip:
            next(csv_reader, None)
        # reader() yields an empty list for a blank line; such rows hold no
        # fields and cannot be indexed by column, so they are dropped here.
        return [row for row in csv_reader if row]
# Step 3: Convert any string column to a float column
def string_column_to_float(dataset, column):
    """Convert one column of every row from string to float, in place.

    Args:
        dataset: List of mutable rows (lists); each row is modified in place.
        column: Index of the field to convert.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    for row in dataset:
        # Surrounding whitespace is removed before parsing.
        row[column] = float(row[column].strip())
# Step 4: Calculate the mean value of a list of numbers
def mean(values):
    """Return the arithmetic mean of a sequence of numbers.

    Raises:
        ZeroDivisionError: If ``values`` is empty.
    """
    return sum(values) / float(len(values))
# Step 5: Calculate a regularisation value for the parameter
def regularisation(parameter, lambda_value=0.01):
    """Return ``parameter`` offset by ``lambda_value``.

    Args:
        parameter: The value to adjust.
        lambda_value: Amount added to ``parameter`` (default 0.01).

    Returns:
        ``lambda_value + parameter``.
    """
    # NOTE(review): regularisation terms are usually a penalty scaled by
    # lambda (e.g. lambda * parameter ** 2) rather than a plain sum; confirm
    # that addition is really intended before relying on this helper.
    return lambda_value + parameter
def variance(values, mean):
    """Return the sum of squared deviations of ``values`` from ``mean``.

    The sum is not divided by the number of values.

    Args:
        values: Iterable of numbers.
        mean: The value the deviations are measured from.

    Returns:
        The sum of ``(x - mean) ** 2`` over ``values``; 0 for empty input.
    """
    return sum((x - mean) ** 2 for x in values)
def covariance(x, x_mean, y, y_mean):
    """Return the sum of products of paired deviations of ``x`` and ``y``.

    The sum is not divided by the number of pairs.

    Args:
        x: Sequence of numbers.
        x_mean: Value the ``x`` deviations are measured from.
        y: Sequence of numbers, indexed in step with ``x``.
        y_mean: Value the ``y`` deviations are measured from.

    Returns:
        The sum of ``(x[i] - x_mean) * (y[i] - y_mean)`` over every index
        of ``x``.

    Raises:
        IndexError: If ``y`` is shorter than ``x``.
    """
    covar = 0.0
    for i, x_value in enumerate(x):
        covar += (x_value - x_mean) * (y[i] - y_mean)
    return covar
# Step 6: Calculate least squares between x and y
def leastSquares(dataset):
    """Fit the line ``y = b0 + b1 * x`` to the dataset by least squares.

    Args:
        dataset: Sequence of rows; ``row[0]`` is used as x and ``row[1]``
            as y.

    Returns:
        ``[b0, b1]`` — the intercept followed by the slope.

    Raises:
        ZeroDivisionError: If ``dataset`` is empty, or if every x value is
            the same (the x sum of squares is then zero).
    """
    x = [row[0] for row in dataset]
    y = [row[1] for row in dataset]
    x_mean = mean(x)
    y_mean = mean(y)
    # Slope: co-deviation of x and y over the squared deviation of x.
    b1 = covariance(x, x_mean, y, y_mean) / variance(x, x_mean)
    # Intercept chosen so the line passes through (x_mean, y_mean).
    b0 = y_mean - b1 * x_mean
    return [b0, b1]
# Step 7: Calculate root mean squared error
def root_mean_square_error(actual, predicted):
    """Return the root mean squared error between two sequences.

    Args:
        actual: Sequence of observed values.
        predicted: Sequence of predicted values, indexed in step with
            ``actual``.

    Returns:
        The square root of the mean of ``(predicted[i] - actual[i]) ** 2``.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    sum_error = 0.0
    for i, observed in enumerate(actual):
        sum_error += (predicted[i] - observed) ** 2
    mean_error = sum_error / float(len(actual))
    return sqrt(mean_error)
# Step 8: Make predictions
def simple_linear_regression(train, test):
    """Fit a line on ``train`` and predict a y value for each ``test`` row.

    Args:
        train: Rows used to fit the line via ``leastSquares``.
        test: Rows to predict for; only ``row[0]`` (x) is read.

    Returns:
        A list with one prediction ``b0 + b1 * row[0]`` per test row.
    """
    b0, b1 = leastSquares(train)
    return [b0 + b1 * row[0] for row in test]
# Step 9: Split the data into training and test sets
def train_test_split(dataset, split):
    """Randomly partition ``dataset`` into a training and a test list.

    Rows are drawn without replacement using ``randrange``, so the result
    depends on the state of the ``random`` module. ``dataset`` itself is not
    modified.

    Args:
        dataset: Sequence of rows to split.
        split: Fraction of the rows to place in the training list.

    Returns:
        A ``(train, test)`` tuple of lists.

    Raises:
        ValueError: If ``split`` asks for more rows than ``dataset`` holds.
    """
    train = list()
    test = list(dataset)
    train_size = split * len(dataset)
    # Without this check the loop below drains ``test`` and then fails with
    # an opaque "empty range" ValueError from randrange(0).
    if train_size > len(test):
        raise ValueError(
            'split %r requests more rows than the dataset contains' % (split,)
        )
    while len(train) < train_size:
        index = randrange(len(test))
        train.append(test.pop(index))
    return train, test
# Seed the random value
seed(1)
# Load and prepare data
filename = 'fertility_rate-worker_percent.csv'
dataset = load_csv(filename, skip=True)
# Every column of the file is converted from string to float.
for i in range(len(dataset[0])):
    string_column_to_float(dataset, i)


# Evaluate algorithm
def evaluate_simple_linear_regression(dataset, split):
    """Return the RMSE of simple linear regression on a random hold-out set.

    Args:
        dataset: Rows whose first field is x and whose last field is y.
        split: Fraction of the rows used for training.

    Returns:
        The root mean squared error of the predictions on the test rows.
    """
    train, test = train_test_split(dataset, split)
    # Blank out the target in copies of the test rows so the prediction
    # step cannot read the value it is meant to predict.
    test_set = list()
    for row in test:
        row_copy = list(row)
        row_copy[-1] = None
        test_set.append(row_copy)
    predicted = simple_linear_regression(train, test_set)
    actual = [row[-1] for row in test]
    return root_mean_square_error(actual, predicted)


split = 0.6
rmse = evaluate_simple_linear_regression(dataset, split)
print('Root Mean Square Error: %.3f' % rmse)
# Visualise the dataset
def visualise_dataset(dataset):
    """Draw the first column of ``dataset`` against the second as 'x' markers.

    Args:
        dataset: Sequence of rows; ``row[0]`` is plotted on the horizontal
            axis (labelled 'Fertility rate') and ``row[1]`` on the vertical
            axis (labelled 'Worker percent').
    """
    fertility_rates = [row[0] for row in dataset]
    worker_percents = [row[1] for row in dataset]
    plot.figure()
    plot.plot(fertility_rates, worker_percents, 'x')
    plot.xlabel('Fertility rate')
    plot.ylabel('Worker percent')
    plot.grid()
    plot.tight_layout()
    plot.show()
# Draw the scatter plot for the dataset loaded above.
visualise_dataset(dataset)

Step by Step Solution

There are 3 steps involved in it.

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI-powered tools for academic success

Step: 2

blur-text-image_2

Step: 3

blur-text-image_3

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Secrets Of Analytical Leaders Insights From Information Insiders

Authors: Wayne Eckerson

1st Edition

1935504347, 9781935504344

More Books

Students also viewed these Databases questions

Question

Understand the process of arbitration

Answered: 1 week ago

Question

Know the different variations of arbitration that are in use

Answered: 1 week ago