Question

Is this implementation correct? Specifically, are the log loss, the gradient, and the DFP method implemented correctly?
import numpy as np
import matplotlib.pyplot as plt  # only needed if the recorded losses are plotted
# zero_one controls whether the labels are encoded as 0/1 or as -1/1
def read_parse_data(zero_one=True):
    labels = []
    data = []
    with open("ijcnn1.txt") as f:
        for line in f:
            xs = line.split()  # split on whitespace; the original split("") raises ValueError
            h = [0.0] * 22     # ijcnn1 has 22 features, stored as sparse index:value pairs
            for s in xs[1:]:
                s = s.strip()
                if len(s) == 0:
                    continue
                k, v = s.split(":")
                h[int(k) - 1] = float(v)  # libsvm indices are 1-based
            data.append(h)
            if xs[0] == "1":
                labels.append(1)
            elif zero_one:
                labels.append(0)
            else:
                labels.append(-1)
    return np.array(data), np.array(labels)
def prediction(w, data):
    # logistic (sigmoid) prediction for every row of data
    return 1 / (1 + np.exp(-np.dot(data, w)))

def logloss(pred, labels):
    pred = np.clip(pred, 1e-15, 1 - 1e-15)  # avoid log(0)
    Li = -(labels * np.log(pred) + (1 - labels) * np.log(1 - pred))
    return np.mean(Li)

def grad(w, data, labels):
    # gradient of the mean log loss: X^T (p - y) / n
    pred = prediction(w, data)
    return np.dot(data.T, pred - labels) / len(labels)
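# Not part of the original question: one quick way to confirm that grad really is
# the gradient of logloss(prediction(.)) is a central finite-difference check on a
# few coordinates. check_grad is a hypothetical helper name, a sketch only.
def check_grad(w, data, labels, eps=1e-6, n_coords=5):
    g = grad(w, data, labels)
    for j in range(min(n_coords, len(w))):
        e = np.zeros_like(w)
        e[j] = eps
        # central-difference approximation of dL/dw_j
        g_num = (logloss(prediction(w + e, data), labels)
                 - logloss(prediction(w - e, data), labels)) / (2 * eps)
        print(f"coord {j}: analytic {g[j]:.6g}, numeric {g_num:.6g}")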
def wolfe(w, p, data, labels, grad_f=grad, loss_f=logloss, pred_f=prediction,
          alpha=1.0, c1=0.001, c2=0.9):
    # Step-size search for the two Wolfe conditions:
    #   sufficient decrease: f(w + a*p) <= f(w) + c1 * a * grad(w)^T p
    #   curvature:           grad(w + a*p)^T p >= c2 * grad(w)^T p
    grad_w = grad_f(w, data, labels)
    loss_w = loss_f(pred_f(w, data), labels)  # hoisted out of the loop; it does not change
    for i in range(1000):
        if loss_f(pred_f(w + alpha * p, data), labels) > loss_w + c1 * alpha * np.dot(grad_w, p):
            alpha *= 0.5
        elif np.dot(grad_f(w + alpha * p, data, labels), p) < c2 * np.dot(grad_w, p):
            alpha *= 2.1
        else:
            break
    else:
        # the original `elif i > 999:` branch was unreachable inside range(1000);
        # a for/else raises exactly when no acceptable step was found
        raise Exception("wolfe doesn't finish")
    return alpha
def dfp(w_0, B_0, data, labels, pred_f=prediction, grad_f=grad, loss_f=logloss,
        max_iter=100, tol=0.0001):
    w = w_0
    B_inv = np.linalg.inv(B_0)  # running inverse-Hessian estimate
    grad_w = grad_f(w, data, labels)
    losses = []
    for i in range(max_iter):
        p = -np.dot(B_inv, grad_w)  # quasi-Newton search direction
        alpha = wolfe(w, p, data, labels, grad_f=grad_f, loss_f=loss_f)
        s = alpha * p
        w_new = w + s
        grad_new = grad_f(w_new, data, labels)
        if np.linalg.norm(grad_new - grad_w) < tol:
            break
        y = grad_new - grad_w
        grad_w = grad_new
        sy = np.dot(s, y)
        By = np.dot(B_inv, y)
        # DFP inverse-Hessian update: H += s s^T / (s^T y) - H y y^T H / (y^T H y).
        # The original subtracted outer(Bs, Bs)/dot(s, Bs) with Bs = B_inv @ s,
        # i.e. it used s where the DFP formula requires y.
        B_inv += np.outer(s, s) / sy - np.outer(By, By) / np.dot(y, By)
        loss = loss_f(pred_f(w_new, data), labels)  # loss at the new iterate, not the old one
        print(f"Iter: {i}, Loss: {loss}")
        losses.append(loss)
        w = w_new
    return w, losses
features, labels = read_parse_data()
n = features.shape[1]  # 22 features for ijcnn1; the hard-coded n=123 was dead code
w_0 = np.ones(n)       # initial weight vector
B_0 = np.eye(n)        # identity initial Hessian; the original comment notes one could also compute the Hessian at the first step
w_opt, losses = dfp(w_0, B_0, features, labels, grad_f=grad, loss_f=logloss)
Iter: 0, Loss: 1.4180136783927826
Iter: 1, Loss: 1.0493899267197955
Iter: 2, Loss: 0.3792809465978115
Iter: 3, Loss: 0.3714018048303409
Iter: 4, Loss: 0.36672889216134275
Iter: 5, Loss: 0.365689704361027
Iter: 6, Loss: 0.36430271286220156
Iter: 7, Loss: 0.363284481254417
Iter: 8, Loss: 0.3619351491046495
Iter: 9, Loss: 0.36079584853744845
Iter: 10, Loss: 0.3593349751368699
Iter: 11, Loss: 0.35796607021715604
Iter: 12, Loss: 0.3562683386529582
Iter: 13, Loss: 0.3545499965600709
Iter: 14, Loss: 0.352497178681491
Iter: 15, Loss: 0.3503199878187087
Iter: 16, Loss: 0.3478437409837462
Iter: 17, Loss: 0.34525760570232067
Iter: 18, Loss: 0.3426658807753199
Iter: 19, Loss: 0.3404046801388335
Iter: 20, Loss: 0.3385228765672317
Iter: 21, Loss: 0.33683376894469985
Iter: 22, Loss: 0.3352487122841688
Iter: 23, Loss: 0.3337309661524962
Iter: 24, Loss: 0.3322
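
The trace above was produced by the question's original code; with the corrected update, a rerun will print different values. The log loss and gradient match the standard logistic-regression expressions, so the one genuine problem is the quasi-Newton update. For reference, the standard DFP inverse-Hessian update, with H_k = B_inv, s_k the step, and y_k the gradient difference, is

H_{k+1} = H_k + \frac{s_k s_k^\top}{s_k^\top y_k} - \frac{H_k y_k y_k^\top H_k}{y_k^\top H_k y_k}

whereas the original built the second term from H_k s_k instead of H_k y_k, which is neither the DFP nor the BFGS formula; a monotonically decreasing trace does not by itself certify the update. Since matplotlib is imported anyway, a minimal sketch to visualize the recorded losses might look like:

plt.plot(losses)  # loss per DFP iteration
plt.xlabel("iteration")
plt.ylabel("log loss")
plt.show()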
