是的,所以这是我在多类逻辑回归中的代码,但是当我运行它时会出现Value Error,Shapes not aligned或者其他错误。
import numpy
import matplotlib.pyplot as plt
import math as mt
#normalized and feature scaled
只需加载数据集
def load():
data = numpy.loadtxt(open("housing.data.txt", "rb"), dtype="float")
m, n = data.shape
first_col = numpy.ones((m, 1))
#create new array using new parameters
data = numpy.hstack((first_col, data))
#divide each X with the max in the column
#subtract the mean of X to each element
for l in range(1, n):
max = 0.0
sum = 0.0
for j in range(0, m):
if max < data[j, l]:
max = data[j, l]
sum += data[j, l]
avg = sum / m
for j in range(0, m):
data[j, l] -= avg
data[j, l] /= max
return data
def logistic(z):
z = z[0,0]
z = z * -1
return (1.0 / (1.0 + mt.exp(z)))
def hyp(theta, x):
x = numpy.mat(x)
theta = numpy.mat(theta)
return logistic(theta * x.T)
#cost and derivative functions: TO REWRITE
#regularize using "-1000/m (hyp(theta, data[x, :-1]))"
def derv(theta, data, j):
sum = 0.0
last = data.shape[1] - 1
m = data.shape[0]
for x in range(0, m):
sum += (hyp(theta, data[x, :-1]) - numpy.mat(data[x, last])) +
numpy.mat(data[x, j])
return (sum[0,0] / m)
#regularize using " + 1000/2m(hyp(theta, data[x, :-1]))"
def cost(theta, data):
sum = 0.0
last = data.shape[1] - 1
m = data.shape[0]
for x in range(0, m):
y = data[x, last]
sum += y * mt.log(hyp(theta, data[x, :-1])) + (1 - y) * mt.log(1
- hyp(theta, data[x, :-1]))
return -1 * (sum / m)
data = load()
data1 = data[:, [10]]
data2 = data[:, [13]]
d12 = numpy.hstack((data1, data2))
data3 = data[:, [14]]
pdata = numpy.hstack((d12, data3))
print(pdata)
alpha = 0.01
theta = [10,10,10,10]
ntheta = [0,0,0,0]
delta = 50
x = 0
for l in range(0, 1000):
old_cost = cost(theta, pdata)
for y in range(0, data.shape[1] - 1):
ntheta[y] = theta[y] - alpha * derv(theta, data1, y)
for k in range(0, data.shape[1] - 1):
theta[k] = ntheta[k]
new_cost = cost(theta, data1)
delta = new_cost - old_cost
print("Cost: " + str(new_cost))
print("Delta: " + str(delta))
for r in range(0, data.shape[1]):
if hyp(theta, data1[r, :-1]) >= 0.5:
print("Predicted: 1 Actual: " + str(data1[r, data1.shape[1] - 1]))
else:
print("Predicted: 0 Actual: " + str(data1[r, data1.shape[1] - 1]))
plt.scatter(data1[:, 1], data1[:, 2])
x1 = (-1 * theta[0]) / theta[1]
x2 = (-1 * theta[0]) / theta[1]
x = range(-2, 2)
y = [((-1 * theta[0]) - (theta[1] * z) ) for z in x]
plt.plot(x, y)
plt.show()
我猜它不能像这样或者idk