Question

是的，所以这是我在多类逻辑回归中的代码，但是当我运行它时会出现Value Error，Shapes not aligned或者其他错误。

import numpy
import matplotlib.pyplot as plt
import math as mt

#normalized and feature scaled

只需加载数据集

def load():
    data = numpy.loadtxt(open("housing.data.txt", "rb"), dtype="float")
    m, n = data.shape
    first_col = numpy.ones((m, 1))
    #create new array using new parameters
    data = numpy.hstack((first_col, data))
    #divide each X with the max in the column
    #subtract the mean of X to each element

    for l in range(1, n):
        max = 0.0
        sum = 0.0

        for j in range(0, m):
            if max < data[j, l]:
                max = data[j, l]

            sum += data[j, l]
        avg = sum / m

        for j in range(0, m):
            data[j, l] -= avg
            data[j, l] /= max

    return data
def logistic(z):
    z = z[0,0]
    z = z * -1
    return (1.0 / (1.0 + mt.exp(z)))

def hyp(theta, x):
    x = numpy.mat(x)
    theta = numpy.mat(theta)
    return logistic(theta * x.T)

#cost and derivative functions: TO REWRITE
#regularize using "-1000/m (hyp(theta, data[x, :-1]))"
def derv(theta, data, j):
    sum = 0.0
    last = data.shape[1] - 1
    m = data.shape[0]

    for x in range(0, m):
        sum += (hyp(theta, data[x, :-1]) - numpy.mat(data[x, last])) + 
numpy.mat(data[x, j])

    return (sum[0,0] / m)

#regularize using " + 1000/2m(hyp(theta, data[x, :-1]))"
def cost(theta, data):
    sum = 0.0
    last = data.shape[1] - 1
    m = data.shape[0]

    for x in range(0, m):
        y = data[x, last]
        sum += y * mt.log(hyp(theta, data[x, :-1])) + (1 - y) * mt.log(1 
- hyp(theta, data[x, :-1]))


    return -1 * (sum / m)



data = load()
data1 = data[:, [10]]
data2 = data[:, [13]]
d12 = numpy.hstack((data1, data2))
data3 = data[:, [14]]

pdata = numpy.hstack((d12, data3))
print(pdata)
alpha = 0.01
theta = [10,10,10,10]
ntheta = [0,0,0,0]
delta = 50
x = 0

for l in range(0, 1000):
        old_cost = cost(theta, pdata)

        for y in range(0, data.shape[1] - 1):
                ntheta[y] = theta[y] - alpha * derv(theta, data1, y)

        for k in range(0, data.shape[1] - 1):
                theta[k] = ntheta[k]

        new_cost = cost(theta, data1)

        delta = new_cost - old_cost

        print("Cost: " + str(new_cost))
        print("Delta: " + str(delta))

for r in range(0, data.shape[1]):
        if hyp(theta, data1[r, :-1]) >= 0.5:
                print("Predicted: 1 Actual: " + str(data1[r, data1.shape[1] - 1]))
        else:
                print("Predicted: 0 Actual: " + str(data1[r, data1.shape[1] - 1]))

plt.scatter(data1[:, 1], data1[:, 2])

x1 = (-1 * theta[0]) / theta[1]
x2 = (-1 * theta[0]) / theta[1]

x = range(-2, 2)
y = [((-1 * theta[0]) - (theta[1] * z) ) for z in x]

plt.plot(x, y)
plt.show()

我猜它不能像这样或者idk

值错误，形状不对齐Python

0 个答案: