Question

https://gist.github.com/marcelcaraciolo/1321585

从这段代码出发，我试图求出我目前保存在numpy数组中的数据集的theta系数。我已将训练数组保存到名为 'foo.csv' 的csv文件中。这些数据是我使用pandas库从另一个csv文件转换而来的，目前我的训练集是10886行乘12列。第一列是Y，即我希望预测的值，其他所有列都是我希望求得theta值的变量。

这应该意味着我最终会得到一个12×1的θ值矩阵，因为有12个因变量。

现在我对Python比较陌生。我目前正在运行iPython，想要输入我的测试数组，我已将其保存为名为 'foo.csv' 的csv文件。我希望能够输入 [1] MVLR.calctheta(foo.csv) 并使输出为12乘1的矩阵。但我没有得到这个结果，而是一直得到：

AttributeError: 'module' object has no attribute 'calctheta'

但是我已经明确地将calctheta定义为一个函数，我不理解为什么不能调用它。我是否错误地声明了此方法？我的设想是先求出theta值，然后运行一个for循环，使用这些theta值和因变量来评估每个测试行。

我遇到问题的是这个calctheta函数，它是我根据上面的github代码修改而来的。我希望能够用csv文件来调用calctheta：

def calctheta(name):
    '''
    Fit a multivariate linear regression to the CSV file called name.

    The first column of the file is taken as the target y and every remaining
    column as a feature. The features are normalized, a column of ones is
    prepended for the intercept, and gradient descent is run for a fixed
    number of iterations.

    Prints the fitted theta, plots the cost per iteration and returns the
    theta produced by gradient_descent (one row per feature plus one for the
    intercept).
    '''
    data = genfromtxt(name, delimiter=",")

    #number of training samples
    m = data.shape[0]

    y = data[:, 0].reshape(m, 1)
    # Use every column after the target; a fixed slice such as 1:11 silently
    # drops the last feature of a 12-column file.
    X = data[:, 1:]

    #Scale features and set them to zero mean
    x, _, _ = feature_normalize(X)

    #Add a column of ones to X (interception data)
    # Sizes are derived from the data so the design matrix and theta always
    # agree with the number of feature columns.
    n_params = x.shape[1] + 1
    it = ones(shape=(m, n_params))
    it[:, 1:] = x

    #Some gradient descent settings
    iterations = 100
    alpha = 0.01

    #Init Theta and Run Gradient Descent
    theta = zeros(shape=(n_params, 1))

    theta, J_history = gradient_descent(it, y, theta, alpha, iterations)
    print(theta)
    plot(arange(iterations), J_history)
    xlabel('Iterations')
    ylabel('Cost Function')
    show()

    return theta

另外，这个多变量线性回归问题中有许多因变量。我的一些变量是按等级取值的，取值范围从0到该变量的选项个数。

例如，如果某一列有3个选项，其分布由训练集决定；而其他列则是原始数值，因此平均值就是该数值本身的平均值（例如温度列）。

我的问题是：在计算theta值时，各变量的取值等级范围不同这一事实，是否会使多变量线性回归不再适用？如果假设最终要度量的量相对于其输入呈正态分布，我认为不会。

编辑：

我在代码顶部添加了下面这一行，并将其余代码缩进到它的下面：

class MVLR:

我现在得到的是：

NameError: name 'calctheta' is not defined

编辑2：

我的代码

class MVLR:

from numpy import loadtxt, zeros, ones, array, genfromtxt, linspace, logspace, mean, std, arange
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from pylab import plot, show, xlabel, ylabel

#Evaluate the linear regression

def __init__(self, name):
    '''Store name on the instance as self.name.'''
    self.name = name

def feature_normalize(self, X):
    '''
    Scale every column of X to zero mean and unit standard deviation.

    X is a 2-D array with one row per sample and one column per feature.

    Returns (X_norm, mean_r, std_r): a normalized float copy of X and the
    per-column means and standard deviations as lists.

    The array passed in is left untouched. A constant column (standard
    deviation 0) is mapped to zeros instead of dividing by zero; its entry
    in std_r is still reported as 0.
    '''
    # Work on a float copy so the caller's array is not overwritten and
    # integer input is not truncated when the scaled values are stored.
    X_norm = array(X, dtype=float)

    col_mean = mean(X_norm, axis=0)
    col_std = std(X_norm, axis=0)

    # Avoid 0 / 0 -> nan for columns that never vary.
    divisor = col_std.copy()
    divisor[divisor == 0] = 1.0

    X_norm = (X_norm - col_mean) / divisor
    return X_norm, list(col_mean), list(col_std)


def compute_cost(self, X, y, theta):
    '''
    Evaluate the squared-error cost of a linear model.

    Returns (1 / (2 * m)) * e.T.dot(e), where e = X.dot(theta) - y and
    m = y.size is the number of training samples.
    '''
    sample_count = y.size
    residuals = X.dot(theta) - y
    scale = 1.0 / (2 * sample_count)
    return scale * residuals.T.dot(residuals)


def gradient_descent(self, X, y, theta, alpha, num_iters):
    '''
    Learn theta by batch gradient descent.

    Takes num_iters gradient steps with learning rate alpha. Every step
    updates all coefficients simultaneously from the same predictions.

    X is the (m, n) design matrix, y holds the m targets and theta the n
    starting coefficients.

    Returns (theta, J_history): the fitted coefficients as an (n, 1) float
    array and a (num_iters, 1) array holding the cost after each step.
    The theta passed in is not modified.
    '''
    m = y.size
    # Force column vectors: with a 1-D y, X.dot(theta) - y would broadcast
    # to an (m, m) matrix instead of m residuals.
    y = array(y, dtype=float).reshape(m, 1)
    theta = array(theta, dtype=float).reshape(-1, 1)
    J_history = zeros(shape=(num_iters, 1))
    step = alpha * (1.0 / m)

    for i in range(num_iters):
        errors = X.dot(theta) - y

        # X.T.dot(errors) holds, for each coefficient, the sum over samples
        # of error * feature value.
        theta = theta - step * X.T.dot(errors)

        # Cost of the updated theta: (1 / (2 * m)) * sum of squared errors,
        # evaluated here so the routine does not depend on how compute_cost
        # is bound.
        residuals = X.dot(theta) - y
        J_history[i, 0] = (1.0 / (2 * m)) * (residuals ** 2).sum()

    return theta, J_history

#Load the dataset



#Plot the data
'''
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
n = 100
for c, m, zl, zh in [('r', 'o', -50, -25)]:
    xs = data[:, 0]
    ys = data[:, 1]
    zs = data[:, 2]
    ax.scatter(xs, ys, zs, c=c, marker=m)
ax.set_xlabel('Size of the House')
ax.set_ylabel('Number of Bedrooms')
ax.set_zlabel('Price of the House')
plt.show()
'''

def calctheta(self, name):
    '''
    Fit a multivariate linear regression to the CSV file called name.

    The first column of the file is taken as the target y and every remaining
    column as a feature. The features are normalized, a column of ones is
    prepended for the intercept, and gradient descent is run for a fixed
    number of iterations.

    Prints the fitted theta, plots the cost per iteration and returns the
    theta produced by gradient_descent (one row per feature plus one for the
    intercept).
    '''
    data = genfromtxt(name, delimiter=",")

    #number of training samples
    m = data.shape[0]

    y = data[:, 0].reshape(m, 1)
    # Use every column after the target; a fixed slice such as 1:11 silently
    # drops the last feature of a 12-column file.
    X = data[:, 1:]

    #Scale features and set them to zero mean
    # Sibling methods must be reached through self inside a class.
    x, _, _ = self.feature_normalize(X)

    #Add a column of ones to X (interception data)
    # Sizes are derived from the data so the design matrix and theta always
    # agree with the number of feature columns.
    n_params = x.shape[1] + 1
    it = ones(shape=(m, n_params))
    it[:, 1:] = x

    #Some gradient descent settings
    iterations = 100
    alpha = 0.01

    #Init Theta and Run Gradient Descent
    theta = zeros(shape=(n_params, 1))

    theta, J_history = self.gradient_descent(it, y, theta, alpha, iterations)
    print(theta)
    plot(arange(iterations), J_history)
    xlabel('Iterations')
    ylabel('Cost Function')
    show()

    return theta

Answer 1

您应该考虑使用类（class）来设计代码。你可以让你的文件看起来像这样（部分代码取自你的问题）：

from numpy import loadtxt, zeros, ones, array, genfromtxt, linspace, logspace, mean, std, arange
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from pylab import plot, show, xlabel, ylabel

class MyClass(object):
    '''Skeleton showing how the regression helpers can be grouped in a class.'''

    def __init__(self, name):
        '''Store name on the instance as self.name.'''
        self.name = name

    def calculate_theta(self, name):
        '''
        Placeholder for the theta computation; it is meant to return theta.
        '''
        # code calculating theta here, ending with `return theta`
        raise NotImplementedError('calculate_theta is only a placeholder')

    def feature_normalize(self, X):
        '''
        Scale every column of X to zero mean and unit standard deviation.

        X is a 2-D array with one row per sample and one column per feature.

        Returns (X_norm, mean_r, std_r): a normalized float copy of X and
        the per-column means and standard deviations as lists.

        The array passed in is left untouched. A constant column (standard
        deviation 0) is mapped to zeros instead of dividing by zero; its
        entry in std_r is still reported as 0.
        '''
        # Work on a float copy so the caller's array is not overwritten and
        # integer input is not truncated when the scaled values are stored.
        X_norm = array(X, dtype=float)

        col_mean = mean(X_norm, axis=0)
        col_std = std(X_norm, axis=0)

        # Avoid 0 / 0 -> nan for columns that never vary.
        divisor = col_std.copy()
        divisor[divisor == 0] = 1.0

        X_norm = (X_norm - col_mean) / divisor
        return X_norm, list(col_mean), list(col_std)

# Script entry point: build a MyClass instance and run its theta calculation.
# NOTE(review): some_input_x and some_input_y are not defined anywhere in the
# visible code - presumably placeholders to be replaced with real inputs
# (e.g. the CSV file name); confirm before running.
if __name__ == '__main__':
    my_class = MyClass(some_input_x)
    my_class.calculate_theta(some_input_y)

在这里您可以更好地了解如何创建类。

将线性回归解决方案放在一起

1 个答案: