使用Python进行多项式回归只能在一定程度上进行

时间:2018-07-14 04:15:54

标签: python python-3.x regression linear-regression

import numpy as np
import matplotlib.pyplot as plt

#Read Sum.fcc and store data in array A and E
content = [x.strip() for x in content]
A = []
E = []
for i in content:
    A.append(float(i[0:3]))
    i = i[4:]
    sign = ""
    if i[0] == "-":
        sign = "-"
    index = int(i[-2:])
    num = i[i.index(".") + 1:i.index("E")]
    E.append(float(sign + num[0:index] + "." + num[index:]))
print("\n".join([(str(A[n]) + " " + str(E[n])) for n in range(len(A))]))
minA = min(A)
maxA = max(A)

def regress(n, x, y):
    global degree, coef, f
    degree = str(n)
    X = []
    for i in range(len(x)):
        X.append([x[i]**(index) for index in range(n+1)][::-1])
    a = np.array(X)
    b = np.array(y)
    #Estimated coefficient using ordinary least square estimation
    coef = np.matmul(np.matmul(np.linalg.inv(np.matmul(np.matrix.transpose(a), a)),np.matrix.transpose(a)), b)
    #2D Array with indexes and coefficients for the function
    f = [(x, coef[n-x]) for x in range(n+1)[::-1]]

def polynomial(x, function_array):
    y = 0
    for i in function_array:
        y += (x**i[0]) * i[1]
    return y

#Defining the function to plot a graph of energy vs lattice constant
def plot(title = ""):
    if title == "":
        title = "Polynomial Degree: " + degree
    x = np.arange(minA, maxA + 0.01, 0.01)
    y = polynomial(x, f)
    plt.plot(x, y)
    plt.scatter(A, E)
    plt.xlabel("Lattice Constant/Å")
    plt.ylabel("Energy/eV")
    plt.title(title)
    plt.show()

regress(2, A, E)
plot()
regress(3, A, E)
plot()
regress(4, A, E)
plot()
regress(5, A, E)
plot()
regress(6, A, E)

回归函数是我从此Wikipedia page获得的函数。回归一直很精确,直到4级,然后回归很多,我不知道为什么。

我还尝试了另一种基于quadratic regression equations确定系数的方法,并在多项式次数增加时扩展矩阵。

def regress(n, x, y):
    global degree, coef, f
    degree = str(n)
    sigmax = [sum(np.power(x, n)) for n in range(2 * n, -1, -1)]
    X = []
    for i in range(n + 1):
        X.append(sigmax[:n+1])
        sigmax.pop(0)
    Y = []
    for i in range(n, -1, -1):
        Y.append(sum([(x[j]**i) * y[j] for j in range(int(X[-1][-1]))]))
    coef = np.linalg.solve(X, Y)
    #2D Array with indexes and coefficients for the function
    f = [(x, coef[n-x]) for x in range(n+1)[::-1]]

#Calculates the R squared
def error():
    global R2
    SSres = 0
    for i in range(len(A)):
        SSres += (polynomial(A[i], f) - E[i])**2
    ybar = sum(E)/len(E)
    SStot = 0
    for i in range(len(E)):
        SStot += (ybar - E[i])**2
    R2 = 1.0 - (SSres/SStot)
    return R2

我还定义了一个计算R平方的函数,对于我的特定Sum.fcc文件,该函数在5度处开始减小。可以找到here,包括Sum.fcc的所有文件。系数也开始偏离excel在5级时给出的系数。这是否仅是由于python浮点数中存储的位数限制?还是我错过了代码中的某些内容?

0 个答案:

没有答案