Question

我在这里有很多解决方案，但我已经完全解决了这些问题，但仍无法解决我的问题。我试图打印每个图形的rSquared，但我得到错误＆＃39;局部变量＆＃34; m＆＃34;在分配之前引用。请帮忙！我知道这里的间距是关闭的。我在跑步者中拥有所有这一切。谢谢！

def readData(fileName):


    hsGPA = []   #High School GPA
    mathSAT = []  #Math SAT scores
    crSAT = []  #Verbal SAT scores
    collegeGPA = []  #College GPA
    compGPA=[]  #ComputerScience GPA
    FullList=[] 
    inputFile = open(fileName, 'r', encoding = 'utf-8')

    for line in inputFile:
        FullList=line.split(',')
        hsGPA.append(float(FullList[0]))
        mathSAT.append(int(FullList[1]))
        crSAT.append(int(FullList[2]))
        collegeGPA.append(float(FullList[3]))
        compGPA.append(float(FullList[4]))
    return hsGPA, mathSAT, crSAT, collegeGPA, compGPA



def plotData(hsGPA, mathSAT, crSAT, collegeGPA,compGPA):

    GPA1 = []   #High School GPA
    Score1 = []  #Math SAT scores
    Score2= []  #Verbal SAT scores
    GPA2 = []  #College GPA
    GPA3=[]    #ComputerScience GPA 

    hsGPA, mathGPA, crSAT, collegeGPA,compGPA = readData('satFINAL.txt')
    pyplot.figure(1)

    pyplot.subplot(5,1,1)
    for line in range(len(hsGPA)):
        GPA1.append(line)
    pyplot.plot(GPA1,hsGPA)

    pyplot.subplot(5,1,2)
    for line in range(len(mathSAT)):
        Score1.append(line)
    pyplot.plot(Score1,mathSAT)

    pyplot.subplot(5,1,3)
    for line in range(len(crSAT)):
        Score2.append(line)       
    pyplot.plot(Score2,crSAT)

    pyplot.subplot(5,1,4)
    for line in range(len(collegeGPA)):
        GPA2.append(line)
    pyplot.plot(GPA2,collegeGPA)

    pyplot.subplot(5,1,5)
    for line in range(len(compGPA)):
        GPA3.append(line)
    pyplot.plot(GPA3,compGPA)

    pyplot.show()

def LinearRegression(xList, yList):
    '''
This function finds the constants in the y = mx+b, or linear regression
forumula

xList - a list of the x values
yList - a list of the y values
m - the slope f the line
b - where the line intercepts the y axis
'''

    n = len(xList)
    sumX = 0
    sumXX = 0
    sumXY = 0
    sumY = 0

    for index in range(n):
        sumX += xList[index]
        sumXY += xList[index] * yList[index]
        sumXX += xList[index]**2
        sumY += yList[index]
        #the components needed to find m and b

    m = (n*(sumXY - (sumX*sumY)))/(n*(sumXX - (sumX**2)))
    b = (sumY - (m*sumX))/n
    #actually implements formula

    return m, b


def plotRegression(x,y, xLabel, yLabel):


    pyplot.scatter(x,y)
    m,b = LinearRegression(x,y)
    minX = min(x)
    maxX = max(x)
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()




def rSquared(x,y):

    n = len(x)
    R=0
    sumS=0
    sumT=0
    sumY=0

    for index in range(n):
        a=(y[index]-((m*x[index])+b))**2
        sumS = sumS+a


    for index in range(len(y)):
        sumY = sumY= y[index]
        MeanY= sumY/(len(y))
        e=(y[index]-MeanY)**2
        sumT = sumT+e




    m,b= LinearRegression(x, y)

    RG=1-(sumS/sumT)



def main():
    data = readData('satFINAL.txt')
    print(data)
    plotData(*data)
    hsGPA, mathSAT, crSAT, collegeGPA,compGPA = data
    # added ScoreT calculation here
    ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
    plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
    plotRegression(mathSAT,collegeGPA, 'mathSAT' , 'collegeGPA')
    plotRegression(crSAT,collegeGPA, 'crSAT' , 'collegeGPA')
    plotRegression(ScoreT,collegeGPA, 'Math and CR SAT' , 'collegeGPA')
    plotRegression(mathSAT,crSAT, 'mathSAT', 'CR SAT')
    plotRegression(mathSAT,compGPA, 'mathSAT', 'CompGPA')
    plotRegression(hsGPA,compGPA, 'HsGPA', 'CompGPA')
    plotRegression(ScoreT,compGPA, 'SATscore ', 'CompGPA')
    print(rSquared(hsGPA,collegeGPA))





main()

Answer 1

很难说 - 你的缩进搞砸了，而且你的代码很多，你实际上没有给出错误跟踪（这实际上会识别出错误所在的行！） - 但看起来，在rSquared的定义中，您在为a=(y[index]-((m*x[index])+b))**2分配值之前调用了m。

编辑：我经历了很多重复的代码并重构为循环;现在希望它更具可读性。我还针对linear_regression交叉检查了scipy.stats.linregress函数并获得了相同的结果;我未已验证r_squared，因此您应该检查一下。

import matplotlib.pyplot as plt

# column indices
HS, MATH, VERBAL, COLLEGE, COMPSCI = range(5)
# column labels
LABELS = ["High school GPA", "Math SAT", "Verbal SAT", "College GPA", "CompSci GPA"]
# column data types
DTYPES = [ float,             int,        int,          float,         float       ]

def read_columns(fname, encoding="utf-8", separator=",", dtypes=None):
    """
    Return columns of data from a file

    If dtypes is specified, convert each column to the given data type
    """
    # read rows from data file
    with open(fname, encoding=encoding) as inf:
        rows = [line.split(separator) for line in inf]
    # transpose to columns
    cols = zip(*rows)
    # apply data types
    if dtypes is not None:
        cols = [[dtype(cell) for cell in col] for dtype,col in zip(dtypes,cols)]
    return cols

def linear_regression(xs, ys):
    """
    Return the linear regression constants m,b
      in the least-squares best fit to y = m*x+b
    """
    # if you have SciPy you can use scipy.stats.linregress instead
    n = len(xs)
    xsum  = sum(xs)
    ysum  = sum(ys)
    xxsum = sum(x*x for x in xs)
    xysum = sum(x*y for x,y in zip(xs, ys))
    m = (n * xysum - xsum * ysum) / (n * xxsum - xsum * xsum)
    b = (ysum - m * xsum) / n
    return m, b    

def r_squared(xs, ys):
    m, b = linear_regression(xs, ys)
    ysum, n = sum(ys), len(ys)
    ymean = ysum / n
    ssum = sum((y - (m * x + b))**2 for x,y in zip(xs, ys))
    tsum = sum((y - ymean)**2 for y in ys)
    return 1 - ssum / tsum

def plot_regression(xs, xlabel, ys, ylabel):
    m, b = linear_regression(xs, ys)
    min_, max_ = min(xs), max(xs)
    plt.scatter(xs, ys)
    plt.plot([min_, max_], [m * min_ + b, m * max_ + b], "r")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

def main():
    # read data
    scores = read_columns("satFINAL.txt", dtypes=DTYPES)
    # add composite math-and-verbal score
    MATH_VERBAL = 5
    LABELS.append("Math+Verbal SAT")
    DTYPES.append(int)
    scores.append([math+verbal for math,verbal in zip(scores[MATH], scores[VERBAL])])

    # do raw score plots
    plt.figure(1)
    num_figs = len(LABELS)
    # draw subplots
    for fig, column, nums in zip(range(num_figs), LABELS, scores):
        plt.subplot(num_figs, 1, fig+1)
        plt.plot(range(len(nums)), nums)
        plt.xlabel(LABELS[fig])
    # display results
    plt.show()

    # do regression plots
    regressions = [
        (HS,          COLLEGE),
        (MATH,        COLLEGE),
        (VERBAL,      COLLEGE),
        (MATH_VERBAL, COLLEGE),
        (MATH,        VERBAL),
        (MATH,        COMPSCI),
        (HS,          COMPSCI),
        (MATH_VERBAL, COMPSCI)
    ]
    for x,y in regressions:
        print("r**2 for {} and {}: {}".format(LABELS[x], LABELS[y], r_squared(scores[x], scores[y])))
        plot_regression(scores[x], LABELS[x], scores[y], LABELS[y])

if __name__=="__main__":
    main()

继续得到这个错误，在赋值之前没有引用局部变量（python）

1 个答案: