Python 散点图(Scatter Plot)

时间:2015-04-27 18:43:37

标签: python statistics

从“如何像计算机科学家一样思考”课程中得到这个问题:

将数据文件 labdata.txt 的每一行解读为一个 x、y 坐标对。编写一个名为 plotRegression 的函数,从该文件读取数据,并使用海龟(turtle)绘制这些点,以及按以下公式求得的最佳拟合线:

$y = \bar{y} + m(x - \bar{x})$

$m = \dfrac{\sum x_i y_i - n\bar{x}\bar{y}}{\sum x_i^2 - n\bar{x}^2}$

http://interactivepython.org/runestone/static/thinkcspy/Files/Exercises.html?lastPosition=1308

我的代码似乎没有工作,我无法弄清楚为什么。看起来python将数据解释为str而不是float。

def plotregression(t):
    """Attempt to plot the points in labdata.txt and a best-fit line with turtle `t`.

    This is the asker's non-working version; the NOTE(review) comments below
    mark the places where the code as written cannot do what is intended.
    """
    labfile = open('labdata.txt','r')
    # Running totals used for the slope formula.
    sumx = 0
    sumy = 0
    count = 0
    sumprod = 0
    sumsqrx =0
    sumsqrnx = 0
    for i in labfile:
        # split() yields a list of str tokens, not numbers.
        points = i.split()
        print (points)
        t.up()
        # NOTE(review): the raw strings are passed here; they are never
        # converted to numbers before being used as coordinates.
        t.setpos(points[0],points[1])
        t.stamp()
        sumx = sumx + int(points[0])
        sumy = sumy + int(points[1])
        # NOTE(review): points[0] is still a str, so str * int repeats the
        # string instead of computing the numeric product x*y.
        prod = points[0]*int(points[1])
        # NOTE(review): adds the str `prod` to the int `sumprod`.
        sumprod = sumprod + prod
        count += 1
        sqrx = int(points[0])**2
        sumsqrx = sumsqrx + sqrx
        # NOTE(review): sumsqrnx (sum of x**-2) is accumulated but never
        # read afterwards in this function.
        sqrnx = int(points[0])**(-2)
        sumsqrnx = sumsqrnx + sqrnx

    avgx = sumx/count
    avgy = sumy/count

    # NOTE(review): `count(...)` is call syntax on an int (a `*` is missing),
    # and the denominator is not parenthesised, so the division binds only
    # to `sumsqrx` before the subtraction.
    m = (sumprod - count(avgx*avgy))/sumsqrx- (count(avgx**2))
    print(m)
    # NOTE(review): labfile was already iterated to its end above and is not
    # reopened or rewound, so this loop receives no lines.
    for bestline in labfile:
        line = bestline.split()
        # NOTE(review): `m(...)` is call syntax on a number (a `*` is missing).
        y= avgy + m(int(line[0])-avgx)
        t.down()
        t.setpos(0,0)
        # NOTE(review): line[0] is a str here as well.
        t.setpos(line[0],y)

# NOTE(review): `kj` is not defined anywhere in this snippet; presumably a
# turtle.Turtle created elsewhere.
plotregression(kj)

感谢您的帮助。

谢谢(Thnx)

3 个答案:

答案 0 :(得分:0)

我实际上是自己解决了这个问题,它最终似乎正在做我告诉它的事情。但我很想知道我是否可以删除任何不必要的代码行。我认为它有点太长了,我错过了一些可以让这更简单的事情。

import turtle

# Drawing window and the turtle that is passed to plotregression() below.
wn= turtle.Screen()
kj = turtle.Turtle()
# A small blue circle, so each stamp() made while plotting marks one point.
kj.shape('circle')
kj.turtlesize(0.2)
kj.color('blue')
kj.speed(1)



def plotregression(t):
    """Plot the points in ``labdata.txt`` and their least-squares line.

    Every non-blank line of ``labdata.txt`` must start with two
    whitespace-separated integers, x then y.  Each point is stamped with
    the turtle; the fitted values ``y = avgy + m * (x - avgx)`` are then
    written to ``bestfit.txt`` (one ``x<TAB>y`` row per point) and joined
    with a pen-down path in file order.

    Args:
        t: turtle-like object providing ``up``, ``down``, ``setpos``,
            ``goto``, ``stamp`` and ``write``.

    Returns:
        None.

    Raises:
        ZeroDivisionError: if the file holds no data rows, or the slope's
            denominator evaluates to zero (e.g. all x values equal).
    """
    # Read the file once.  Dropping rows that split to nothing keeps a blank
    # or trailing empty line from raising IndexError further down.
    with open('labdata.txt', 'r') as labfile:
        rows = [fields for fields in map(str.split, labfile) if fields]

    sumx = 0
    sumy = 0
    sumprod = 0
    sumsqrx = 0
    parsed = []  # (original x token, x as int), in file order
    for fields in rows:
        print(fields)
        x, y = int(fields[0]), int(fields[1])
        t.up()
        t.setpos(x, y)
        t.stamp()
        sumx += x
        sumy += y
        sumprod += x * y
        sumsqrx += x ** 2
        parsed.append((fields[0], x))

    count = len(parsed)
    avgx = sumx / count
    avgy = sumy / count

    m = (sumprod - count * (avgx * avgy)) / (sumsqrx - count * (avgx ** 2))
    print('M is: ', m)

    fitted = [(x_text, avgy + m * (x - avgx)) for x_text, x in parsed]

    # bestfit.txt is still produced for anyone who consumes it, but the line
    # is drawn from the in-memory values instead of re-reading the file.
    with open('bestfit.txt', 'w') as bestfit:
        for x_text, y in fitted:
            print('y is:', y)
            bestfit.write(x_text + '\t' + str(y) + '\n')

    # The pen is still up after the last stamp, so the first goto only
    # positions the turtle; every later goto draws a segment.
    for x_text, y in fitted:
        t.goto(float(x_text), y)
        t.down()

    t.write('Best fit line')


# Map the window to world coordinates from (-10, -10) to (120, 120).
wn.setworldcoordinates(-10,-10,120,120)
# plotregression() has no return statement, so `figure` is bound to None.
figure = plotregression(kj)


wn.exitonclick()

如果我能在任何地方减少,请告诉我

答案 1 :(得分:0)

我从交互式python中解决了同样的问题。我就是这样做的。

import turtle

def plotRegression(data):
    """Plot (x, y) points and their least-squares best-fit line with a turtle.

    The slope is ``m = (sum(x*y) - n*x_bar*y_bar) / (sum(x**2) - n*x_bar**2)``
    and the line drawn is ``y = y_bar + m * (x - x_bar)``.  A screen and a
    turtle are created here, and ``exitonclick()`` is called on the screen
    once drawing is finished.

    Args:
        data: iterable of rows; the first two items of each row are the x
            and y values, given as numbers or numeric strings.

    Raises:
        ZeroDivisionError: if *data* is empty, or the slope's denominator
            evaluates to zero (e.g. all x values equal).
    """
    win = turtle.Screen()
    win.bgcolor('pink')

    t = turtle.Turtle()
    t.shape('circle')
    t.turtlesize(0.2)

    # Use the argument that was passed in, not a module-level variable, so
    # the function works with whatever data the caller supplies.
    points = [(float(row[0]), float(row[1])) for row in data]
    x_list = [x for x, _ in points]
    y_list = [y for _, y in points]
    n = len(points)
    x_bar, y_bar = sum(x_list) / n, sum(y_list) / n
    x_square_sum = sum(x ** 2 for x in x_list)
    xy_sum = sum(x * y for x, y in points)

    m = (xy_sum - n * x_bar * y_bar) / (x_square_sum - n * x_bar ** 2)
    # Best-fit y for every x, in input order.
    y_best = [y_bar + m * (x - x_bar) for x in x_list]

    # Plot the points.
    win.setworldcoordinates(0, 0, max(x_list), max(y_list))
    for x, y in points:
        t.penup()
        t.setposition(x, y)
        t.stamp()

    # Plot the best-fit line: the pen goes down only after the first
    # fitted point is reached, so nothing is drawn from the origin.
    t.penup()
    t.setposition(0, 0)
    t.color('blue')
    for x, y in zip(x_list, y_best):
        t.setposition(x, y)
        t.pendown()

    win.exitonclick()

# Each row of plot_data is the list of whitespace-separated string tokens
# from one line of the data file.
with open('files/labdata.txt', 'r') as f:
    plot_data = [aline.split() for aline in f]

plotRegression(plot_data)

答案 2 :(得分:0)

我来晚了大约5年,但这是我的两分钱。

问题可能出在这一行:t.setpos(points[0], points[1])。它让海龟移动到 points[0] 和 points[1] 中存储的字符串值,而不是数值。例如,如果 points[0] 存储的值是 "50",points[1] 存储的值是 "60",那么 "50" + "60" 将返回字符串 "5060"。

这一行也可能有问题:prod = points[0] * int(points[1])。它把 points[0] 中的字符串值与 points[1] 转换得到的整数值相乘。沿用前面的值,points[0] 是 "50",而 int(points[1]) 是 60——是整数 60,而不是字符串 "60"。在 Python 中,字符串乘以整数得到的是把该字符串重复 60 次的结果,而不是数值乘积;随后把这个字符串加到整数 sumprod 上就会出错,因为不能将字符串与整数相加。

这是我解决问题的方法:

import turtle
import math
import statistics as stats


def get_line(means, slope, xlist):
    """Return a list of best y values.

    Evaluates ``y = ymean + slope * (x - xmean)`` for every x in *xlist*,
    so the resulting line passes through the point of means.

    Args:
        means: sequence whose first two items are ``(xmean, ymean)``.
        slope: slope of the best-fit line.
        xlist: x values to evaluate, in order.

    Returns:
        A list of fitted y values, one per item of *xlist*.
    """
    xmean, ymean = means[0], means[1]
    # The deviation from the mean is x - xmean; adding xmean instead shifts
    # every fitted value by 2 * slope * xmean.
    return [ymean + slope * (x - xmean) for x in xlist]


def get_mtop(xlist, ylist, n, means):
    """Compute the numerator of the slope m.

    That is ``sum(x_i * y_i) - n * xmean * ymean``, where ``means`` holds
    ``(xmean, ymean)`` and the products pair items of *xlist* and *ylist*
    by position.
    """
    product_total = sum(
        x_value * ylist[index] for index, x_value in enumerate(xlist)
    )
    return product_total - n * (means[0] * means[1])


def get_mbot(xlist, n, means):
    """Compute the denominator of the slope m.

    That is ``sum(x_i ** 2) - n * xmean ** 2``, where ``means[0]`` is the
    mean of the x values.
    """
    squares_total = sum(value ** 2 for value in xlist)
    return squares_total - n * means[0] ** 2


def get_mean(xlist, ylist):
    """Compute the arithmetic mean of each coordinate list.

    Returns:
        A 2-tuple ``(xmean, ymean)``.
    """
    return tuple(stats.mean(values) for values in (xlist, ylist))


def plotRegression(input_file, input_turtle):
    """Plot the points in *input_file* and draw their best-fit line.

    Every non-blank line of the file must start with two
    whitespace-separated integers, x then y.  Each point is stamped with
    the turtle, the slope is computed with ``get_mtop`` / ``get_mbot``, and
    the fitted values from ``get_line`` are joined in file order with a
    green pen-down path.

    Args:
        input_file: path of the data file.
        input_turtle: the ``turtle.Turtle`` to draw with.

    Raises:
        ZeroDivisionError: if ``get_mbot`` returns zero.
        Exception: presumably ``statistics.StatisticsError`` from
            ``get_mean`` when the file has no data rows — confirm.
    """
    # Get a list of xcoor and a list of ycoor.  The context manager closes
    # the file even when a row fails to parse.
    xcoor = []
    ycoor = []
    with open(input_file, 'r') as infile:
        for line in infile:
            coor = line.split()
            if not coor:
                continue  # tolerate blank / trailing empty lines
            xcoor.append(int(coor[0]))
            ycoor.append(int(coor[1]))

    input_turtle.shape("circle")
    input_turtle.penup()

    # Plot the points
    for x, y in zip(xcoor, ycoor):
        input_turtle.goto(x, y)
        input_turtle.stamp()
    num_points = len(xcoor)

    # Get the mean values of the xcoor and ycoor lists
    means_tup = get_mean(xcoor, ycoor)
    print(means_tup)

    # Get the value for M
    mtop = get_mtop(xcoor, ycoor, num_points, means_tup)
    mbot = get_mbot(xcoor, num_points, means_tup)
    m = mtop / mbot
    print(m)

    # Draw the line: move to the first fitted point with the pen up, then
    # lower the pen so only the fitted path is drawn.
    yline = get_line(means_tup, m, xcoor)
    input_turtle.color("green")
    input_turtle.goto(xcoor[0], yline[0])
    input_turtle.pendown()
    for x, y in zip(xcoor, yline):
        print(x, y)
        input_turtle.goto(x, y)

    input_turtle.hideturtle()


def main():
    """Set up the drawing window and the turtle, then plot the regression."""
    # Black canvas whose world coordinates span 0..100 on both axes.
    screen = turtle.Screen()
    screen.setworldcoordinates(0, 0, 100, 100)
    screen.bgcolor("black")

    # Purple turtle handed to plotRegression for drawing.
    pen = turtle.Turtle()
    pen.color("purple")

    # Plot the data file, then hand control to exitonclick().
    data_path = "C:\\Users\\user\\pathtofile\\labdata.txt"
    plotRegression(data_path, pen)
    screen.exitonclick()


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

我不知道这是不是正确的斜率,但似乎是朝正确的方向。希望这对遇到同样问题的人有所帮助。