Question

我正在研究股票报价数据的线性回归模型，但我不能让Pylab正常工作。我已经成功绘制了数据，但我希望得到一条最适合我的数据。（不是出于任何特定目的，只是使用线性回归的随机数据集。）

import pylab
import urllib.request
from matplotlib import pyplot as plt
from bs4 import BeautifulSoup
import requests

def chartStocks(*tickers):

    # Run loop for each ticker passed in as an argument
    for ticker in tickers:

        # Convert URL into text for parsing
        url = "http://finance.yahoo.com/q/hp?s=" + str(ticker) + "+Historical+Prices"
        sourceCode = requests.get(url)
        plainText = sourceCode.text
        soup = BeautifulSoup(plainText, "html.parser")

        # Find all links on the page
        for link in soup.findAll('a'):
            href = link.get('href')
            link = []
            for c in href[:48]:
                link.append(c)
            link = ''.join(link)

            # Find the URL for the stock ticker CSV file and convert the data to text
            if link == "http://real-chart.finance.yahoo.com/table.csv?s=":
                csv_url = href
                res = urllib.request.urlopen(csv_url)
                csv = res.read()
                csv_str = str(csv)

                # Parse the CSV to create a list of data points
                point = []
                points = []
                curDay = 0
                day = []
                commas = 0               
                lines = csv_str.split("\\n")
                lineOne = True
                for line in lines:
                    commas = 0
                    if lineOne == True:
                        lineOne = False
                    else:
                        for c in line:
                            if c == ",":
                                commas += 1
                            if commas == 4:
                                point.append(c)
                            elif commas == 5:
                                for x in point:
                                    if x == ",":
                                        point.remove(x)
                                point = ''.join(point)
                                point = float(point)
                                points.append(point)
                                day.append(curDay)
                                curDay += 1
                                point = []
                                commas = 0
                points = list(reversed(points))

                # Plot the data
                pylab.scatter(day,points)
                pylab.xlabel('x')
                pylab.ylabel('y')
                pylab.title('title')

                k, b = pylab.polyfit(day,points,1)
                yVals = k * day + b
                pylab.plot(day,yVals,c='r',linewidth=2)
                pylab.title('title')

                pylab.show()


chartStocks('AAPL')

出于某种原因，我收到属性错误，我不确定原因。我是否错误地将数据传递给pylab.scatter()？我不完全确定是否传递x和y值的列表是正确的方法。我无法找到遇到此问题的其他任何人，.scatter肯定是Pylab的一部分，所以我不确定是什么时候发生的。

Answer 1

当你使用pylab时，它会导入一些其他包。因此，当您执行import pylab时，您会使用前缀np进行调整，因此您需要np.polyfit。正如这个问题所示，如果您只是直接导入numpy来执行此操作，我认为代码的读者会更清楚。

Answer 2

我认为存在版本冲突。尝试：

plt.pyplot.scatter(day,points)

模块'pylab'没有属性'scatter'

2 个答案: