我尝试抓取的数据被截断

时间:2015-07-15 20:55:13

标签: python html web-scraping screen-scraping

我尝试使用Python将此basketball-reference示例页面中的比赛实况(play-by-play)表格抓取到CSV文件中。

当我运行此代码时,抓取到的表格被截断,并且缺少许多单元格。我是编程新手(n00b),任何帮助都将不胜感激。

from bs4 import BeautifulSoup
from urllib2 import urlopen
import csv

# Base URL of the site; page paths are appended to it when building requests.
bref = "http://www.basketball-reference.com"
# Prompt for the game code at module level. The value is used to build the
# play-by-play page URL and the name of the output CSV file.
print "Enter game code:"
game = raw_input("> ")

def make_soup(url):
    """Fetch *url* and return its contents parsed as a BeautifulSoup tree.

    The page is parsed with the "lxml" parser. The HTTP response is closed
    once parsing has finished, even if parsing raises.

    NOTE(review): parsers differ in how they repair malformed HTML; if a
    table comes back truncated, the parser choice is worth checking --
    confirm against the actual page.
    """
    response = urlopen(url)
    try:
        # BeautifulSoup reads the whole response while constructing the
        # tree, so closing the response afterwards is safe.
        return BeautifulSoup(response, "lxml")
    finally:
        response.close()

def get_pbp(pbp):
    """Scrape the play-by-play table for one game into a list of rows.

    Args:
        pbp: Game code used to build the page URL
            (``<bref>/boxscores/pbp/<pbp>.html``).

    Returns:
        A list with one entry per ``<tr>`` in the table. Each entry is a
        list of the unicode text of that row's ``<td>`` cells, with
        non-breaking spaces removed. Rows that contain no ``<td>`` cells
        (for example header rows) yield an empty list.

    Raises:
        ValueError: If the page has no table with the expected class.
    """
    # Use the argument rather than the module-level ``game`` so the function
    # scrapes whichever game the caller asked for.
    soup = make_soup(bref + "/boxscores/pbp/" + pbp + ".html")
    table = soup.find("table", "no_highlight stats_table")
    if table is None:
        raise ValueError("no play-by-play table found for game " + pbp)

    data = []
    for row in table.find_all("tr"):
        # get_text() joins the text of all nested tags. The previous
        # ``.string`` lookup is None whenever a cell has more than one child
        # (e.g. text mixed with a player link), which blanked those cells.
        data.append([
            cell.get_text().replace(u"\xa0", u"")
            for cell in row.find_all("td")
        ])
    return data

if __name__ == '__main__':

    print "Writing data for game " + game

    with open(game + '.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerows(get_pbp(game))

    print game + " has been successfully scraped."

0 个答案:

没有答案