我尝试使用Python将此basketball-reference example中的播放表格刮到CSV文件中。
当我运行此代码时,表格被缩短并且缺少许多单元格。我是一个编程n00b,任何帮助都将不胜感激。
from bs4 import BeautifulSoup
from urllib2 import urlopen
import csv
bref = "http://www.basketball-reference.com"
print "Enter game code:"
game = raw_input("> ")
def make_soup(url):
return BeautifulSoup(urlopen(url), "lxml")
def get_pbp(pbp):
soup = make_soup(bref + "/boxscores/pbp/" + game + ".html")
table = soup.find("table", "no_highlight stats_table")
rows = [row.find_all("td") for row in table.find_all("tr")]
data = []
for row in rows:
values = []
for value in row:
if value.string is None:
values.append(u"")
else:
values.append(value.string.replace(u"\xa0", u""))
data.append(values)
return data
if __name__ == '__main__':
print "Writing data for game " + game
with open(game + '.csv', 'w') as f:
writer = csv.writer(f)
writer.writerows(get_pbp(game))
print game + " has been successfully scraped."