import requests
from bs4 import BeautifulSoup
import csv
from urlparse import urljoin
import urllib2
base_url = 'http://www.baseball-reference.com/' # base url for concatenation
data = requests.get("http://www.baseball-reference.com/teams/BAL/2014-schedule-scores.shtml") #website for scraping
soup = BeautifulSoup(data.content)
b=5
for link in soup.find_all('a'):
if not link.has_attr('href'):
continue
if link.get_text() != 'boxscore':
continue
url = base_url + link['href']
response = requests.get(url)
html = response.content
soup = BeautifulSoup(html)
# Scores
table = soup.find('table', attrs={'id': 'BaltimoreOriolespitching'})
for row in table.findAll('tr'):
list_of_cells = []
for cell in row.findAll('td'):
text = cell.text.replace(' ', '')
list_of_cells.append(text)
for list in list_of_cells:
with open('test1.csv', 'w', newline='') as fp:
a = csv.writer(fp, delimiter=',')
a.writerows(list)
我正在尝试将信息写入csv,以便每条信息都有自己的单元格。我玩代码越多,我得到一个缩进错误或第一行打印到csv,就是这样。
IndentationError:预期缩进块
答案 0 :(得分:3)
我认为首先要考虑的是移动打开文件并在循环外创建CSV编写器。我认为您在每次通过'w'
循环时都会覆盖CSV文件(for
)。所以试试这个:
with open('test1.csv', 'w', newline='') as fp:
csvw = csv.writer(fp, delimiter=',')
for link in soup.find_all('a'):
if not link.has_attr('href'):
continue
if link.get_text() != 'boxscore':
continue
url = base_url + link['href']
response = requests.get(url)
html = response.content
soup = BeautifulSoup(html)
# Scores
table = soup.find('table', attrs={'id': 'BaltimoreOriolespitching'})
for row in table.findAll('tr'):
list_of_cells = []
for cell in row.findAll('td'):
text = cell.text.replace(' ', '')
list_of_cells.append(text)
for list in list_of_cells:
csvw.writerows(list)