我正在尝试从网站上抓取数据并以xls格式保存数据但是当我以xls格式保存数据时,它们都排成一行。任何人都可以帮助我如何在10个数据之后拆分行?我的代码是
from bs4 import BeautifulSoup
import urllib2
pageSource=urllib2.urlopen('http://www.livescore.com/worldcup/tables/').read()
soup = BeautifulSoup(pageSource)
alltables = soup.findAll( "table", {"class":"league-wc table bh"} )
results=[]
for table in alltables:
rows = table.findAll('tr')
_table = []
for tr in rows[1:]:
_row = []
cols = tr.findAll('td')
for td in cols:
if td.findAll('a'):
text=td.a.renderContents().strip()
else:
text=td.renderContents().strip()
_row.append(text)
_table.append(_row)
results.append(_table)
# print results
index = 1
f=open('world.xls','w')
for table in results:
for row in table:
print ','.join([str(index)] + row[1:])
f.write("\t".join([str(index)] + row[1:]))
index += 1
f.close()
输出:
1,1,Australia,0,0,0,0,0,0,0,0
2,1,Chile,0,0,0,0,0,0,0,0
3,1,Netherlands,0,0,0,0,0,0,0,0
4,1,Spain,0,0,0,0,0,0,0,0
5,1,Colombia,0,0,0,0,0,0,0,0
6,1,Greece,0,0,0,0,0,0,0,0
7,1,Ivory Coast,0,0,0,0,0,0,0,0
8,1,Japan,0,0,0,0,0,0,0,0
9,1,Costa Rica,0,0,0,0,0,0,0,0
10,1,England,0,0,0,0,0,0,0,0
11,1,Italy,0,0,0,0,0,0,0,0
12,1,Uruguay,0,0,0,0,0,0,0,0
13,1,Ecuador,0,0,0,0,0,0,0,0
14,1,France,0,0,0,0,0,0,0,0
15,1,Honduras,0,0,0,0,0,0,0,0
16,1,Switzerland,0,0,0,0,0,0,0,0
17,1,Argentina,0,0,0,0,0,0,0,0
18,1,Bosnia-Herzegovina,0,0,0,0,0,0,0,0
19,1,Iran,0,0,0,0,0,0,0,0
20,1,Nigeria,0,0,0,0,0,0,0,0
21,1,Germany,0,0,0,0,0,0,0,0
22,1,Ghana,0,0,0,0,0,0,0,0
23,1,Portugal,0,0,0,0,0,0,0,0
24,1,USA,0,0,0,0,0,0,0,0
25,1,Algeria,0,0,0,0,0,0,0,0
26,1,Belgium,0,0,0,0,0,0,0,0
27,1,Russia,0,0,0,0,0,0,0,0
28,1,South Korea,0,0,0,0,0,0,0,0
29,1,Brazil,0,0,0,0,0,0,0,0
30,1,Cameroon,0,0,0,0,0,0,0,0
31,1,Croatia,0,0,0,0,0,0,0,0
32,1,Mexico,0,0,0,0,0,0,0,0
excel输出:
1 1 Australia 0 0 0 0 0 0 0 2 1 Chile 0 0 0 0 0 0 0 3 1 Netherlands 0 0 0 0 0 0 0 4 1 Spain 0 0 0 0 0 0 0 5 1 Colombia 0 0 0 0 0 0 0 6 1 Greece 0 0 0 0 0 0 0 7 1 Ivory Coast 0 0 0 0 0 0 0 8 1 Japan 0 0 0 0 0 0 0 9 1 Costa Rica 0 0 0 0 0 0 0 10 1 England 0 0 0 0 0 0 0 11 1 Italy 0 0 0 0 0 0 0 12 1 Uruguay 0 0 0 0 0 0 0 13 1 Ecuador 0 0 0 0 0 0 0 14 1 France 0 0 0 0 0 0 0 15 1 Honduras 0 0 0 0 0 0 0 16 1 Switzerland 0 0 0 0 0 0 0 17 1 Argentina 0 0 0 0 0 0 0 18 1 Bosnia-Herzegovina 0 0 0 0 0 0 0 19 1 Iran 0 0 0 0 0 0 0 20 1 Nigeria 0 0 0 0 0 0 0 21 1 Germany 0 0 0 0 0 0 0 22 1 Ghana 0 0 0 0 0 0 0 23 1 Portugal 0 0 0 0 0 0 0 24 1 USA 0 0 0 0 0 0 0 25 1 Algeria 0 0 0 0 0 0 0 26 1 Belgium 0 0 0 0 0 0 0 27 1 Russia 0 0 0 0 0 0 0 28 1 South Korea 0 0 0 0 0 0 0 29 1 Brazil 0 0 0 0 0 0 0 30 1 Cameroon 0 0 0 0 0 0 0 31 1 Croatia 0 0 0 0 0 0 0 32 1 Mexico 0 0 0 0 0 0 0 0
在excel输出中,上面显示的所有数据都存储在一行中,但是我希望在每10列之后拆分数据,例如在chile之前,如excel ouptput.so那样输出
1 1 Australia 0 0 0 0 0 0 0 0 2
Chile 0 0 0 0 0 0 0 3 1
以及其他数据。
答案 0 :(得分:0)
对脚本的最小更改是在每行末尾的文件中写入"\n"
字符;文件上的write()
不会添加换行符。 EG:
f.write("\t".join([str(index)] + row[1:]) + "\n")
但您可能会发现csv
模块很有用。此外,您可能不应该为标签分隔值文件提供扩展名.xls
- 它实际上不是Excel文件,即使Excel可以处理它。 .tsv
或.tab
会更正确,甚至可能.txt
。