我正在用 BeautifulSoup 写一个网络爬虫。该脚本能抓取到正确的数据并将其以 CSV 格式写入文件。但是，当在代码末尾附近试图读回数据时——我用另一个变量名重新打开该文件并读取——最后一条打印语句的输出却是来自原始网站的一堆 HTML 代码。我怀疑它来自 soup 对象。这到底是怎么回事？
import datetime
import csv
import urllib
from bs4 import BeautifulSoup
import urllib2
file_name = "/users/ripple/Dropbox/Python/FinViz.txt"
file = open(file_name,"w")
url = "http://www.finviz.com"
print 'Grabbing from: ' + url + '...\n'
try:
r = urllib2.urlopen(url)
except urllib2.URLError as e:
r = e
if r.code in (200, 401):
#get the table data from the page
data = urllib.urlopen(url).read()
#send to beautiful soup
soup = BeautifulSoup(data)
i=1
for table in soup("table", { "class" : "t-home-table"}):
#First and second tables
if i==1 or i==2:
for tr in table.findAll('tr')[1:]:
if i<3:
col = tr.findAll('td')
ticker = col[0].get_text().encode('ascii','ignore')
price = col[1].get_text().encode('ascii','ignore')
change = col[2].get_text().encode('ascii','ignore')
volume = col[3].get_text().encode('ascii','ignore')
metric = col[5].get_text().encode('ascii','ignore')
record = ticker + ',' + price + ',' + change + ',' + volume + ',' + metric + '\n'
print record
file.write(record)
if i==3:
file.write('END\n')
# Third and fourth tables
if i==3 or i==4:
for tr in table.findAll('tr')[1:]:
col = tr.findAll('td')
ticker1 = col[0].get_text().encode('ascii','ignore')
ticker2 = col[1].get_text().encode('ascii','ignore')
ticker3 = col[2].get_text().encode('ascii','ignore')
ticker4 = col[3].get_text().encode('ascii','ignore')
metric = col[5].get_text().encode('ascii','ignore')
record = ticker1 + ',' + ticker2 + ',' + ticker3 + ',' + ticker4 + ',' + metric + '\n'
print record
file.write(record)
i+=1
#if the page does not open
else:
print "ERROR:"
file.close()
#open written file to read tickers and download tables from finviz
file = open(file_name,"r")
finviz_csv = csv.reader(file)
for row in finviz_csv:
stock = col[0]
print stock
答案 0（得分：0）
我很快就自己解决了这个问题：`col` 是爬取循环遗留下来的 `<td>` 标签列表，读取 CSV 时应当使用 `row` 中的字段。代码应该是这样的：
for row in finviz_csv:
colnum = 1
for col in row:
if colnum==1:
stock = col
print col