我用 Python 写了一个网页爬虫(web scraper),代码如下:
from selenium import webdriver
from BeautifulSoup import BeautifulSoup
# Load the page in Firefox through Selenium and capture the rendered HTML.
wd = webdriver.Firefox()
try:
    wd.get('http://www.nseindia.com/live_market/dynaContent/live_analysis/top_gainers_losers.htm?cat=G&utm_campaign=website&utm_source=sendgrid.com&utm_medium=email')
    html_page = wd.page_source
finally:
    # Always shut the browser down, even when loading the page fails,
    # so no Firefox process is left behind.
    wd.quit()

soup = BeautifulSoup(html_page)
table = soup.find("table", attrs={"id": "topGainers"})
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise SystemExit('table with id "topGainers" not found in the page')
print("success")

# The first <tr> is skipped; for every other row, print the first text node
# of cells 0, 5, 6 and 9, each followed by "|".
for row in table.findAll('tr')[1:]:
    cols = row.findAll('td')
    if len(cols) < 10:
        # Row does not have the cells indexed below; skip it rather than
        # crash with an IndexError.
        continue
    for td in (cols[0], cols[5], cols[6], cols[9]):
        text = td.find(text=True)
        if text:
            print(text + "|")
现在我希望把输出的文本以表格形式保存到一个 HTML 文件中。该怎么做?
答案 0 :(得分:1)
try:
    from html import escape  # Python 3.2+
except ImportError:  # Python 2 has no "html" module
    from cgi import escape

# Column titles shown in the header row of the table.
listofheaders = ['header1', 'header2', 'header3']
# One inner list per table row; every item becomes one cell.
listofrows = [['a', 'b', 'c'], ['a', 'b', 'c'], ['a', 'b', 'c']]


def build_html_table(headers, rows):
    """Return a complete HTML page that shows one bordered table.

    headers -- iterable of column titles, rendered as <th> cells.
    rows    -- iterable of rows; each row is an iterable of cell values,
               rendered as <td> cells.

    Every value is converted with str() and HTML-escaped, so text such as
    "M&M" or "<b>" is displayed literally instead of corrupting the markup.
    """
    parts = [
        '<!DOCTYPE html>\n<html>\n<head>\n<style>\ntable,th,td\n{\nborder:1px solid black;\nborder-collapse:collapse;\n}\nth,td\n{\npadding:5px;\n}\n</style>\n</head>\n\n<body>\n<table style="width:300px">\n<tr>\n '
    ]
    # quote=False: values are placed in element content only, so just
    # &, < and > need escaping (same result on Python 2 and 3).
    parts.extend(
        '<th>' + escape(str(header), quote=False) + '</th>\n'
        for header in headers
    )
    # Close the header row before the data rows start.
    parts.append('</tr>\n')
    for row in rows:
        parts.append(' <tr>\n')
        parts.extend(
            ' <td>' + escape(str(item), quote=False) + '</td>\n'
            for item in row
        )
        parts.append(' </tr>\n')
    parts.append('</table>\n</body>\n\n</html>')
    return ''.join(parts)


htmlstuff = build_html_table(listofheaders, listofrows)

# Write the page; the with-block closes the file even if write() fails.
with open('webpage.html', 'w') as f:
    f.write(htmlstuff)
您还可以使用 webbrowser 模块,自动在浏览器中打开生成的页面。
import webbrowser
# Hand the generated page to webbrowser.open so it is displayed.
# NOTE(review): a bare relative path is passed here; confirm this works on
# the target platforms or switch to a file:// URL.
report_path = 'webpage.html'
webbrowser.open(report_path)