from lxml import html
import requests
import xlsxwriter
# Two independent, initially empty module-level lists. Presumably meant to
# collect URLs and page titles; neither is referenced in the visible script.
Urllist, titlecontentlist = [], []
之后我会加入不同的索引范围,但首先需要让这个程序正常运行。
# Fetch each PIB press-release page in the ID range 123310..123313, extract
# its <title>, and save the results to PIB.xlsx: a URL/TITLE header row
# followed by one row per page.
pageno = 123310

# Growing table of rows. The header goes first so it lands in spreadsheet
# row 0; every scraped page appends one more [url, title] row.
final_list = [['URL', 'TITLE']]

while pageno < 123314:
    Url = "http://pib.nic.in/newsite/PrintRelease.aspx?relid=" + str(pageno)
    page = requests.get(Url)
    tree = html.fromstring(page.text)
    titlecontent = tree.xpath('//title/text()')
    # Direct text nodes of the div with id "condiv"; extracted but not yet
    # written to the spreadsheet.
    heading = tree.xpath('//div[@id="condiv"]/text()')
    # xpath() returns a list, which is empty when the page has no <title>;
    # fall back to an empty cell instead of raising IndexError.
    title = titlecontent[0].strip() if titlecontent else ''
    final_list.append([Url, title])
    pageno += 1

# Create the workbook exactly once, after all pages have been collected.
# Re-creating it inside the loop would overwrite PIB.xlsx on every page.
# The with-block closes (and thereby writes) the file even on error.
with xlsxwriter.Workbook('PIB.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    # One spreadsheet row per entry: URL in column 0, title in column 1.
    for row, (url, title) in enumerate(final_list):
        worksheet.write(row, 0, url)
        worksheet.write(row, 1, title)
我卡在了如何把这个可动态扩展的列表保存到 Excel 文件中。
答案 0 :(得分:1)
尝试以下方法:
from lxml import html
import requests
import xlsxwriter
# Neither list is referenced anywhere in this script.
Urllist = []
titlecontentlist = []

# Fetch each PIB press-release page in the ID range 123310..123313 and write
# its URL and stripped <title> to PIB.xlsx, one row per page below a header.
# The with-block guarantees the workbook is closed (and the file written)
# even if a request or parse fails part-way through.
with xlsxwriter.Workbook('PIB.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    worksheet.write_row(0, 0, ["URL", "TITLE"])

    # Data rows start at 1, directly below the header row.
    row = 1
    for pageno in range(123310, 123314):
        Url = "http://pib.nic.in/newsite/PrintRelease.aspx?relid={}".format(pageno)
        page = requests.get(Url)
        tree = html.fromstring(page.text)
        titlecontent = tree.xpath('//title/text()')
        # Direct text nodes of the div with id "condiv"; extracted but not
        # written to the spreadsheet.
        heading = tree.xpath('//div[@id="condiv"]/text()')
        # xpath() returns a list, which is empty when the page has no
        # <title>; write an empty cell instead of raising IndexError.
        title = titlecontent[0].strip() if titlecontent else ""
        worksheet.write_row(row, 0, [Url, title])
        row += 1
这将生成一个XLSX文件,如下所示: