Question

from lxml import html
import requests
import xlsxwriter

# NOTE(review): neither of these lists is read or appended to anywhere in the
# script shown below.
Urllist = []
titlecontentlist =[]

我将添加不同的索引范围，但首先需要让这个程序能够正常运行。

# First release id (the `relid` query parameter) to fetch.
pageno = 123310
# `enter code here`

# Fetch the pages for relid 123310 through 123313, pull the <title> text out of
# each one, and try to write a URL/TITLE table to 'PIB.xlsx'.
while pageno<123314:
    Url = "http://pib.nic.in/newsite/PrintRelease.aspx?relid="+ str(pageno) + ""
    page = requests.get(Url)

    tree = html.fromstring(page.text)

    # Both XPath queries return lists of text nodes; `heading` is not used by
    # any live (non-commented) code below.
    titlecontent = tree.xpath('//title/text()')
    heading = tree.xpath('//div[@id="condiv"]/text()')

    #for fj in titlecontent:
    #        maintitle = fj
    #        print type(maintitle)

    #print Url
    #print titlecontent[0]
    #print "\n"*3,Url,"\n"+maintitle
    #for bodycontent in heading: 
     #   b=bodycontent    
      #  print b
    # Two pairs: the header row and the data row for the current page.
    # titlecontent[0] raises IndexError if the title query matched nothing.
    final_list = (['URL' ,'TITLE'],[ Url,titlecontent[0] ])

    # NOTE(review): a fresh Workbook for the same 'PIB.xlsx' path is created on
    # every pass of the outer loop, and only the last one is closed (after the
    # loop). Presumably only the final page ends up in the file -- confirm.
    workbook = xlsxwriter.Workbook('PIB.xlsx')
    worksheet = workbook.add_worksheet()

    # Some data we want to write to the worksheet.

    # `i` is advanced inside the for loop (twice per pass, once per pair), so
    # the while body runs exactly twice: first starting at cell (0, 0), then at
    # cell (2, 2), writing the same two pairs each time.
    i=0
    while i< 3:
        row = i
        col = i

        # Iterate over the data and write it out row by row.
        for item, cost in (final_list):
            worksheet.write(row, col,     item)
            worksheet.write(row, col + 1, cost)
            row += 1   
            i+=1   

    #final_list = [ Url,titlecontent[0] ]
    #print final_list

    pageno+=1

# Closes only the workbook object created during the last loop iteration.
workbook.close()

我卡在了如何把这个可动态扩展的列表保存到Excel文件中。

Answer 1

尝试以下方法：

from lxml import html
import requests
import xlsxwriter

Urllist = []
titlecontentlist =[]

# Scrape the <title> of each release page and store one (URL, TITLE) row per
# page in 'PIB.xlsx'. The workbook is used as a context manager so the file is
# closed (and therefore written) even if a request or parse step raises.
with xlsxwriter.Workbook('PIB.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    worksheet.write_row(0, 0, ["URL", "TITLE"])

    # Row 0 holds the header, so data rows are numbered from 1.
    for row, pageno in enumerate(range(123310, 123314), start=1):
        Url = "http://pib.nic.in/newsite/PrintRelease.aspx?relid={}".format(pageno)
        # A timeout keeps one unresponsive page from hanging the whole run.
        page = requests.get(Url, timeout=30)
        tree = html.fromstring(page.text)

        # xpath() returns a list of text nodes; it is empty when the page has
        # no <title> text, so fall back to an empty cell instead of raising
        # IndexError.
        titlecontent = tree.xpath('//title/text()')
        title = titlecontent[0].strip() if titlecontent else ""

        worksheet.write_row(row, 0, [Url, title])

这将生成一个XLSX文件，如下所示：

创建URL和标题的动态列表，并将其保存在Excel文件中

1 个答案: