使用python将列标题添加到xlsx?

时间:2017-03-24 06:25:07

标签: python web-scraping beautifulsoup

以下代码会访问一组网址,并从每个页面中抓取品牌和产品名称;网址存储在 xlsx 文件中,结果输出为 xls 文件。

import requests
from bs4 import BeautifulSoup
import xlrd
import xlwt

# Scrape the brand and product title for every URL listed in an Excel
# workbook and write the results to a new .xls file (no header row).
file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"

workbook = xlrd.open_workbook(file_location)

# Input sheet: the first sheet of the workbook.
sheet = workbook.sheet_by_index(0)

# Collect the value of the first column of every row; each one is later
# used as a URL.
products = []
for r in range(sheet.nrows):
    products.append(sheet.cell_value(r,0))

book = xlwt.Workbook(encoding= "utf-8", style_compression = 0)
# NOTE: `sheet` is rebound here; from this point on it is the output sheet.
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

# One output row per URL, in input order: URL, brand, product name.
for index, url in enumerate(products):
    source =  requests.get(url)
    data = source.content
    soup = BeautifulSoup(data, "lxml")

    sheet.write(index, 0, url)

    # Brand: text of the first element matching the ".brand" selector.
    try:
        Brand = soup.select(".brand")[0].text
        sheet.write(index, 1, Brand)

    # Any failure (e.g. no matching element) leaves an empty cell instead.
    except Exception:
        sheet.write(index, 1, "")

    # Product name: text of the first element matching ".product-title".
    try:
        Product_Name = soup.select(".product-title")[0].text
        sheet.write(index, 2, Product_Name)

    # Same fallback as for the brand column.
    except Exception:
        sheet.write(index, 2, "")

# The workbook is written to disk only once, after all URLs are processed.
book.save("Jabong Output.xls")

输出如下:

http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html          Belle Fille              Grey Solid Winter Jacket
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html                 Femella              Red Solid Winter Jacket
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html     Style Quotient Fuchsia   Striped Sweatshirt

我需要在输出中添加标题,以便它如下所示:

URL                                                                              Brand                    Product_Name
http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html          Belle Fille              Grey Solid Winter Jacket
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html              Femella                   Red Solid Winter Jacket
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html     Style Quotient Fuchsia   Striped Sweatshirt

1 个答案:

答案 0 :(得分:1)

您可以在写入条目之前,先在第 0 行写入列名;数据行则相应地从第 1 行开始写入,以免覆盖标题行。

import requests
from bs4 import BeautifulSoup
import xlrd
import xlwt

# Scrape the brand and product title for every URL listed in an Excel
# workbook and write them, under a header row, to a new .xls file.
file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"

# NOTE: xlrd 2.0+ only reads legacy .xls files; opening this .xlsx workbook
# requires xlrd < 2.0.
workbook = xlrd.open_workbook(file_location)

# Kept under its own name so it is not confused with the output sheet below.
input_sheet = workbook.sheet_by_index(0)

# One product URL per row, taken from the first column of the input sheet.
products = [input_sheet.cell_value(r, 0) for r in range(input_sheet.nrows)]

book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

# Header row; the data rows therefore start at row 1.
for column, title in enumerate(("URL", "Brand", "Product_Name")):
    sheet.write(0, column, title)


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or ""."""
    matches = soup.select(selector)
    return matches[0].text if matches else ""


for row, url in enumerate(products, start=1):
    sheet.write(row, 0, url)

    try:
        # Without a timeout a single stalled host would hang the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # An unreachable or malformed URL (e.g. a blank input cell) should
        # cost only this row, not the rows that were already scraped.
        sheet.write(row, 1, "")
        sheet.write(row, 2, "")
        continue

    soup = BeautifulSoup(response.content, "lxml")
    sheet.write(row, 1, _first_text(soup, ".brand"))
    sheet.write(row, 2, _first_text(soup, ".product-title"))

book.save("Jabong Output.xls")

或者,您可以使用 pandas:先把结果收集到 DataFrame 中,通过 columns 参数指定的列名会在导出时自动作为标题行写入:

import pandas as pd
# Collect one (url, brand, product_name) tuple per product page.
rows = []
for url in products:
    # Without a timeout a single stalled host would hang the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "lxml")

    # A page may lack either element; use an empty string instead of
    # failing with IndexError, matching the xlwt version's empty cells.
    brand_tags = soup.select(".brand")
    title_tags = soup.select(".product-title")
    rows.append((
        url,
        brand_tags[0].text if brand_tags else "",
        title_tags[0].text if title_tags else "",
    ))

# The column names become the header row of the exported sheet;
# index=False leaves out the DataFrame's row-number column.
df = pd.DataFrame(rows, columns=["URL", "Brand", "Product_Name"])
df.to_excel("output.xlsx", index=False)