使用 Selenium 和 Beautiful Soup 进行网页抓取 - 转换为 CSV 文件

时间:2021-04-28 15:48:41

标签: python selenium beautifulsoup selenium-chromedriver export-to-csv

我需要从电子商务网站上抓取价格、品牌和产品名称。我现在的代码抓取到的品牌和产品名称是合并在一起的,价格也是合并在一起的(折扣价和原价)。如果我想把品牌与产品名称、折扣价与原价分别拆开,需要修改什么?最后,所有数据都需要写入一个 CSV 文件,但我也没能写出正确的代码。

您可以在下面找到我的代码。 非常感谢您的时间!

from pathlib import Path
from selenium import webdriver
from bs4 import BeautifulSoup

# Filesystem path of the chromedriver executable; get_html() passes it to
# webdriver.Chrome(). Adjust it to wherever chromedriver lives on your machine.
PATH = "/Users/Ziye/Desktop/Python/chromedriver"

def get_html(url):
    """Open *url* in a Chrome session and return the page source as a string.

    The browser is shut down before returning, even when loading the page
    raises, so repeated calls do not leave Chrome/chromedriver processes
    running in the background.
    """
    driver = webdriver.Chrome(PATH)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()

def main():
    """Download the search result page and print the scraped text of each card.

    For every product card, the text of the two class-matched child elements
    is printed (per the question: the combined brand/product name and the
    combined prices).
    """
    url = "https://www.zalando.de/damen/?q=michael+michael+kors+taschen&p=2"
    html = get_html(url)

    soup = BeautifulSoup(html, "lxml")
    cards = soup.find_all("div", {"class": "qMZa55 SQGpu8 iOzucJ JT3_zV DvypSJ"})
    print(len(cards))

    # This loop must live inside main(): `cards` is local to this function,
    # so iterating over it at module level raises NameError.
    for card in cards:
        print(card.find(class_="hPWzFB").get_text())

        print(card.find(class_="_0xLoFW u9KIT8 _7ckuOK").get_text())


# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

1 个答案:

答案 0 :(得分:1)

我会在这里使用 pandas。遍历 cards 时,把每张卡片的数据放入一个字典(键就是您想要的列名),再把这些字典追加到一个列表中。然后用该列表构建 DataFrame 并写入文件。

from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

# Path of the chromedriver executable passed to webdriver.Chrome().
# NOTE: the second assignment overrides the first, so only the Windows path
# is actually used; keep whichever line matches your machine.
PATH = "/Users/Ziye/Desktop/Python/chromedriver"
PATH = "C:/chromedriver_win32/chromedriver.exe"

def get_html(url):
    """Open *url* in a Chrome session and return the page source as a string.

    The browser is shut down before returning, even when loading the page
    raises, so repeated calls do not leave Chrome/chromedriver processes
    running in the background.
    """
    driver = webdriver.Chrome(PATH)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()

def main():
    """Scrape the product cards of the search page and save them to ``file.csv``.

    Each card contributes one row: ``col1`` holds the text of the first
    class-matched element and ``col2`` the stripped text of the second one.
    ``col2`` is additionally split on spaces into ``col_2A`` and ``col_2B``.
    When no card matches, a CSV containing only the header row is written.
    """
    url = "https://www.zalando.de/damen/?q=michael+michael+kors+taschen&p=2"
    html = get_html(url)

    soup = BeautifulSoup(html, "lxml")
    cards = soup.find_all("div", {"class": "qMZa55 SQGpu8 iOzucJ JT3_zV DvypSJ"})
    print(len(cards))

    rows = []
    for card in cards:
        # Look each element up once and reuse the text for both the console
        # output and the CSV row.
        col1_text = card.find(class_="hPWzFB").get_text()
        col2_text = card.find(class_="_0xLoFW u9KIT8 _7ckuOK").get_text()

        print(col1_text)
        print(col2_text)

        rows.append({'col1': col1_text, 'col2': col2_text.strip()})

    # Naming the columns up front keeps 'col2' present even when no card
    # matched; a frame built from an empty list has no columns at all and
    # df['col2'] would raise KeyError.
    df = pd.DataFrame(rows, columns=['col1', 'col2'])
    if rows:
        # NOTE: the assignment needs the split to yield exactly two columns,
        # i.e. no value may contain more than one space.
        df[['col_2A', 'col_2B']] = df['col2'].str.split(' ', expand=True)
    else:
        # Nothing to split; still emit the complete header row.
        df = df.reindex(columns=['col1', 'col2', 'col_2A', 'col_2B'])
    df.to_csv('file.csv', index=False)

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

更好的解决方案:

我在这里再提供一个更好、更稳健的解决方案:可以直接通过 API 获取数据,无需使用 Selenium。

import requests
import pandas as pd

url = 'https://en.zalando.de/api/catalog/articles'
# Browser-like User-Agent header sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'}
# Query parameters; 'offset' is advanced below to walk through the pages.
payload = {
    'categories': 'women',
    'limit': '84',
    'offset': '0',
    'query': 'michael michael kors taschen',
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


def _fetch_page():
    """Request the page described by the current ``payload`` and return its JSON.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is reported as such instead of as a JSON decoding
            failure.
    """
    response = requests.get(url, headers=headers, params=payload,
                            timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


# The first response also tells us how many pages exist and how many
# articles a page holds (used as the offset step).
jsonData = _fetch_page()

total_pages = jsonData['pagination']['page_count']
per_page = len(jsonData['articles'])

rows = []
for page in range(total_pages):
    # Page 0 is already in hand from the request above; fetch only the rest.
    if page:
        payload['offset'] = per_page * page
        jsonData = _fetch_page()

    rows.extend(jsonData['articles'])
    print(f'Page: {page + 1} of {total_pages}')

# Flatten nested dicts (e.g. 'price') into dotted columns such as 'price.original'.
df = pd.json_normalize(rows)

输出:这将在大约 2 秒内获得所有数据,328 行。

# Print the first five rows as plain text.
print(df.head(5).to_string())
             sku                                        name       sizes                                                                  url_key                                                                                                                                                                                                                                                                             media            brand_name  is_premium                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
family_articles                                                                          flags product_group                                                                       delivery_promises price.original price.promotional  price.has_different_prices  price.has_different_original_prices  price.has_different_promotional_prices  price.has_discount_on_selected_sizes_only
0  MK151H0IH-Q11                  VOYAGER - Tote bag - black  [One Size]  michael-michael-kors-voyager-signature-tote-handbag-black-mk151h0ih-q11  [{'path': 'spp-media-p1/76f7c78f9cc63f1192a3e2c29946130f/0b46cac7883e4c7ba71dd5b14b8c3811.jpg', 'role': 'DEFAULT', 'packet_shot': False}, {'path': 'spp-media-p1/c03dbf3ac05a32b5800410422333f513/8b7a7099a3254ebca45791b1f72c34fa.jpg', 'role': 'HOVER', 'packet_shot': False}]  MICHAEL Michael Kors        True                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      [{'sku': 'MK151H0IH-Q11', 'url_key': 'michael-michael-kors-voyager-signature-tote-handbag-black-mk151h0ih-q11', 'media': [{'path': 'spp-media-p1/c9ab2b63cfe739e5bbe98e65f59bbecf/20d472ce70d24b289f4ec45672974773.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'VOYAGER - Tote bag - black', 'sizes': ['One Size'], 'price': {'original': '249,95 €', 'promotional': '174,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0IH-C11', 'url_key': 'michael-michael-kors-voyager-signature-tote-handbag-mk151h0ih-c11', 'media': [{'path': 'spp-media-p1/8d4ff9ca955f36259d05ac4984639516/6e08275415d349b9a6f4394c19c5f23f.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'VOYAGER - Tote bag - pearl grey', 'sizes': ['One Size'], 'price': {'original': '249,95 €', 'promotional': '249,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 
'plus_delivery_flag'}]}]  [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}]   accessoires  [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]       249,95 €          174,95 €                       False                                False                                   False                                      False
1  MK151H0E6-B11  POUCHES CHAIN POUCHETTE - Clutch - vanilla  [One Size]                           michael-michael-kors-clutch-sand-mk151h0e6-b11  [{'path': 'spp-media-p1/38b844528a8538d6abd3af70aa241cb9/b7bf5696bb51419c82a7e93ea1e11f79.jpg', 'role': 'DEFAULT', 'packet_shot': False}, {'path': 'spp-media-p1/60c65b6cad603e1b90cbb35564a8ed06/9e4560de580e49f680f6f0df8b3db0e1.jpg', 'role': 'HOVER', 'packet_shot': False}]  MICHAEL Michael Kors        True                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   [{'sku': 'MK151H0E6-B11', 'url_key': 'michael-michael-kors-clutch-sand-mk151h0e6-b11', 'media': [{'path': 'spp-media-p1/1e2e87e311d035a1b74c2cdd36025ae9/244c417b076d4845bcaeabb119051aab.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'POUCHES CHAIN POUCHETTE - Clutch - vanilla', 'sizes': ['One Size'], 'price': {'original': '124,95 €', 'promotional': '124,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0E6-O11', 'url_key': 'michael-michael-kors-clutch-brown-mk151h0e6-o11', 'media': [{'path': 'spp-media-p1/9b5386b8220e3e808372d90404028a3a/948b37c4fb574c64acfc52ee05e4a7f5.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'POUCHES CHAIN POUCHETTE - Clutch - brown', 'sizes': ['One Size'], 'price': {'original': '124,95 €', 'promotional': '124,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 
'plus_delivery_flag'}]}]                                                                             []   accessoires  [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]       124,95 €          124,95 €                       False                                False                                   False                                      False
2  MK151H0ST-O11       SULLIVAN TOTE - Handbag - brown/acorn  [One Size]      michael-michael-kors-sullivan-tote-handbag-brownacorn-mk151h0st-o11   [{'path': 'spp-media-p1/346d861e13623b8a8b43d0d3fcbad325/60cc8931e3ce4c78a37942ddaf046193.jpg', 'role': 'DEFAULT', 'packet_shot': True}, {'path': 'spp-media-p1/0df2c6e3cd033641b05128cb36c4f687/2a2a3d47a0de46faa64fe2981ae3b9d9.jpg', 'role': 'HOVER', 'packet_shot': False}]  MICHAEL Michael Kors        True                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  [{'sku': 'MK151H0ST-O11', 'url_key': 'michael-michael-kors-sullivan-tote-handbag-brownacorn-mk151h0st-o11', 'media': [{'path': 'spp-media-p1/346d861e13623b8a8b43d0d3fcbad325/60cc8931e3ce4c78a37942ddaf046193.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'SULLIVAN TOTE - Handbag - brown/acorn', 'sizes': ['One Size'], 'price': {'original': '274,95 €', 'promotional': '274,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0ST-O12', 'url_key': 'michael-michael-kors-sullivan-tote-handbag-brownblack-mk151h0st-o12', 'media': [{'path': 'spp-media-p1/809deafa120433bba6b979fda5deaf36/0dd2ca0f586445ed94d54300209c2765.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'SULLIVAN TOTE - Handbag - brown/black', 'sizes': ['One Size'], 'price': {'original': '274,95 €', 'promotional': '274,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 
'plus_delivery_flag'}]}]                                                                             []   accessoires  [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]       274,95 €          274,95 €                       False                                False                                   False                                      False
3  MK151H0E6-O11    POUCHES CHAIN POUCHETTE - Clutch - brown  [One Size]                          michael-michael-kors-clutch-brown-mk151h0e6-o11  [{'path': 'spp-media-p1/1e96df7e492b39aebda591a3792ef1e9/afe01fccd4864bc1bd2c0df17c8bf0fd.jpg', 'role': 'DEFAULT', 'packet_shot': False}, {'path': 'spp-media-p1/9b699c2aa1b535158848cc44c274520f/60aab3e3c4bf49c6b4ee2e4df3db5aeb.jpg', 'role': 'HOVER', 'packet_shot': False}]  MICHAEL Michael Kors        True                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   [{'sku': 'MK151H0E6-O11', 'url_key': 'michael-michael-kors-clutch-brown-mk151h0e6-o11', 'media': [{'path': 'spp-media-p1/9b5386b8220e3e808372d90404028a3a/948b37c4fb574c64acfc52ee05e4a7f5.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'POUCHES CHAIN POUCHETTE - Clutch - brown', 'sizes': ['One Size'], 'price': {'original': '124,95 €', 'promotional': '124,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0E6-B11', 'url_key': 'michael-michael-kors-clutch-sand-mk151h0e6-b11', 'media': [{'path': 'spp-media-p1/1e2e87e311d035a1b74c2cdd36025ae9/244c417b076d4845bcaeabb119051aab.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'POUCHES CHAIN POUCHETTE - Clutch - vanilla', 'sizes': ['One Size'], 'price': {'original': '124,95 €', 'promotional': '124,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 
'plus_delivery_flag'}]}]                                                                             []   accessoires  [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]       124,95 €          124,95 €                       False                                False                                   False                                      False
4  MK151H0SP-Q11          MAXINE MESSENGER - Handbag - black  [One Size]      michael-michael-kors-maxinesm-messenger-handbag-black-mk151h0sp-q11  [{'path': 'spp-media-p1/43f07a4a9ceb3c6ca8195e0014e13918/cdfcd04d7f51427e8f9a0eca0caba2f9.jpg', 'role': 'DEFAULT', 'packet_shot': False}, {'path': 'spp-media-p1/4fa0d8348a683e569c4b4fb0ab8bcc31/5c9f9d9038e0496e910b3f564feab32e.jpg', 'role': 'HOVER', 'packet_shot': False}]  MICHAEL Michael Kors        True  [{'sku': 'MK151H0SP-Q11', 'url_key': 'michael-michael-kors-maxinesm-messenger-handbag-black-mk151h0sp-q11', 'media': [{'path': 'spp-media-p1/7075bf593ad13cba80d4f8e82dd4aa3f/fcfdad585bbc4339b47577f37480fb00.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'MAXINE MESSENGER - Handbag - black', 'sizes': ['One Size'], 'price': {'original': '294,95 €', 'promotional': '206,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0SP-A11', 'url_key': 'michael-michael-kors-maxine-messenger-handbag-optic-white-mk151h0sp-a11', 'media': [{'path': 'spp-media-p1/89615d7cf5ae362db1dfb18199ebac91/9f98d12b9fe14c70b0519045cac290c2.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'MAXINE MESSENGER - Handbag - optic white', 'sizes': ['One Size'], 'price': {'original': '294,95 €', 'promotional': '205,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0SP-J11', 'url_key': 
'michael-michael-kors-messenger-handbag-soft-pink-mk151h0sp-j11', 'media': [{'path': 'spp-media-p1/d3b5ffc2e251360291ece80b44f1a2e0/b6512dbb6a7749c189a6d33177f922df.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'MAXINE MESSENGER - Handbag - soft pink', 'sizes': ['One Size'], 'price': {'original': '294,95 €', 'promotional': '294,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0SP-B11', 'url_key': 'michael-michael-kors-messenger-handbag-acorn-mk151h0sp-b11', 'media': [{'path': 'spp-media-p1/7e96e989cf643a2f957b6f6640056837/cf772fdbd7f44e929bc8af90d750ae94.jpg', 'role': 'FAMILY', 'packet_shot': True}], 'name': 'MAXINE MESSENGER - Handbag - acorn', 'sizes': ['One Size'], 'price': {'original': '294,95 €', 'promotional': '294,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]}, {'sku': 'MK151H0SP-K11', 'url_key': 'michael-michael-kors-maxinesm-messenger-handbag-navy-mk151h0sp-k11', 'media': [{'path': 'spp-media-p1/72638ccdea433a7188c9f61582433921/17e1c0f4837146d28d79cb889058ddef.jpg', 'role': 'FAMILY', 'packet_shot': False}], 'name': 'MAXINE MESSENGER - Handbag - navy', 'sizes': ['One Size'], 'price': {'original': '294,95 €', 'promotional': '205,95 €', 'has_different_prices': False, 'has_different_original_prices': False, 'has_different_promotional_prices': False, 'has_discount_on_selected_sizes_only': False}, 'flags': [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}], 'delivery_promises': [{'key': 'plus_flag', 'label': 'Premium Delivery', 
'media_key': 'plus_delivery_flag'}]}]  [{'key': 'discountRate', 'value': '-30%', 'tracking_value': 'discount rate'}]   accessoires  [{'key': 'plus_flag', 'label': 'Premium Delivery', 'media_key': 'plus_delivery_flag'}]       294,95 €          206,95 €                       False                                False                                   False                                      False