我正在检查以下产品详细信息: https://www.ulta.com/invisible-physical-defense-sunscreen-spf-30?productId=pimprod2014440
我想使用 HTTP GET 请求获取评论部分,然后从返回的 JSON 响应中抓取数据。我已经有了一个用于抓取评论的请求,但无法解析数据,也就是说,我不知道如何从 JSON 响应中提取所需的详细信息。我想抓取每位发表评论的用户的以下信息:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from time import sleep
from bs4 import BeautifulSoup
from os import popen
import requests
import json
# Open Chrome in remote-debugging mode so Selenium can attach to a real
# browser session rather than launching a fresh automated one.
popen('"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe" --remote-debugging-port=9014')

# Imported here so this snippet stays self-contained.
from urllib.parse import parse_qs, urlparse

# Declare the Chrome options.
chrome_options = Options()
# Debug address of the Chrome instance started above (controls the actual
# browser rather than a bot-driven one).
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9014")
# Define the driver.
chrome = webdriver.Chrome(options=chrome_options)

url1 = "https://www.ulta.com/invisible-physical-defense-sunscreen-spf-30?productId=pimprod2014440"
#chrome.maximize_window()
chrome.get(url1)
productPageUrl = chrome.current_url
productPageTitle = chrome.title

soup = BeautifulSoup(chrome.page_source, "lxml")
# productPrice = soup.find("div", class_="ProductPricingPanel").text.strip("Price")
# find() returns None when the element is absent; fall back to an empty
# string instead of failing with AttributeError on `.text`.
details_node = soup.find("div", class_="ProductDetail__productContent")
productDetails = details_node.text.strip() if details_node is not None else ""

# Read the product id from the query string by parameter name, so extra query
# parameters (or a changed order) do not break the extraction.
product_id = parse_qs(urlparse(url1).query)["productId"][0]
http_get_request = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/"
    f"{product_id}/reviews?apikey=daa0f241-c242-4483-afb7-4449942d1a2b"
)

# A timeout prevents hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
json_data = requests.get(http_get_request, timeout=30)
json_data.raise_for_status()
parse_json = json_data.json()
print(parse_json)
答案 0 :(得分:0)
import requests
from datetime import datetime
import csv
REVIEWS_URL = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2014440/"
    "reviews?apikey=daa0f241-c242-4483-afb7-4449942d1a2b"
)
CSV_HEADER = ["Nickname", "Location", "Comment", "Date"]


def first_page_reviews(payload):
    """Return the review list of the first entry in ``payload["results"]``.

    Returns an empty list when the payload has no results or the first
    result carries no ``"reviews"`` key, instead of raising.
    """
    results = payload.get("results") or []
    if not results:
        return []
    return results[0].get("reviews", [])


def review_to_row(item):
    """Convert one review dict into a ``[nickname, location, comment, date]`` row.

    The values are read from ``item["details"]``. Missing text fields become
    empty strings. ``created_date`` is divided by 1000 before being passed to
    ``datetime.fromtimestamp`` (so it is treated as milliseconds), which
    yields a naive local-time ``datetime``; a missing ``created_date`` becomes
    an empty string.
    """
    details = item.get("details", {})
    created_ms = details.get("created_date")
    if created_ms is None:
        created = ""
    else:
        created = datetime.fromtimestamp(int(created_ms / 1000))
    return [
        details.get("nickname", ""),
        details.get("location", ""),
        details.get("comments", ""),
        created,
    ]


def main():
    """Download the first page of reviews and write them to ``data.csv``."""
    # A timeout prevents hanging forever on a stalled connection, and
    # raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
    response = requests.get(REVIEWS_URL, timeout=30)
    response.raise_for_status()
    reviews = first_page_reviews(response.json())

    # newline="" is what the csv module expects; an explicit UTF-8 encoding
    # keeps non-ASCII review text from failing on platforms whose default
    # encoding is narrower.
    with open("data.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(review_to_row(item) for item in reviews)


if __name__ == "__main__":
    main()
输出:View-Online