以下是引发错误的代码:
from bs4 import BeautifulSoup
import urllib
# Question script: download a flight-search results page and print the
# <svg> element that carries the class "recharts-surface".
url = "https://www.goibibo.com/flights/air-MAA-SIN-20190403--1-0-0-E-I/"
# NOTE(review): only `import urllib` appears above. That statement is not
# guaranteed to load the `request` submodule, so this attribute access can
# raise AttributeError unless `urllib.request` was imported somewhere else.
page = urllib.request.urlopen(url=url)
# Read the whole response body.
page_content = page.read()
# Parse the downloaded markup with the "html.parser" backend.
soup = BeautifulSoup(page_content,"html.parser")
# find() returns the first matching element, or None when nothing matches.
# NOTE(review): presumably this element is created by client-side JavaScript,
# which urlopen() does not execute - verify against the live page.
price = soup.find("svg",class_="recharts-surface")
print(price)
答案 0 :(得分:1)
对我有用。
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import re
import os
import time
# Answer script: load the flight-search page in a real Chrome instance driven
# by chromedriver, then parse the browser's page source with BeautifulSoup and
# print the <svg> element that carries the class "recharts-surface".
current_dir = os.getcwd()
# Download the chromedriver build for your OS and place it in the current
# working directory under the name "chromedriver1".
driver_path = os.path.join(current_dir, 'chromedriver1')
driver = webdriver.Chrome(driver_path)
try:
    driver.get("https://www.goibibo.com/flights/air-MAA-SIN-20190403--1-0-0-E-I/")
    # NOTE(review): the source is captured right after get() returns; content
    # that the page renders asynchronously may need an explicit wait - verify.
    inner_html = driver.page_source
finally:
    # Always shut the browser and the chromedriver process down, even when
    # loading the page fails; otherwise every run leaves a process behind.
    driver.quit()
soup = BeautifulSoup(inner_html, 'html.parser')
# find() returns the first matching element, or None when nothing matches.
price = soup.find("svg", class_="recharts-surface")
print(price)
答案 1 :(得分:0)
您可以从 script 标签中获取价格。您可能需要根据自己的需求调整 JSON 的解析方式,才能得到想要的结果。
import requests
from bs4 import BeautifulSoup
import re
import json
# Answer script: call the flight-search endpoint directly, extract the JSON
# payload that the returned page hands to window.parent.postMessage(...), and
# print the reference, departure, arrival and price of every listed item.
url = 'https://thor.goibibo.com/v2/thor/rest/flight/search/int?userid=asd&hash=2837423032023&application=fbs&flavour=v2&mime=html&script=y&actionData=[{%22query%22:%22air-MAA-SIN-20190403--1-0-0-E-0%22}]&slotfl=y1&nearbyfl=y1'
headers = {
    'User-Agent' : 'Mozilla/5.0'}
# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
r = requests.get(url, headers = headers, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

# select_one() returns None when no element matches, so check before use.
script_tag = soup.select_one("script[type='text/javascript']")
if script_tag is None:
    raise RuntimeError("no <script type='text/javascript'> tag found in the response")
s = script_tag.text

# The first argument of postMessage(...) is the JSON document we want;
# DOTALL lets the capture group span multiple lines.
match = re.search(r"window\.parent\.postMessage\((.*), '\*'", s ,flags=re.DOTALL)
if match is None:
    raise RuntimeError("window.parent.postMessage(...) payload not found in the script")
data = match.group(1)
result = json.loads(data)

for item in result['o']:
    print('ref ', item['id'])
    print('departure ', item['f'][0]['d'])
    print('arrival ', item['f'][0]['a'])
    # Prefer the '5' entry and fall back to 'p1' when it is absent. Only the
    # missing-key case is handled, so unrelated errors are not hidden.
    try:
        price = item['5']
    except KeyError:
        price = item['p1']
    print('price ', price)