即使 div 存在,按 id 查找也返回 None

时间:2021-05-17 17:25:10

标签: python selenium web-scraping beautifulsoup

我需要一点帮助。我尝试从一个网站获取一些数据,但没有成功:虽然该 div 在页面中存在,查找结果却是 None。你知道为什么吗?这是网站:https://iasi.inoras.ro/evenimente/

我不想使用Selenium,还有其他方法吗?

enter image description here

from bs4 import BeautifulSoup
import requests

# Page whose event list the script tries to scrape.
URL = "https://iasi.inoras.ro/evenimente/"

# Download the HTML exactly as the server sends it and parse it.
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

# Look up the element with id "evcal_list"; find() yields None when the
# parsed HTML contains no element with that id.
content = soup.find(id = 'evcal_list')
# NOTE(review): when `content` is None this loop raises TypeError, because
# None is not iterable.
for i in content:
    print(i)

2 个答案:

答案 0 :(得分:2)

数据是通过 JavaScript 动态加载的。您可以参考下面的示例,了解如何使用 requests 模块加载这些数据:

import json
import requests
import datetime as dt
from bs4 import BeautifulSoup

# AJAX endpoint that receives the calendar request.
api_url = "https://iasi.inoras.ro/wp-admin/admin-ajax.php"

# Shortcode options for calendar "evcal_calendar_172", keyed by the bare
# option name. Insertion order is kept so the generated form fields appear
# in the same order as when they are written out one by one.
_shortcode_options = {
    "_cal_evo_rtl": "no",
    "accord": "no",
    "cal_id": "",
    "cal_init_nonajax": "no",
    "calendar_type": "daily",
    "day_incre": "0",
    "dv_view_style": "def",
    "etc_override": "no",
    "etop_month": "no",
    "evc_open": "no",
    "event_count": "0",
    "event_location": "all",
    "event_order": "DESC",
    "event_organizer": "all",
    "event_past_future": "all",
    "event_tag": "all",
    "event_type": "all",
    "event_type_2": "all",
    "event_type_3": "all",
    "event_type_4": "all",
    "event_type_5": "all",
    "eventtop_style": "0",
    "exp_jumper": "no",
    "exp_so": "no",
    "filter_relationship": "AND",
    "filter_show_set_only": "no",
    "filter_type": "default",
    "filters": "yes",
    "fixed_day": "17",
    "fixed_month": "5",
    "fixed_year": "2021",
    "focus_end_date_range": "1622505599",
    "focus_start_date_range": "1619827200",
    "ft_event_priority": "no",
    "header_title": "",
    "hide_arrows": "no",
    "hide_date_box": "no",
    "hide_empty_months": "no",
    "hide_end_time": "no",
    "hide_ft": "no",
    "hide_month_headers": "no",
    "hide_mult_occur": "no",
    "hide_past": "no",
    "hide_past_by": "ee",
    "hide_so": "no",
    "hide_sort_options": "no",
    "ics": "no",
    "jumper": "no",
    "jumper_count": "5",
    "jumper_offset": "0",
    "lang": "L1",
    "layout_changer": "no",
    "mapformat": "roadmap",
    "mapiconurl": "",
    "maps_load": "yes",
    "mapscroll": "true",
    "mapzoom": "18",
    "members_only": "no",
    "ml_priority": "no",
    "mo1st": "",
    "month_incre": "0",
    "number_of_months": "1",
    "only_ft": "no",
    "pec": "",
    "s": "",
    "search": "",
    "sep_month": "no",
    "show_et_ft_img": "no",
    "show_limit": "no",
    "show_limit_ajax": "no",
    "show_limit_paged": "1",
    "show_limit_redir": "",
    "show_repeats": "no",
    "show_upcoming": "0",
    "show_year": "no",
    "sort_by": "sort_date",
    "tile_bg": "0",
    "tile_count": "2",
    "tile_height": "0",
    "tile_style": "0",
    "tiles": "no",
    "ux_val": "0",
    "view_switcher": "no",
    "wpml_l1": "",
    "wpml_l2": "",
    "wpml_l3": "",
    "yl_priority": "no",
}

# Form fields sent with the POST: the two fixed fields first, then every
# shortcode option wrapped in its "cals[evcal_calendar_172][sc][...]" name.
payload = {
    "action": "eventon_init_load",
    "global[calendars][]": "EVODV",
    **{
        f"cals[evcal_calendar_172][sc][{option}]": value
        for option, value in _shortcode_options.items()
    },
}

# POST the calendar payload to the AJAX endpoint and decode the JSON reply.
# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors before JSON decoding is tried.
response = requests.post(api_url, data=payload, timeout=30)
response.raise_for_status()
data = response.json()

# uncomment to print all data:
# print(json.dumps(data, indent=4))

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Each entry under cals -> evcal_calendar_172 -> json describes one event.
for d in data["cals"]["evcal_calendar_172"]["json"]:
    start_unix = d["event_start_unix"]
    # The end column must come from the event's own end timestamp, not from
    # the start timestamp again.
    # NOTE(review): assumes the API exposes "event_end_unix" next to
    # "event_start_unix"; falls back to the start time if it is missing.
    end_unix = d.get("event_end_unix", start_unix)

    # fromtimestamp() converts to the local time zone of the machine.
    start = dt.datetime.fromtimestamp(start_unix).strftime(TIME_FORMAT)
    end = dt.datetime.fromtimestamp(end_unix).strftime(TIME_FORMAT)

    # The title is HTML-escaped; parsing it and taking .text yields plain text.
    print(start, end, BeautifulSoup(d["event_title"], "html.parser").text)

打印(此示例输出生成时,end 与 start 取的是同一个时间戳 event_start_unix,所以两列时间相同):

2021-05-31 07:00:00 2021-05-31 07:00:00 Expoziție în aer liber: „Fryderyk Chopin – viața și creația”
2021-05-30 19:00:00 2021-05-30 19:00:00 IAȘI | Puricele în ureche
2021-05-30 19:00:00 2021-05-30 19:00:00 PURICELE IN URECHE(La cererea publicului)
2021-05-30 19:00:00 2021-05-30 19:00:00 Farmazonul din Hârlău
2021-05-30 11:00:00 2021-05-30 11:00:00 Făt frumos din Lacrimă
2021-05-30 07:00:00 2021-05-30 07:00:00 Expoziție în aer liber: „Fryderyk Chopin – viața și creația”
2021-05-29 19:00:00 2021-05-29 19:00:00 IAȘI | Puricele în ureche
2021-05-29 19:00:00 2021-05-29 19:00:00 Un tramvai numit Dorință
2021-05-29 18:30:00 2021-05-29 18:30:00 Chirița în provinție // Sala Mare
2021-05-29 11:00:00 2021-05-29 11:00:00 Magic Dreamcatcher | Teatro Blu
2021-05-29 07:00:00 2021-05-29 07:00:00 Expoziție în aer liber: „Fryderyk Chopin – viața și creația”
2021-05-28 19:00:00 2021-05-28 19:00:00 Un tramvai numit Dorință | LIVE STREAMING
2021-05-28 18:30:00 2021-05-28 18:30:00 Chirița în provinție // Sala Mare
2021-05-28 07:00:00 2021-05-28 07:00:00 Expoziție în aer liber: „Fryderyk Chopin – viața și creația”

...and so on.

答案 1 :(得分:2)

如果您想从特定日期获取数据,您需要:

1 使用 Selenium。

2 使用显式等待:from selenium.webdriver.support.wait import WebDriverWait

3 等待日期出现并单击它。但由于验证码(Captcha)的存在,日期并不总是可点击的,所以我先用 scrollIntoView() 滚动到包含事件日期的主元素。

4 等待事件加载并抓取结果数据。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Start Chrome through the chromedriver binary at the given path.
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object — confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
try:
    driver.get("https://iasi.inoras.ro/evenimente/")
    # Sanity check that the expected page was loaded.
    assert "Oras" in driver.title

    # Explicit wait: poll for each condition for at most 20 seconds.
    wait = WebDriverWait(driver, 20)

    # Wait for the daily calendar strip and scroll it into view so the day
    # cell can receive the click.
    cal = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".eventon_daily_in")))
    driver.execute_script("arguments[0].scrollIntoView();", cal)

    # Click the cell for day 20, then wait for that day's events to show up.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "p[data-date='20']"))).click()
    events = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".eventon_list_event.evo_eventtop.dayevent")))

    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which was removed in Selenium 4.
    names = [
        event.find_element(By.CSS_SELECTOR, ".evcal_desc2.evcal_event_title").text
        for event in events
    ]
    print(*names, sep='\n')
finally:
    # quit() closes every window and ends the WebDriver session, so a separate
    # close() is unnecessary; running it in `finally` avoids leaking the
    # browser process when a wait times out or the assert fails.
    driver.quit()

结果:

FENOMEN TROPICAL
ORAȘUL
WHITE JUNE JASMINE
ONLINE & LIVE: ȘTEFAN AFLOROAEI ÎN DIALOG CU AUREL CODOBAN ȘI GEORGE BONDOR
COLIVIA // SALA MARE
ATELIER DE PICTURĂ PENTRU ADULȚI
ATELIER DE PICTURĂ „(AUTO)PORTRET”
INTARCARE - EMOTII, DIFICULTATI SI SOLUTII
EXPOZIȚIE ÎN AER LIBER: „FRYDERYK CHOPIN – VIAȚA ȘI CREAȚIA”
RIDICĂ BARIERA, ALEGE CARIERA!

要获取日历中不可见的事件,您需要单击 > 按钮并等待事件加载。