我正在尝试使用 BeautifulSoup 从以下链接中提取一些信息:https://aiesec.org/opportunity/1212595 。我需要的是项目的名称和开始日期。但是,我无法提取到名称,下面的代码总是返回 None。
# Find the first <div> whose parsed class list equals ['opportunity-tile', ''].
# tag.get('class') is used instead of tag['class']: the predicate is called
# for tags that may have no class attribute, and indexing such a tag raises
# KeyError, whereas .get() returns None and the comparison is simply False.
# NOTE(review): `soup` is assumed to be a BeautifulSoup document built
# earlier; confirm that the class list can actually contain an empty string.
title = soup.find(lambda tag: tag.name == 'div' and tag.get('class') == ['opportunity-tile', ''])
进一步分析后,我发现它甚至没有获取到任何 div 标签,因为下面的语句没有返回任何内容:
# Debug aid: collect every <div> the parser found and print the resulting list.
# NOTE(review): `soup` is assumed to be the BeautifulSoup document built earlier.
all_divs = soup.find_all("div")
print(all_divs)
我哪里做错了?
答案 0 :(得分:0)
该页面的内容很可能是由 JavaScript 动态加载的,因此直接下载到的 HTML 中并不包含这些 div 标签。
可以改为直接请求页面背后的 GraphQL 接口(https://gis-api.aiesec.org/graphql)来获取项目名称和日期。
输出:
# Fetch one AIESEC opportunity from the GraphQL API and print its title and
# key dates (earliest start, applications close, latest end).
import requests

# GraphQL endpoint that receives the OpportunityQuery request.
API_URL = 'https://gis-api.aiesec.org/graphql'

# Seconds to wait for the server; without a timeout requests.post() can
# block forever on an unresponsive host.
REQUEST_TIMEOUT = 30

# NOTE(review): the Authorization value is a hard-coded token. It presumably
# is the token the public site uses and may expire -- confirm, and consider
# loading it from configuration instead of source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
    'Authorization': 'e316ebe109dd84ed16734e5161a2d236d0a7e6daf499941f7c110078e3c75493',
}

# The query text is kept byte-for-byte; it is written as adjacent string
# literals because a single literal may not span a raw line break.
query = (
    "query OpportunityQuery($id: ID, $cdn_region: String) {\n "
    "getOpportunity(id: $id) {\n "
    "application_processing_time\n applied_to\n applied_to_with\n "
    "applications_close_date\n available_openings\n "
    "backgrounds {\n constant_id\n constant_name\n option\n __typename\n }\n "
    "branch {\n id\n "
    "address_detail {\n id\n city\n country\n __typename\n }\n "
    "company {\n id\n name\n "
    "profile_photo(cdn_region: $cdn_region)\n __typename\n }\n "
    "__typename\n }\n "
    "cover_photo(cdn_region: $cdn_region)\n description\n duration\n "
    "project_duration\n earliest_start_date\n google_place_id\n "
    "home_lc {\n id\n email\n full_name\n "
    "parent {\n id\n name\n __typename\n }\n "
    "__typename\n }\n "
    "id\n is_favourited\n is_gep\n "
    "languages {\n constant_id\n constant_name\n option\n __typename\n }\n "
    "lat\n latest_end_date\n lng\n "
    "legal_info {\n health_insurance_info\n visa_duration\n "
    "visa_link\n visa_type\n __typename\n }\n "
    "location\n "
    "logistics_info {\n accommodation_covered\n accommodation_provided\n "
    "food_covered\n __typename\n }\n "
    "nationalities {\n constant_id\n constant_name\n option\n __typename\n }\n "
    "office_footfall_for_exchange\n openings\n opportunity_cost\n "
    "opportunity_questions {\n edges {\n node {\n id\n __typename\n }\n "
    "__typename\n }\n __typename\n }\n "
    "organisation {\n id\n name\n __typename\n }\n "
    "percentage_of_fulfillment\n "
    "programme {\n id\n short_name_display\n __typename\n }\n "
    "remark\n reviews\n "
    "role_info {\n selection_process\n learning_points_list\n __typename\n }\n "
    "sdg_info {\n id\n "
    "sdg_target {\n description\n goal_index\n id\n "
    "parent {\n id\n __typename\n }\n "
    "target\n __typename\n }\n "
    "__typename\n }\n "
    "selection_processes(first: 50) {\n edges {\n cursor\n "
    "node {\n id\n title\n no_of_days\n __typename\n }\n "
    "__typename\n }\n __typename\n }\n "
    "skills {\n constant_id\n constant_name\n option\n __typename\n }\n "
    "specifics_info {\n computer\n expected_work_schedule\n "
    "ef_test_required\n salary\n "
    "salary_currency {\n id\n alphabetic_code\n __typename\n }\n "
    "salary_periodicity\n saturday_work\n __typename\n }\n "
    "status\n "
    "study_levels {\n id\n name\n __typename\n }\n "
    "title\n "
    "transparent_fee_details {\n covers_accomodation\n "
    "covers_administrative_costs\n covers_leadership_spaces\n "
    "covers_pickup\n sponsored_by\n __typename\n }\n "
    "__typename\n }\n}\n"
)

# JSON body of the GraphQL request: operation name, variables, query text.
data = {
    "operationName": "OpportunityQuery",
    "variables": {"id": "1212595", "cdn_region": "Global"},
    "query": query,
}

response = requests.post(API_URL, json=data, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on HTTP errors instead of trying to decode an error page as JSON.
response.raise_for_status()
r = response.json()

opportunity = r['data']['getOpportunity']
print(opportunity['title'])
print(opportunity['earliest_start_date'])
print(opportunity['applications_close_date'])
print(opportunity['latest_end_date'])