我是网页抓取(web scraping)的新手。我使用 BeautifulSoup 和下面的代码获取了包含 JSON 的 script 标签,但找不到从中提取 JSON 的方法。
# Parse the fetched page with the built-in HTML parser.
# rMain1 is defined outside this snippet; presumably an HTTP response object.
soupMain1 = BeautifulSoup(rMain1.text, 'html.parser')
# Look up a <script> tag whose text matches the regex "recentHistoryList".
all_scripts = soupMain1.find('script', text=re.compile("recentHistoryList"))
# Python 2 print statement: dump the matched tag.
print all_scripts
输出如下:
<script type="text/javascript">
require(['ta/Core/TA.Store'], function(taStore) {
taStore.store('typeahead.typeahead2_mixed_ui', true);
taStore.store('typeahead.typeahead2_geo_segmented_ui', true);
taStore.store('typeahead.geoArea', 'Singapore area'); taStore.store('typeahead.worldwide', 'Worldwide'); taStore.store('typeahead.noResultsFound', 'No results found.');
taStore.store('typeahead.flight_enabled', true);
taStore.store('typeahead.localAirports', []);
taStore.store('typeahead.recentHistoryList', [{"war_url":"\/UserReview-g294265-d301581-Fairmont_Singapore-Singapore.html","autobroadened":"false","normalized_name":"fairmont singapore","type":"HOTEL","title":"Hotels","is_vr":false,"url":"\/Hotel_Review-g294265-d301581-Reviews-Fairmont_Singapore-Singapore.html","urls":[{"url_type":"hotel","name":"Fairmont Singapore, Singapore, Singapore","type":"HOTEL","url":"\/Hotel_Review-g294265-d301581-Reviews-Fairmont_Singapore-Singapore.html"}],"is_broad":false,"scope":"global","name":"Fairmont Singapore, Singapore, Singapore","data_type":"LOCATION","details":{"parent_name":"Singapore","grandparent_name":"Singapore","highlighted_name":"Fairmont Singapore","name":"Fairmont Singapore","parent_ids":[294265,294262,2,1],"geo_name":"Singapore, Singapore"},"value":301581,"coords":"1.293826,103.85387"},{"lookbackServlet":null,"autobroadened":"false","normalized_name":"singapore","title":"Destinations","type":"GEO","is_vr":true,"url":"\/Tourism-g294265-Singapore-Vacations.html","urls":[{"url_type":"geo","name":"Singapore Tourism","fallback_url":"\/Tourism-g294265-Singapore-Vacations.html","type":"GEO","url":"\/Tourism-g294265-Singapore-Vacations.html"},{"url_type":"vr","name":"Singapore Holiday Homes","fallback_url":"\/VacationRentals-g294265-Reviews-Singapore-Vacation_Rentals.html","type":"VACATION_RENTAL","url":"\/VacationRentals-g294265-Reviews-Singapore-Vacation_Rentals.html"},{"url_type":"eat","name":"Singapore Restaurants","fallback_url":"\/Restaurants-g294265-Singapore.html","type":"EATERY","url":"\/Restaurants-g294265-Singapore.html"},{"url_type":"attr","name":"Singapore Attractions","fallback_url":"\/Attractions-g294265-Activities-Singapore.html","type":"ATTRACTION","url":"\/Attractions-g294265-Activities-Singapore.html"},{"url_type":"hotel","name":"Singapore Hotels","fallback_url":"\/Hotels-g294265-Singapore-Hotels.html","type":"HOTEL","url":"\/Hotels-g294265-Singapore-Hotels.html"},{"url_type":"flights_to","name":"Flights to 
Singapore","fallback_url":"\/Flights-g294265-Singapore-Cheap_Discount_Airfares.html","type":"FLIGHTS_TO","url":"\/Flights-g294265-Singapore-Cheap_Discount_Airfares.html"},{"url_type":"nbrhd","name":"Singapore Neighbourhoods","fallback_url":"\/NeighborhoodList-g294265-Singapore.html","type":"NEIGHBORHOOD","url":"\/NeighborhoodList-g294265-Singapore.html"},{"url_type":"tg","name":"Singapore Travel Guides","fallback_url":"\/Travel_Guide-g294265-Singapore.html","type":"TRAVEL_GUIDE","url":"\/Travel_Guide-g294265-Singapore.html"}],"is_broad":false,"scope":"global","name":"Singapore, Singapore, Asia","data_type":"LOCATION","details":{"parent_name":"Singapore","grandparent_name":"Asia","rac_enabled":false,"highlighted_name":"Singapore","name":"Singapore","parent_ids":[294262,2,1],"geo_name":"Singapore, Asia"},"value":294265,"coords":"1.285801,103.85111"}]);
taStore.store('typeahead.restaurant', "Restaurant"); taStore.store('typeahead.attraction', "Attraction"); taStore.store('typeahead.hotel', "Hotel"); taStore.store('typeahead.restaurant_list', "Restaurants"); taStore.store('typeahead.attraction_list', "Attractions"); taStore.store('typeahead.things_to_do', "Places to Visit"); taStore.store('typeahead.hotel_list', "Hotels"); taStore.store('typeahead.flight_list', "Flights"); taStore.store('typeahead.vacation_rental_list', "Holiday Rentals"); taStore.store('typeahead.scoped.static_local_label', '% area'); taStore.store('typeahead.scoped.result_title_text', 'Start typing, or try one of these suggestions...'); taStore.store('typeahead.scoped.poi_overview_geo', '<span class="poi_overview_item">Overview</span> of %'); taStore.store('typeahead.scoped.poi_hotels_geo', '<span class="poi_overview_item">Hotels</span> in %'); taStore.store('typeahead.scoped.poi_hotels_geo_near', '<span class="poi_overview_item">Hotels</span> near %'); taStore.store('typeahead.scoped.poi_vr_geo', '<span class="poi_overview_item">Holiday Rentals</span> in %'); taStore.store('typeahead.scoped.poi_vr_geo_near', '<span class="poi_overview_item">Holiday Rentals</span> near %'); taStore.store('typeahead.scoped.poi_attractions_geo', '<span class="poi_overview_item">Things to Do</span> in %'); taStore.store('typeahead.scoped.poi_eat_geo', '<span class="poi_overview_item">Restaurants</span> in %'); taStore.store('typeahead.scoped.poi_flights_geo', '<span class="poi_overview_item">Flights</span> to %'); taStore.store('typeahead.scoped.poi_nbrhd_geo', '<span class="poi_overview_item">Neighbourhoods</span> in %'); taStore.store('typeahead.scoped.poi_travel_guides_geo', '<span class="poi_overview_item">Travel Guides</span> in %'); taStore.store('typeahead.scoped.overview', 'Overview '); taStore.store('typeahead.scoped.neighborhoods', 'Neighbourhoods'); taStore.store('typeahead.scoped.travel_guides', 'Travel Guides'); 
taStore.store('typeahead.scoped.geo_area_template', '% area'); taStore.store('typeahead.searchMore', 'Find more results for "%"');
taStore.store('typeahead.history', 'Recently viewed'); taStore.store('typeahead.history.all_caps', 'RECENTLY VIEWED'); taStore.store('typeahead.popular_destinations', 'POPULAR DESTINATIONS');
});
我想获取键“coords”第一次出现时对应的值。
答案 0 :(得分:0)
import json
from bs4 import BeautifulSoup
html = """
<script type="text/javascript">
require(['ta/Core/TA.Store'], function(taStore) {
taStore.store('typeahead.typeahead2_mixed_ui', true);
taStore.store('typeahead.typeahead2_geo_segmented_ui', true);
taStore.store('typeahead.geoArea', 'Singapore area'); taStore.store('typeahead.worldwide', 'Worldwide'); taStore.store('typeahead.noResultsFound', 'No results found.');
taStore.store('typeahead.flight_enabled', true);
taStore.store('typeahead.localAirports', []);
taStore.store('typeahead.recentHistoryList', [{"war_url":"\/UserReview-g294265-d301581-Fairmont_Singapore-Singapore.html","autobroadened":"false","normalized_name":"fairmont singapore","type":"HOTEL","title":"Hotels","is_vr":false,"url":"\/Hotel_Review-g294265-d301581-Reviews-Fairmont_Singapore-Singapore.html","urls":[{"url_type":"hotel","name":"Fairmont Singapore, Singapore, Singapore","type":"HOTEL","url":"\/Hotel_Review-g294265-d301581-Reviews-Fairmont_Singapore-Singapore.html"}],"is_broad":false,"scope":"global","name":"Fairmont Singapore, Singapore, Singapore","data_type":"LOCATION","details":{"parent_name":"Singapore","grandparent_name":"Singapore","highlighted_name":"Fairmont Singapore","name":"Fairmont Singapore","parent_ids":[294265,294262,2,1],"geo_name":"Singapore, Singapore"},"value":301581,"coords":"1.293826,103.85387"},{"lookbackServlet":null,"autobroadened":"false","normalized_name":"singapore","title":"Destinations","type":"GEO","is_vr":true,"url":"\/Tourism-g294265-Singapore-Vacations.html","urls":[{"url_type":"geo","name":"Singapore Tourism","fallback_url":"\/Tourism-g294265-Singapore-Vacations.html","type":"GEO","url":"\/Tourism-g294265-Singapore-Vacations.html"},{"url_type":"vr","name":"Singapore Holiday Homes","fallback_url":"\/VacationRentals-g294265-Reviews-Singapore-Vacation_Rentals.html","type":"VACATION_RENTAL","url":"\/VacationRentals-g294265-Reviews-Singapore-Vacation_Rentals.html"},{"url_type":"eat","name":"Singapore Restaurants","fallback_url":"\/Restaurants-g294265-Singapore.html","type":"EATERY","url":"\/Restaurants-g294265-Singapore.html"},{"url_type":"attr","name":"Singapore Attractions","fallback_url":"\/Attractions-g294265-Activities-Singapore.html","type":"ATTRACTION","url":"\/Attractions-g294265-Activities-Singapore.html"},{"url_type":"hotel","name":"Singapore Hotels","fallback_url":"\/Hotels-g294265-Singapore-Hotels.html","type":"HOTEL","url":"\/Hotels-g294265-Singapore-Hotels.html"},{"url_type":"flights_to","name":"Flights to 
Singapore","fallback_url":"\/Flights-g294265-Singapore-Cheap_Discount_Airfares.html","type":"FLIGHTS_TO","url":"\/Flights-g294265-Singapore-Cheap_Discount_Airfares.html"},{"url_type":"nbrhd","name":"Singapore Neighbourhoods","fallback_url":"\/NeighborhoodList-g294265-Singapore.html","type":"NEIGHBORHOOD","url":"\/NeighborhoodList-g294265-Singapore.html"},{"url_type":"tg","name":"Singapore Travel Guides","fallback_url":"\/Travel_Guide-g294265-Singapore.html","type":"TRAVEL_GUIDE","url":"\/Travel_Guide-g294265-Singapore.html"}],"is_broad":false,"scope":"global","name":"Singapore, Singapore, Asia","data_type":"LOCATION","details":{"parent_name":"Singapore","grandparent_name":"Asia","rac_enabled":false,"highlighted_name":"Singapore","name":"Singapore","parent_ids":[294262,2,1],"geo_name":"Singapore, Asia"},"value":294265,"coords":"1.285801,103.85111"}]);
taStore.store('typeahead.restaurant', "Restaurant"); taStore.store('typeahead.attraction', "Attraction"); taStore.store('typeahead.hotel', "Hotel"); taStore.store('typeahead.restaurant_list', "Restaurants"); taStore.store('typeahead.attraction_list', "Attractions"); taStore.store('typeahead.things_to_do', "Places to Visit"); taStore.store('typeahead.hotel_list', "Hotels"); taStore.store('typeahead.flight_list', "Flights"); taStore.store('typeahead.vacation_rental_list', "Holiday Rentals"); taStore.store('typeahead.scoped.static_local_label', '% area'); taStore.store('typeahead.scoped.result_title_text', 'Start typing, or try one of these suggestions...'); taStore.store('typeahead.scoped.poi_overview_geo', '<span class="poi_overview_item">Overview</span> of %'); taStore.store('typeahead.scoped.poi_hotels_geo', '<span class="poi_overview_item">Hotels</span> in %'); taStore.store('typeahead.scoped.poi_hotels_geo_near', '<span class="poi_overview_item">Hotels</span> near %'); taStore.store('typeahead.scoped.poi_vr_geo', '<span class="poi_overview_item">Holiday Rentals</span> in %'); taStore.store('typeahead.scoped.poi_vr_geo_near', '<span class="poi_overview_item">Holiday Rentals</span> near %'); taStore.store('typeahead.scoped.poi_attractions_geo', '<span class="poi_overview_item">Things to Do</span> in %'); taStore.store('typeahead.scoped.poi_eat_geo', '<span class="poi_overview_item">Restaurants</span> in %'); taStore.store('typeahead.scoped.poi_flights_geo', '<span class="poi_overview_item">Flights</span> to %'); taStore.store('typeahead.scoped.poi_nbrhd_geo', '<span class="poi_overview_item">Neighbourhoods</span> in %'); taStore.store('typeahead.scoped.poi_travel_guides_geo', '<span class="poi_overview_item">Travel Guides</span> in %'); taStore.store('typeahead.scoped.overview', 'Overview '); taStore.store('typeahead.scoped.neighborhoods', 'Neighbourhoods'); taStore.store('typeahead.scoped.travel_guides', 'Travel Guides'); 
taStore.store('typeahead.scoped.geo_area_template', '% area'); taStore.store('typeahead.searchMore', 'Find more results for "%"');
taStore.store('typeahead.history', 'Recently viewed'); taStore.store('typeahead.history.all_caps', 'RECENTLY VIEWED'); taStore.store('typeahead.popular_destinations', 'POPULAR DESTINATIONS');
});
</script>
"""
soup = BeautifulSoup(html, "html.parser")

# Find the <script> element whose text contains the call that stores the
# recent-history list; the JSON array is the second argument of that call.
anchor = "taStore.store('typeahead.recentHistoryList', "
s = soup.find(lambda tag: tag.name == "script" and anchor in tag.text)
if s is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("no <script> element contains %r" % anchor)

# The JSON begins immediately after the anchor. Use len(anchor) rather than a
# hard-coded offset so the position stays correct if the anchor is edited.
script_text = s.text
start = script_text.find(anchor) + len(anchor)

# raw_decode parses exactly one JSON value starting at `start` and reports
# where it ends, so there is no need to search for an end marker such as
# "}]);" (whose first occurrence may not be the end of this array).
# strict=False accepts raw control characters (e.g. a line break) inside JSON
# strings, which the default strict mode rejects.
d, end = json.JSONDecoder(strict=False).raw_decode(script_text, start)
# Keep the extracted JSON text available under its original name.
j = script_text[start:end]

# Print the "coords" value of the first entry in the list.
print(d[0]['coords'])
输出:
1.293826,103.85387