HTML来源:
<script type="text/javascript">window._sharedData = {"activity_counts":null,"config":{"csrf_token":"P8DvqEB5AxkRuWyoNWhrZ3Bi2scbrVm9","viewer":null},"supports_es6":true,"country_code":"NL","language_code":"en","locale":"en_US","entry_data":{"ProfilePage":[{"logging_page_id":"profilePage_4469324900","show_suggested_profiles":false,"graphql":{"user":{"biography":"","blocked_by_viewer":false,"country_block":false,"external_url":null,"external_url_linkshimmed":null,"edge_followed_by":{"count":143},"followed_by_viewer":false,"edge_follow":{"count":43},"follows_viewer":false,"full_name":"\u0627\u062c\u0627\u0631\u0647 \u0648\u06cc\u0644\u0627 \u062f\u0631 \u06af\u0631\u062f\u0646\u0647 .................. ;</script>
<script type="text/javascript">
(function() {
var docElement = document.documentElement;
var classRE = new RegExp('(^|\\s)no-js(\\s|$)');
var className = docElement.className;
docElement.className = className.replace(classRE, '$1js$2');
})();
</script>
现在,我希望输出**仅**显示 `window._sharedData = ` 之后的所有内容。
输出:
{"activity_counts":null,"config":{"csrf_token":"P8DvqEB5AxkRuWyoNWhrZ3Bi2scbrVm9","viewer":null},"supports_es6":true,"count .......
这是我的代码:
# Download the profile page through a session and print every <script>
# element that the HTML parser finds in it.
url = 'https://www.instagram.com/mehran_eblaghi/'
s = requests.session()
page_html = s.get(url).text
parsed_page = bs(page_html, 'html.parser')
soup = parsed_page.findAll('script')
print(soup)
答案 0 :(得分:1)
使用 bs4 查找第一个文本以目标内容开头的 script 标签,然后取出其文本内容,并去掉开头的前缀部分,例如:
import json
import requests
import bs4
# Prefix of the inline <script> whose remainder is the JSON payload we want.
key = 'window._sharedData = '
# Page to fetch (the same address the question uses), so the snippet runs
# on its own.
url = 'https://www.instagram.com/mehran_eblaghi/'
soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
# Find the first <script> whose text starts with the prefix. The `s and`
# guard skips tags for which the filter receives None instead of a string.
# `string=` is the current name of the filter formerly spelled `text=`.
script_tag = soup.find('script', string=lambda s: s and s.startswith(key))
if script_tag:
    # raw string of data in script: everything after the prefix
    text_data = script_tag.text.partition(key)[2]
    # remove the trailing ; and you've json data... interpret as such
    data = json.loads(text_data.rstrip(';\n'))
else:
    # didn't find a match - up to you what to do here...
    # `data` is still bound so later code can test it against None.
    data = None
如果成功找到了相应的脚本标签,那么 `data` 将是如下所示的 Python 字典:
{'activity_counts': None,
'config': {'csrf_token': '1Srrhc6GQmmC19TdM3nLFsDOORtJMpCj', 'viewer': None},
'supports_es6': False,
'country_code': 'GB',
'language_code': 'en',
'locale': 'en_US',
'entry_data': {'ProfilePage': [{'logging_page_id': 'profilePage_4469324900',
'show_suggested_profiles': False,
'graphql': {'user': {'biography': '',
'blocked_by_viewer': False,
'country_block': False,
'external_url': None,
'external_url_linkshimmed': None,
'edge_followed_by': {'count': 143},
'followed_by_viewer': False,
'edge_follow': {'count': 43},
'follows_viewer': False,
'full_name': 'اجاره ویلا در گردنه حیران',
'has_channel': False,
'has_blocked_viewer': False,
'highlight_reel_count': 0,
'has_requested_viewer': False,
'id': '4469324900',
'is_business_account': False,
'is_private': False,
'is_verified': False,
'edge_mutual_followed_by': {'count': 0, 'edges': []},
'profile_pic_url': 'https://scontent-lht6-1.cdninstagram.com/vp/ee763d48bb0c35ac0c6aa22dc1e2ed08/5C31C768/t51.2885-19/s150x150/15876073_1641186492851073_2628164662507601920_n.jpg',
'profile_pic_url_hd': 'https://scontent-lht6-1.cdninstagram.com/vp/fd5c97116848cf46ddf24f8ac8d1fd7e/5C35B210/t51.2885-19/s320x320/15876073_1641186492851073_2628164662507601920_n.jpg',
'requested_by_viewer': False,
'username': 'mehran_eblaghi',
'connected_fb_page': None,
'edge_owner_to_timeline_media': {'count': 2,
'page_info': {'has_next_page': False,
'end_cursor': 'AQBnocogeHdSL1DSSxRdiYR4D1RguUeEj5Ap1do1KIy4U_NutZIe9ZCyRpDExD4TL9k'},
'edges': [{'node': {'__typename': 'GraphImage',
'id': '1429655015362664538',
'edge_media_to_caption': {'edges': [{'node': {'text': 'درصورت نیاز به ویلاتماس بگیرید 09112815125'}}]},
'shortcode': 'BPXJ6luDBha',
'edge_media_to_comment': {'count': 10},
'comments_disabled': False,
'taken_at_timestamp': 1484648180,
'dimensions': {'height': 1080, 'width': 1080},
'display_url': 'https://scontent-lht6-1.cdninstagram.com/vp/abeb67556e5e2166e497cc779e99fab2/5C33A30D/t51.2885-15/e35/14597426_594812037376264_3725484886300033024_n.jpg',
'edge_liked_by': {'count': 42},
'edge_media_preview_like': {'count': 42},
'gating_info': None,
'media_preview': 'ACoqZEv32OQcHHpViMrKoJxkE59cVnFC7HB6epP+NSBGhXdkc8e/5+lNRYc0UaYiBO7J9aR3DDg4IOCaoR3IOFwcr1x0wf8AGkF0vJCHB75POP5UrNF3XyNBpARwQe1VN+OKiEqA5AIbr+Hr0q7tVud/Xn7p/wAahptj0RnKcEkVKXwMY/lTEbHNSbt3OK6rbadF1OaMrXXNZ3elr/oN345A5/xpxk56cGoy5JpBKR9KVutvxKUntzO3+H0/p9SSRz0x/wDqqrg1P5mRj161HVxVuljKcru6lf8ACw8DNH61KANo+lMIFGvRrbt/wSLq7unu+v8AwBhIHamnHYfqKdimEClZ919z/wAwutrfl/kKKKYAKkqlfr+Vv1Ynbof/2Q==',
'owner': {'id': '4469324900'},
'thumbnail_src': 'https://scontent-lht6-1.cdninstagram.com/vp/a50ea8ec7e91454bc0b981b9a347c2b9/5C2CDBE8/t51.2885-15/sh0.08/e35/s640x640/14597426_594812037376264_3725484886300033024_n.jpg',
'thumbnail_resources': [{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/8ecae5da8cdf4f981a29ec7a0c6b0a08/5C30AF4F/t51.2885-15/e35/s150x150/14597426_594812037376264_3725484886300033024_n.jpg',
'config_width': 150,
'config_height': 150},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/fe3689ac4d9165c32369e8fc460f0040/5C187505/t51.2885-15/e35/s240x240/14597426_594812037376264_3725484886300033024_n.jpg',
'config_width': 240,
'config_height': 240},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/be7a47d6b422add7f77d597c0eecd21e/5C31FBBF/t51.2885-15/e35/s320x320/14597426_594812037376264_3725484886300033024_n.jpg',
'config_width': 320,
'config_height': 320},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/2f6d7c80500d9d56f940be6ffa0e8e9a/5C1568E5/t51.2885-15/e35/s480x480/14597426_594812037376264_3725484886300033024_n.jpg',
'config_width': 480,
'config_height': 480},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/a50ea8ec7e91454bc0b981b9a347c2b9/5C2CDBE8/t51.2885-15/sh0.08/e35/s640x640/14597426_594812037376264_3725484886300033024_n.jpg',
'config_width': 640,
'config_height': 640}],
'is_video': False,
'accessibility_caption': None}},
{'node': {'__typename': 'GraphImage',
'id': '1429628539162724247',
'edge_media_to_caption': {'edges': []},
'shortcode': 'BPXD5T1jgeX',
'edge_media_to_comment': {'count': 3},
'comments_disabled': False,
'taken_at_timestamp': 1484645024,
'dimensions': {'height': 1080, 'width': 1080},
'display_url': 'https://scontent-lht6-1.cdninstagram.com/vp/b48766cc9da8d14904f702a927884f5b/5C2B24EA/t51.2885-15/e35/16110374_198276563977954_7548368730246348800_n.jpg',
'edge_liked_by': {'count': 42},
'edge_media_preview_like': {'count': 42},
'gating_info': None,
'media_preview': 'ACoqdDpYeEP0J5wTSHR2C5yPzP8AhVn7YVi45xgYNWbWbzjtII4z1BFVzMjQpxacigHILZ6c/wD1qr6jaFDuwMH0rdBjDYGMjg5pJYVkXDcjNF9bkOCvzLc4dhim5rR1K3EEmByD0rNq7miNRmLIQOScVYsJGik+YEbhgfnn+VY4ncd6kSaR+nJH6etZhaxqyXa/PzyW4/Opri+/dqqn+77dqwZFZTyOvI96aZGHB4x7UrBa5PevvlJ69P5VVp5DPz1pm0+hqwL6wRZ5GB7k/wD6qsRwRxncuc/Wq6E0McEY4osTctCJGYM7EsvTt/jUhhiJ3MN59W5/+tUAozSsFywWUYCgY/pTOKhYZGaQGixLP//Z',
'owner': {'id': '4469324900'},
'thumbnail_src': 'https://scontent-lht6-1.cdninstagram.com/vp/d37f58bf9a6bcbe17242a7e0b233c5c0/5C331E0F/t51.2885-15/sh0.08/e35/s640x640/16110374_198276563977954_7548368730246348800_n.jpg',
'thumbnail_resources': [{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/f14bd4b53c62c2fe56ba88f1a3ab85cf/5C1DC3A8/t51.2885-15/e35/s150x150/16110374_198276563977954_7548368730246348800_n.jpg',
'config_width': 150,
'config_height': 150},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/616bc4d9abe790d1c9e06dbb22e7b43f/5C266AE2/t51.2885-15/e35/s240x240/16110374_198276563977954_7548368730246348800_n.jpg',
'config_width': 240,
'config_height': 240},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/09d6473c69ad0b4e493f05c6d3aad9a4/5C205958/t51.2885-15/e35/s320x320/16110374_198276563977954_7548368730246348800_n.jpg',
'config_width': 320,
'config_height': 320},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/e5d6902499831040caded69325585dfc/5C350A02/t51.2885-15/e35/s480x480/16110374_198276563977954_7548368730246348800_n.jpg',
'config_width': 480,
'config_height': 480},
{'src': 'https://scontent-lht6-1.cdninstagram.com/vp/d37f58bf9a6bcbe17242a7e0b233c5c0/5C331E0F/t51.2885-15/sh0.08/e35/s640x640/16110374_198276563977954_7548368730246348800_n.jpg',
'config_width': 640,
'config_height': 640}],
'is_video': False,
'accessibility_caption': None}}]},
'edge_saved_media': {'count': 0,
'page_info': {'has_next_page': False, 'end_cursor': None},
'edges': []},
'edge_media_collections': {'count': 0,
'page_info': {'has_next_page': False, 'end_cursor': None},
'edges': []}}},
'felix_onboarding_video_resources': {'mp4': '/static/videos/felix-onboarding/onboardingVideo.mp4/9d16838ca7f9.mp4',
'poster': '/static/images/felix-onboarding/onboardingVideoPoster.png/8fdba7cf2120.png'}}]},
'gatekeepers': {'cb': True,
'sf': True,
'ld': True,
'seo': True,
'seoht': True,
'saa': True,
'phone_qp': True},
'knobs': {'acct:ntb': 0, 'cb': 0, 'captcha': 0},
'qe': {'form_navigation_dialog': {'g': '', 'p': {}},
'cred_man': {'g': 'test', 'p': {'use_on_landing': 'true'}},
'iab': {'g': '', 'p': {}},
'app_upsell_li': {'g': '', 'p': {}},
'app_upsell': {'g': '', 'p': {}},
'stale_fix': {'g': '', 'p': {}},
'profile_header_name': {'g': '', 'p': {}},
'bc3l': {'g': '', 'p': {}},
'direct_conversation_reporting': {'g': '', 'p': {}},
'general_reporting': {'g': '', 'p': {}},
'reporting': {'g': '', 'p': {}},
'acc_recovery_link': {'g': '', 'p': {}},
'notif': {'g': '', 'p': {}},
'fb_unlink': {'g': '', 'p': {}},
'mobile_stories_doodling': {'g': '', 'p': {}},
'show_copy_link': {'g': '', 'p': {}},
'mobile_logout': {'g': '', 'p': {}},
'p_edit': {'g': '', 'p': {}},
'404_as_react': {'g': '', 'p': {}},
'acc_recovery': {'g': '', 'p': {}},
'collections': {'g': '', 'p': {}},
'comment_ta': {'g': '', 'p': {}},
'su': {'g': '', 'p': {}},
'disc_ppl': {'g': '', 'p': {}},
'ebd_ul': {'g': 'launch', 'p': {'is_enabled': 'true'}},
'ebdsim_li': {'g': '', 'p': {}},
'ebdsim_lo': {'g': '', 'p': {}},
'empty_feed': {'g': '', 'p': {}},
'bundles': {'g': '', 'p': {}},
'exit_story_creation': {'g': '', 'p': {}},
'appsell': {'g': '', 'p': {}},
'imgopt': {'g': '', 'p': {}},
'follow_button': {'g': '', 'p': {}},
'loggedout': {'g': '', 'p': {}},
'loggedout_upsell': {'g': 'control_without_new_loggedout_upsell_content_03_15_18',
'p': {'has_new_loggedout_upsell_content': 'false'}},
'msisdn': {'g': '', 'p': {}},
'bg_sync': {'g': '', 'p': {}},
'onetaplogin': {'g': '', 'p': {}},
'login_poe': {'g': '', 'p': {}},
'private_lo': {'g': '', 'p': {}},
'profile_tabs': {'g': '', 'p': {}},
'push_notifications': {'g': '', 'p': {}},
'reg': {'g': '', 'p': {}},
'reg_vp': {'g': 'test_group_1', 'p': {'hide_value_prop': 'true'}},
'report_media': {'g': '', 'p': {}},
'report_profile': {'g': '', 'p': {}},
'scroll_log': {'g': '', 'p': {}},
'sidecar_swipe': {'g': '', 'p': {}},
'su_universe': {'g': '', 'p': {}},
'stale': {'g': '', 'p': {}},
'stories_lo': {'g': 'test_05_01', 'p': {'location': 'true'}},
'stories': {'g': '', 'p': {}},
'tp_pblshr': {'g': '', 'p': {}},
'video': {'g': '', 'p': {}},
'gdpr_eu_tos': {'g': 'control_05_01',
'p': {'gdpr_required': 'true',
'eu_new_user_flow': 'age_two_button',
'tos_version': 'eu'}},
'gdpr_row_tos': {'g': '', 'p': {}},
'fd_gr': {'g': '', 'p': {}},
'felix': {'g': '', 'p': {}},
'felix_clear_fb_cookie': {'g': '', 'p': {}},
'felix_creation_duration_limits': {'g': '', 'p': {}},
'felix_creation_enabled': {'g': '', 'p': {}},
'felix_creation_fb_crossposting': {'g': '', 'p': {}},
'felix_creation_fb_crossposting_v2': {'g': '', 'p': {}},
'felix_creation_validation': {'g': '', 'p': {}},
'felix_creation_video_upload': {'g': '', 'p': {}},
'felix_early_onboarding': {'g': '', 'p': {}},
'unfollow_confirm': {'g': '', 'p': {}},
'profile_enhance_li': {'g': '', 'p': {}},
'profile_enhance_lo': {'g': '', 'p': {}},
'phone_confirm': {'g': '', 'p': {}},
'comment_enhance': {'g': '', 'p': {}},
'mweb_topical_explore': {'g': '', 'p': {}},
'web_nametag': {'g': '', 'p': {}},
'image_downgrade': {'g': '', 'p': {}},
'image_downgrade_lite': {'g': '', 'p': {}},
'follow_all_fb': {'g': '', 'p': {}},
'lite_direct_upsell': {'g': '', 'p': {}},
'web_loggedout_noop': {'g': '', 'p': {}},
'stories_video_preload': {'g': '', 'p': {}},
'lite_stories_video_preload': {'g': '', 'p': {}},
'a2hs_heuristic_uc': {'g': '', 'p': {}},
'a2hs_heuristic_non_uc': {'g': '', 'p': {}},
'web_hashtag': {'g': '', 'p': {}},
'header_scroll': {'g': '', 'p': {}},
'rout': {'g': '', 'p': {}},
'websr': {'g': '', 'p': {}},
'web_lo_follow': {'g': '', 'p': {}},
'web_share': {'g': '', 'p': {}},
'lite_rating': {'g': '', 'p': {}},
'web_embeds_share': {'g': '', 'p': {}},
'web_share_lo': {'g': '', 'p': {}},
'web_embeds_logged_out': {'g': 'test_comment_input',
'p': {'show_comment_input': 'true'}},
'sl': {'g': '', 'p': {}},
'reg_nux': {'g': '', 'p': {}},
'web_datasaver_mode': {'g': '', 'p': {}},
'lite_datasaver_mode': {'g': '', 'p': {}},
'lite_video_upload': {'g': '', 'p': {}}},
'hostname': 'www.instagram.com',
'platform': 'web',
'rhx_gis': 'b9d7a25d3e0772990918069a0652bc21',
'nonce': 'E+077618aJD12ZjcMWUynA==',
'zero_data': {},
'rollout_hash': '2502ae2429f4',
'bundle_variant': 'base',
'probably_has_app': False}