这个服务器的响应格式是什么?

时间:2017-04-26 10:59:24

标签: python json server request

我使用Python requests模块的get()函数向这个页面(this page)的服务器发出请求。当我查看响应的内容时,得到的是下面这样的数据(节选):

b'\n\n\n\n<!DOCTYPE html>\n<html\nxmlns:og="http://ogp.me/ns#"\nxmlns:fb="http://www.facebook.com/2008/fbml">\n    <head>\n        <meta charset="utf-8">\n        <meta http-equiv="X-UA-Compatible" content="IE=edge">\n\n    <meta name="apple-itunes-app" content="app-id=342792525, app-argument=imdb:///?src=mdot">\n        \n    \n            <script type="text/javascript">var ue_t0=window.ue_t0||+new Date();</script>\n            <script type="text/javascript">\n                var ue_mid = "A1EVAM02EL8SFB"; \n                var ue_sn = "www.imdb.com";  \n                var ue_furl = "fls-na.amazon.com";\n                var ue_sid = "000-0000000-0000000";\n                var ue_id = "03N6Z2NEAF09T9H26QYE";\n                (function(e){var c=e;var a=c.ue||{};a.main_scope="mainscopecsm";a.q=[];a.t0=c.ue_t0||+new Date();a.d=g;function g(h){return +new Date()-(h?0:a.t0)}function d(h){return function(){a.q.push({n:h,a:arguments,t:a.d()})}}function b(m,l,h,j,i){var k={m:m,f:l,l:h,c:""+j,err:i,fromOnError:1,args:arguments};c.ueLogError(k);return false}b.skipTrace=1;e.onerror=b;function f(){c.uex("ld")}if(e.addEventListener){e.addEventListener("load",f,false)}else{if(e.attachEvent){e.attachEvent("onload",f)}}a.tag=d("tag");a.log=d("log");a.reset=d("rst");c.ue_csm=c;c.ue=a;c.ueLogError=d("err");c.ues=d("ues");c.uet=d("uet");c.uex=d("uex");c.uet("ue")})(window);(function(e,d){var a=e.ue||{};function c(g){if(!g){return}var f=d.head||d.getElementsByTagName("head")[0]||d.documentElement,h=d.createElement("script");h.async="async";h.src=g;f.insertBefore(h,f.firstChild)}function b(){var k=e.ue_cdn||"z-ecx.images-amazon.com",g=e.ue_cdns||"images-na.ssl-images-amazon.com",j="/images/G/01/csminstrumentation/",h=e.ue_file||"ue-full-11e51f253e8ad9d145f4ed644b40f692._V1_.js",f,i;if(h.indexOf("NSTRUMENTATION_FIL")>=0){return}if("ue_https" in 
e){f=e.ue_https}else{f=e.location&&e.location.protocol=="https:"?1:0}i=f?"https://":"http://";i+=f?g:k;i+=j;i+=h;c(i)}if(!e.ue_inline){if(a.loadUEFull){a.loadUEFull()}else{b()}}a.uels=c;e.ue=a})(window,document);\n                if (!(\'CS\' in window)) { window.CS = {}; }\n                    window.CS.loginLocation = "https://www.imdb.com/registration/signin?u=%2Fsearch%2Ftitle%3Frelease_date%3D2017%26sort%3Dnum_votes%2Cdesc%26page%3D1";\n            </script>\n \n\n        \n        <script type="text/javascript">var IMDbTimer={starttime: new Date().getTime(),pt:\'java\'};</script>\n        \n  <script>(function(t){ (t.events = t.events || {})["csm_head_pre_title"] = new Date().getTime(); })(IMDbTimer);</script>\n        <title>IMDb: Most Voted Titles Released 2017-01-01 to 2017-12-31 - IMDb</title>\n  <script>(function(t){ (t.events = t.events || {})["csm_head_post_title"] = new Date().getTime(); })(IMDbTimer);</script>\n        \n            <link rel="canonical" href="http://www.imdb.com/search/title" />\n            <meta property="og:url" content="http://www.imdb.com/search/title" />\n        \n  <script>(function(t){ (t.events = t.events || {})["csm_head_pre_icon"] = new Date().getTime(); })(IMDbTimer);</script>\n        <link href="http://ia.media-imdb.com/images/G/01/imdb/images/safari-favicon-517611381._CB522736552_.svg" mask rel="icon" sizes="any">\n        <link rel="icon" type="image/ico" href="http://ia.media-imdb.com/images/G/01/imdb/images/favicon-2165806970._CB522736556_.ico" />\n        <meta name="theme-color" content="#000000" />\n        <link rel="shortcut icon" type="image/x-icon" href="http://ia.media-imdb.com/images/G/01/imdb/images/desktop-favicon-2165806970._CB522736561_.ico" />\n        <link href="http://ia.media-imdb.com/images/G/01/imdb/images/mobile/apple-touch-icon-web-4151659188._CB522736129_.png" rel="apple-touch-icon"> \n        <link 
href="http://ia.media-imdb.com/images/G/01/imdb/images/mobile/apple-touch-icon-web-76x76-53536248._CB522736233_.png" rel="apple-touch-icon" sizes="76x76"> \n        <link href="http://ia.media-imdb.com/images/G/01/imdb/images/mobile/apple-touch-icon-web-120x120-2442878471._CB522736253_.png" rel="apple-touch-icon" sizes="120x120"> \n        <link href="http://ia.media-imdb.com/images/G/01/imdb/images/mobile/apple-touch-icon-web-152x152-1475823641._CB522736557_.png" rel="apple-touch-icon" sizes="152x152">            \n        <link rel="search" type="application/opensearchdescription+xml" href="http://ia.media-imdb.com/images/G/01/imdb/images/imdbsearch-3349468880._CB522736605_.xml" title="IMDb" />\n  <script>(function(t){ (t.events = t.events || {})["csm_head_post_icon"] = new Date().getTime(); })(IMDbTimer);</script>\n        \n        <meta property="pageType" content="search" />\n        <meta property="subpageType" content="title" />\n\n\n        <link rel=\'image_src\' href="http://ia.media-imdb.com/images/G/01/imdb/images/logos/imdb_fb_logo-1730868325._CB522736557_.png">\n        <meta property=\'og:image\' content="http://ia.media-imdb.com/images/G/01/imdb/images/logos/imdb_fb_logo-1730868325._CB522736557_.png" />\n\n    <meta property=\'fb:app_id\' content=\'115109575169727\' />\n\n    <meta property=\'og:title\' content="IMDb: Most Voted Titles Released 2017-01-01 to 2017-12-31" />\n    <meta property=\'og:site_name\' content=\'IMDb\' />\n    <meta name="title" content="IMDb: Most Voted Titles Released 2017-01-01 to 2017-12-31 - IMDb" />\n        <meta name="description" content="IMDb\'s advanced search allows you to run extremely powerful queries over all people and titles in the database. Find exactly what you\'re looking for!" />\n        <meta property="og:description" content="IMDb\'s advanced search allows you to run extremely powerful queries over all people and titles in the database. Find exactly what you\'re looking for!" 
/>\n        <meta name="request_id" content="03N6Z2NEAF09T9H26QYE" />\n        \n  <script>(function(t){ (t.events = t.events || {})["csm_head_pre_css"] = new Date().getTime(); })(IMDbTimer);</script>\n<link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/collections/consumersite-4100637360._CB530008524_.css" />\n<!-- h=ics-1e-c4-2xl-4b098b82.us-east-1 -->\n<link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/site/consumer-navbar-mega-238568768._CB532297092_.css" />\n<!--[if IE]><link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/collections/ie-1170868033._CB522736261_.css" /><![endif]-->\n\n            <link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/collections/other-3780135229._CB530008515_.css" />\n            <link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/collections/starbarwidget-2454701167._CB522736579_.css" />\n            <link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/collections/watchlistButton-3806422028._CB531876201_.css" />\n        <noscript>\n            <link rel="stylesheet" type="text/css" href="http://ia.media-imdb.com/images/G/01/imdb/css/wheel/nojs-2827156349._CB522739048_.css">\n        </noscript>\n  <script>(function(t){ (t.events = t.events || {})["csm_head_post_css"] = new Date().getTime(); })(IMDbTimer);</script>\n        \n  <script>(function(t){ (t.events = t.events || {})["csm_head_pre_ads"] = new Date().getTime(); })(IMDbTimer);</script>\n        \n        <script  type="text/javascript">\n            // ensures js doesn\'t die if ads service fails.  \n            // Note that we need to define the js here, since ad js is being rendered inline after this.\n            (function(f) {\n                // Fallback javascript, when the ad Service call fails.  
\n                \n                if((window.csm == null || window.generic == null || window.consoleLog == null)) {\n                    if (window.console && console.log) {\n                        console.log("one or more of window.csm, window.generic or window.consoleLog has been stubbed...");\n                    }\n                }\n                \n                window.csm = window.csm || { measure:f, record:f, duration:f, listen:f, metrics:{} };\n                window.generic = window.generic || { monitoring: { start_timing: f, stop_timing: f } };\n                window.consoleLog = window.consoleLog || f;\n            })(function() {});\n        </script>\n  <script>\n    if (\'csm\' in window) {\n      csm.measure(\'csm_head_delivery_finished\');\n    }\n  </script>\n  

这是什么格式?要快速识别它,应该依据哪些语法特征?

4 个答案:

答案 0 :(得分:2)

这主要是带有一些内联脚本的HTML……开头的 \n 是标记文本中的换行符,因为该页面的开头是几个空行。

有什么问题?你期望得到什么?

答案 1 :(得分:2)

您获得的是字节(bytes)类型的响应 b'....',更多信息可以在这个回答(answer)中找到。

要从页面获取完整文本,请使用以下示例:

import requests as r

url = 'your_url_here'
content = r.get(url).text

print(content)

UPD:对于解析,您可以使用Scrapy或Beautiful Soup等工具。

答案 2 :(得分:1)

这是HTML,我是根据文档类型声明(DOCTYPE)判断出来的:<!DOCTYPE html>。您可以使用BeautifulSoup对其进行解析。由于我还不能发表评论,这里顺便向@Alex解释一下:多余的代码很可能来自script标记,它允许ECMAScript在页面中直接执行。希望这对您有所帮助, BoxTechy

答案 3 :(得分:1)

你似乎要问的是

if(!('CS' in window)) { window.CS = {}; }

这是JavaScript,一种通常嵌入在HTML中的编程语言(如果仔细查看,您肯定会发现它位于<script>和</script>标记之间)。

你具体看到的是'in'运算符(https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/in)和一个空对象({}部分)。如果'window'对象没有'CS'属性,就给它赋值一个空对象。事实上,许多程序员会把它写成:

window.CS=window.CS || {};