使用Scrapy Xpath从脚本标记获取数据

时间:2017-12-08 19:32:56

标签: javascript python python-2.7 web-scraping scrapy

我一直在尝试使用 Scrapy(XPath)从 <script> 标签中提取数据。我的主要问题是如何定位到正确的 div 和 <script> 标签。我是 XPath 新手,非常感谢任何帮助!

<script>
    (function(e, a) {
            var t = {
                "cdn_url": "https://f.vimeocdn.com",
                "view": 1,
                "request": {
                    "files": {
                        "dash": {
                            "separate_av": true,
                            "streams": [{
                                "profile": 164,
                                "quality": "360p",
                                "id": 830872409,
                                "fps": 30
                            }, {
                                "profile": 175,
                                "quality": "1080p",
                                "id": 830872420,
                                "fps": 30
                            }],
                            "default_cdn": "akfire_interconnect_quic"
                        },
                        "progressive": [{
                            "profile": 165,
                            "width": 960,
                            "mime": "video/mp4",
                            "fps": 30,
                            "url": "https://gcs-vimeo.akamaized.net/exp=1512626028~acl=%2A%2F830872419.mp4%2A~hmac=f6990c94f8fe004ec50059b0c1c648441faa41b248a14e881826a4b1233676c3/vimeo-prod-skyfire-std-us/01/1812/9/234061739/830872419.mp4",
                            "cdn": "akamai_interconnect",
                            "quality": "540p",
                            "id": 830872419,
                            "origin": "gcs",
                            "height": 540
                        }, {
                            "profile": 174,
                            "width": 1280,
                            "mime": "video/mp4",
                            "fps": 30,
                            "url": "https://gcs-vimeo.akamaized.net/exp=1512626028~acl=%2A%2F830872411.mp4%2A~hmac=ad12d7e5de00edb54c2360e23429d4a5de6aa0800ff7f4273f2ecc4495171707/vimeo-prod-skyfire-std-us/01/1812/9/234061739/830872411.mp4",
                            "cdn": "akamai_interconnect",
                            "quality": "720p",
                            "id": 830872411,
                            "origin": "gcs",
                            "height": 720
                        }, {
                            "profile": 164,
                            "width": 640,
                            "mime": "video/mp4",
                            "fps": 30,
                            "url": "https://gcs-vimeo.akamaized.net/exp=1512626028~acl=%2A%2F830872409.mp4%2A~hmac=26c477008c9a74d4ceca9895c5307ab19c6ef79e7480e833a503f15ea60bdf2b/vimeo-prod-skyfire-std-us/01/1812/9/234061739/830872409.mp4",
                            "cdn": "akamai_interconnect",
                            "quality": "360p",
                            "id": 830872409,
                            "origin": "gcs",
                            "height": 360
                        }]
                    },
                    .....
</script>

我最终想把 "progressive" 数组中 "width" = 960 的那一项的 "url" 填入我的 item(项目)中。

0 个答案:

没有答案