屏幕抓取javascript

时间:2011-11-03 20:51:24

标签: javascript screen-scraping screen

我正在从一个网站上抓取内容,返回的是下面这段JavaScript代码。当然,这样既不会显示Flash视频,也不会执行JavaScript,因为我只是用一个简单的PHP DOM解析器来返回HTML。有没有办法运行这段JavaScript,从而得到它输出的嵌入对象(embed object)?

<script type="text/javascript">
    // Inline setup script: builds the three option objects and hands them to
    // SWFObject, which is expected to be loaded elsewhere on the page.

    // Passed as the final (attributes) argument of embedSWF.
    var attributes = { id: "flashMovie" };

    // Passed as the flashvars argument; both config URLs go through escape()
    // before being handed over.
    var flashvars = {
        startjs: "playerLoaded",
        activeColor: "83A7D2",
        themeColor: "FFFFFF",
        config: escape("http://example/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&embed=&utm_source=&multiview=0&premium=1&country=&user=0&vip=0&heightHD=480p&cd=u&ref=browse"),
        // Alternative config URL that was left disabled in the scraped page:
        //flashvars.config = escape("http://example.com/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&premium=1&country=&user=0&vip=0&cd=u&ref=browse");
        config2: escape("http://www.example.com/player_feed_local.php?vid=189988&CHIDS=1,26,43,50&link=http%253A%252F%252Fwww.example.com%252Fjump%252FTesting-Video%252Fvideo189988%253Fref%253Dbrowse")
    };

    // Passed as the params argument of embedSWF.
    var params = {
        startjs: "playerLoaded",
        loop: "false",
        quality: "best",
        bgcolor: "#000000",
        allowfullscreen: "true",
        allowscriptaccess: "always",
        wmode: "opaque"
    };

    // Argument roles below follow the SWFObject 2 embedSWF signature
    // (swfUrl, id, width, height, version, expressInstallSwfurl, ...).
    swfobject.embedSWF(
        "http://www.example.com/Player_v1.11.9.7.swf?v=1.0",
        "flashMovie",
        "100%",
        "500",
        "9",
        "expressInstall.swf",
        flashvars,
        params,
        attributes
    );
</script>

2 个答案:

答案 0 :(得分:4)

您可以像浏览器一样运行JavaScript(使用真正的DOM等等),并使用PhantomJS或Crowbar等工具提取数据。

答案 1 :(得分:0)

使用像WebKit这样的浏览器渲染引擎来执行JavaScript,然后你就可以提取生成的HTML。

以下是一些示例代码:http://webscraping.com/blog/Scraping-JavaScript-webpages-with-webkit/

相关问题