使用bookmarklet进行网页抓取?

时间:2011-04-30 17:18:04

标签: javascript bookmarklet web-scraping

我想使用书签来收集来自不同网页的一些资源。
我不想使用浏览器扩展来从页面获取 HTML 元素,而是想使用 JavaScript bookmarklet 来抓取网站上的代码。

[编辑]如何使用 JavaScript bookmarklet 从页面中获取 HTML 元素?
这个问题是关于如何使用 bookmarklet 获取页面内部的 HTML 代码,而不是关于 bookmarklet 的一般用法。

3 个答案:

答案 0 :(得分:1)

您不需要任何库来执行此操作。只需在 Firebug 或 Chrome Inspector 中编写您的函数,然后将其整理成如下所示的一行:

javascript:void function(){/* Template: swap the alert call for your own code. The void wrapper keeps the result undefined so the page is not replaced. */alert(1);}();

将其复制并粘贴到地址栏,然后按 Enter 键即可执行。把 alert(1); 替换成您自己的代码。我们把它包装在一个自执行的匿名函数中,否则代码的返回值会替换掉当前网页。

如果您的代码非常长,您可以将其全部写在外部 JavaScript 文件中,然后在上面 alert 所在的位置创建一个 src 指向该文件的 script 标签,并将其附加到页面即可。

答案 1 :(得分:1)

由于 bookmarklet 存在长度限制(bookmarklet length limitation),您可以考虑在有人点击书签时再加载(loading)执行抓取的 JavaScript 代码。关于如何访问 DOM 元素,请参阅此参考资料(reference)。

请注意,由于跨框架安全限制(cross frame security),只有与主窗口同域的 FRAME / IFRAME 才能被抓取。

答案 2 :(得分:0)

我编写的这个长脚本将为您提供确切的信息,以及其他一些独特的增强功能:

javascript:void function(e){var t=function(e){document.writeln("<!DOCTYPE html>"),document.writeln("<html>"),document.writeln("<body>"),document.writeln(""),document.writeln('<p style="font-size:20px"><b>Public Bookmarklet for viewing a whois of a site. Of course this isnt as complex as the real thing, because I got all the data below from scratch.</b></p><p style="font-size:13px"><i>made by shoe%231327</i></p>'),document.writeln('<p style="font-size:20px">DOMAIN INFO:</p>'),document.writeln(""),document.writeln('{"dig":{"header":{"id":"43226","qr":"1","opcode":"Query","aa":"false","tc":"false","rd":"false","ra":"false","ad":"false","cd":"false","rcode":"NXDOMAIN","qdcount":"1","ancount":"0","nscount":"0","arcount":"0"},"answer":[],"additional":[],"authority":[],"bind":";; Security Level : UNCHECKED\n;; HEADER SECTION\n;; id = 43226\n;; qr = 1    opcode = Query    aa = false    tc = false    rd = false\n;; ra = false    ad = false    cd = false    rcode  = NXDOMAIN\n;; qdcount = 1  ancount = 0  nscount = 0  arcount = 0\n\n;; QUESTION SECTION (1  record)\n;; :fqdn.INANY\n"},"error":false}'),document.writeln('<p id="demo"></p>'),document.writeln("<script>"),document.writeln('document.getElementById("demo").innerHTML = '),document.writeln('"DOMAIN:<br>" + window.location.href;'),document.writeln("</script>"),document.writeln("<!--"),document.writeln('<script type="application/javascript">'),document.writeln("  function getIP(json) {"),document.writeln('    document.write("CLIENT IP: ", json.ip);'),document.writeln("  }"),document.writeln("</script>"),document.writeln(""),document.writeln('<script type="application/javascript" src="https://api.ipify.org%3Fformat=jsonp%26callback=getIP"></script>'),document.writeln("-->"),document.writeln("</body>"),document.writeln("</html>"),document.writeln("<p>statuses: [ <br>"),document.writeln('            "clientTransferProhibited"'),document.writeln("            <br>"),document.writeln("            
]"),document.writeln("</p>"),document.writeln('<p style="font-size:20px">CLIENT INFO:</p>'),document.writeln('<pre id="response"></pre>'),document.writeln(""),e.get("https://api.ipdata.co/%3Fapi-key=test",function(t){e("%23response").html(JSON.stringify(t,null,4))},"jsonp"),document.writeln("</body>"),document.writeln("</html>")},n=e%26%26e.fn%26%26parseFloat(e.fn.jquery)>=1.7;if(n)t(e);else{var o=document.createElement("script");o.src="//ajax.googleapis.com/ajax/libs/jquery/1/jquery.js",o.onload=o.onreadystatechange=function(){var e=this.readyState;e%26%26"loaded"!==e%26%26"complete"!==e||t(jQuery.noConflict())}}document.getElementsByTagName("head")[0].appendChild(o)}(window.jQuery);