您好,我正试图使用casperjs和phantomjs,通过网页抓取的方式从网上获取投注优惠券。该页面通过ajax加载并打印出包含数据的表格。该网站为:https://www.pamestoixima.gr/UK/1/Print#market-group=12924.1&marketgroup-template=EVENTSPERDAY&marketgroup-longlist=1
我感兴趣的数据位于class为'市场'的表格中。
我已尝试过在互联网上找到的所有代码,但仍然无法获得结果。该页面已被抓取,但它会打印出“浏览器必须启用javascript”。
到目前为止我的代码:
// Opens the AJAX-driven print page in CasperJS, pauses for a fixed delay so
// the page's scripts get a chance to run, then dumps the current DOM markup.
var targetUrl = 'https://www.pamestoixima.gr/UK/1/Print#market-group=12924.1&marketgroup-template=EVENTSPERDAY&marketgroup-longlist=1';
var waitMillis = 5000;
var casper = require('casper').create();

// Writes the HTML of the currently loaded page to the console.
// Invoked by CasperJS with the casper instance bound as `this`.
function dumpPageHtml() {
    console.log(this.getHTML());
}

// Step run once the initial page load has finished: schedule the fixed
// delay and print the markup when it elapses.
function pauseThenDump() {
    this.wait(waitMillis, dumpPageHtml);
}

casper.start(targetUrl, pauseThenDump);
casper.run();
控制台输出:
C:\Users\Bampis\Desktop\phantom>casperjs test.js
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/x
html1/DTD/xhtml1-strict.dtd"><html lang="en" xmlns="http://www.w3.org/1999/xhtml
" class="ua-dom ua-strict ua-secure ua-windows ua-likegecko ua-safari ua-webkit"
><head><meta http-equiv="content-language" value="en">
<meta http-equiv="content-type" content="text/html; charset=utf-
8">
<title></title>
<!--meta http-equiv="X-UA-Compatible" content="IE=10"/-->
<link href="/Areas/Print/template_1_UK/template.css?ts=201504081
530" rel="stylesheet" type="text/css" media="screen, tv, projection" charset="ut
f-8">
<link href="/debug.css?ts=201504081530" rel="stylesheet" type="t
ext/css" media="screen, tv, projection">
<link href="/Areas/.css/jquery-plugins.css?ts=201504081530" rel=
"Stylesheet" type="text/css">
<link href="/Areas/Print/template_1_UK/print.css?ts=201504081530
" rel="stylesheet" type="text/css" media="print">
<script async="" src="//www.google-analytics.com/analytics.js"><
/script><script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"
></script>
<script type="text/javascript">window.jQuery || document.write('
<script src="/common/js/jquery/jquery.min.js"><\/script>');</script>
<script src="//ajax.googleapis.com/ajax/libs/jqueryui/1.10.1/jqu
ery-ui.min.js"></script>
<script type="text/javascript">window.jQuery.ui || document.writ
e('<script src="/common/js/jquery/jquery-ui.min.js"><\/script>');</script>
<script src="/common/js/jquery/jquery-plugins.js"></script>
<script src="/common/js/script.js"></script>
<script type="text/javascript">
/*<![CDATA[*/
if(window.location.search != "" && window.location.search.indexOf('?debug') == 0
)
{
document.write(unescape("%3Cscript type='text/javascript' src='/common/j
s/runtime-debug-201504021456.js'%3E %3C/script%3E"));
document.write(unescape("%3Cscript type='text/javascript' src='/Areas/Pr
int/template_1_UK/components-debug-201504081530.js'%3E %3C/script%3E"));
}
else
{
document.write(unescape("%3Cscript type='text/javascript' src='/common/j
s/runtime-201504021456.js'%3E %3C/script%3E"));
document.write(unescape("%3Cscript type='text/javascript' src='/Areas/Pr
int/template_1_UK/components-201504081530.js'%3E %3C/script%3E"));
}
/*]]>*/
</script><script type="text/javascript" src="/common/js/runtime-201504021456.js"
> </script><script type="text/javascript" src="/Areas/Print/template_1_UK/compon
ents-201504081530.js"> </script></head>
<body class=" print col1 lang-UK">
<div class="c">
<div class="bg-content clearfix">
<div class="cc wrapper clearfix">
<noscript>
<div class="noscriptDiv">
<div class="top">&
amp;nbsp;</div>
<div class="middle"&g
t;
<p>Javascr
ipt is currently not active in your browser. Javascript must be enabled for this
website.</p>
<p>Javascr
ipt ╬┤╬╡╬╜ ╬╡╬ψ╬╜╬▒╬╣ ╬╡╬╜╬╡╧Β╬│╬χ ╧Δ╧Ε╬┐╬╜ browser ╧Δ╬▒╧Γ. ╬Ω Javascript ╧Α╧Β╬φ
╧Α╬╡╬╣ ╬╜╬▒ ╬╡╬ψ╬╜╬▒╬╣ ╬╡╬╜╬╡╧Β╬│╬┐╧Α╬┐╬╣╬╖╬╝╬φ╬╜╬╖ ╬│╬╣╬▒ ╬▒╧Ζ╧Ε╬χ ╧Ε╬╖╬╜ ╬╣╧Δ╧
Ε╬┐╧Δ╬╡╬╗╬ψ╬┤╬▒.</p>
</div>
<div class="bottom"&g
t;&nbsp;</div>
</div>
</noscript>
<div id="plchcentre" class="centre place
holder">
<div id="plchFlash"></div>
<div id="plchcentrebox2"></div>
<div class="hidden" id="BodyClassOverrid
eComponent1"></div><div class="market-list" id="MarketListContentComponent2" sty
le="display: block; "><img src="/indicator.gif" alt="Loading"></div></div><!-- .
centre -->
<div class="print-buttons">
<a class="button" href="/Areas/P
rint/template_1_UK/#" onclick="window.print(); return false;">Print</a>
</div>
</div><!-- .c -->
</div>
</div>
<!-- Google Analytics -->
<script src="/static/common/analytics/analytics.js" type="text/javascrip
t"></script>
<script type="text/javascript">
//Built with PageBuilder v. 1.0.0.0
var autoWiring = new Framework.AutoWiring();
autoWiring.init(serviceConfiguration, componentConfiguration, dynamicCom
ponentConfiguration, componentPlacementMap, encodedXsltDocumentsMap);
autoWiring.run();
</script><div id="GarbageCollector" style="display: none; "></div>
<div class="ui-tooltip ui-widget ui-corner-all ui-widget-content" id="warp-toolt
ip" style="position: absolute; left: 0px; top: 0px; z-index: 200000; display: no
ne; "></div></body></html>
答案 0 :(得分:1)
据我所见,上面提到的页面的数据来自一个XML文件:
https://www.pamestoixima.gr/cache/evenuemarketGroupLimited/EN/12924.1-0.xml?1457949428105(可能查询字符串中的时间戳与某种日志记录有关,因为即使删除它,也会得到相同的结果)。
所以最好直接抓取这个文件而不是模仿浏览器的行为并获取渲染的html。