我正在尝试使用 Jsoup 获取以下页面的内容:
http://www.peshawarairport.com.pk/Schedule.aspx?Type=Arrival
但它无法获取完整的页面内容,只返回到某个标记为止的部分内容(输出在中途被截断)。返回的内容如下:
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en" dir="ltr"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en" dir="ltr"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en" dir="ltr"> <![endif]-->
<!--[if IE 9]> <html class="no-js ie9" lang="en" dir="ltr"> <![endif]-->
<!--[if gt IE 9]><html class="no-js" lang="en" dir="ltr"> <![endif]-->
<html>
<head>
<meta charset="utf-8" />
<script type="text/javascript"> var _prum = { id: "5227f1fbabe53ddc1f000000" }; var PRUM_EPISODES = PRUM_EPISODES || {}; PRUM_EPISODES.q = []; PRUM_EPISODES.mark = function (b, a) { PRUM_EPISODES.q.push(["mark", b, a || new Date().getTime()]) }; PRUM_EPISODES.measure = function (b, a, b) { PRUM_EPISODES.q.push(["measure", b, a, b || new Date().getTime()]) }; PRUM_EPISODES.done = function (a) { PRUM_EPISODES.q.push(["done", a]) }; PRUM_EPISODES.mark("firstbyte"); (function () { var b = document.getElementsByTagName("script")[0]; var a = document.createElement("script"); a.type = "text/javascript"; a.async = true; a.charset = "UTF-8"; a.src = "//rum-static.pingdom.net/prum.min.js"; b.parentNode.insertBefore(a, b) })();</script>
<link href="~/images/favicon.ico" rel="CAA Shortcut Icon"></link>
<title>Bacha Khan International Airport, Peshawar | www.peshawarairport.com</title>
<meta name="description" content="">
<meta name="apple-mobile-web-app-capable" content="yes" />
<!-- <meta name="p:domain_verify" content="297cb2c48faff5539c27d75f076408b8"/> -->
<style type="text/css">
@import url("http://www.caapakistan.com.pk/css/jiap-website/system.base.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/system.messages.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/system.theme.css?nkrgyj");
</style>
<style type="text/css">
@import url("http://www.caapakistan.com.pk/css/jiap-website/comment.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/modules/contrib/date/date_api/date9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/field.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/node.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/search.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/css/jiap-website/user.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/modules/contrib/workflow/workflow_admin_ui/workflow_admin_ui9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/modules/contrib/views/css/views9687.css?nkrgyj");
</style>
<style type="text/css">
@import url("http://www.caapakistan.com.pk/sites/all/modules/contrib/ctools/css/ctools9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/modules/contrib/panels/css/panels9687.css?nkrgyj");
</style>
<style type="text/css">
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/bootstrap-n-responsive.min9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/base9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/theme_flysfo9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/flysfo_cn9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/mobilestyle9687.css?nkrgyj");
@import url("http://www.caapakistan.com.pk/sites/all/themes/sfo/css/jplayer.sfo/jplayer.blue.monday9687.css?nkrgyj");
</style>
<script type="text/javascript" src="http://www.caapakistan.com.pk/sites/all/themes/sfo/js/libs/modernizr-2.5.3.min.js"></script>
<script type="text/javascript"> var switchTo5x = false;</script>
<script type="text/javascript"> stLight.options({ publisher: "a574d78b-ed29-4436-b50d-0213b9613fe7", doNotHash: true, doNotCopy: true, hashAddressBar: true, offsetTop:
然而,如果我使用浏览器或任何 REST 客户端访问同一个 URL,就能得到整个页面的完整 HTML。
Java代码:
// Issue an HTTP GET for the arrivals schedule page and keep the parsed result in `doc`.
// The request sends a desktop Chrome user-agent string, sets maxBodySize(0), and uses
// `maxTimeout` (defined outside this snippet) as the timeout.
// NOTE(review): jsoup documents a max body size of 0 as "unlimited" — confirm for the
// jsoup version in use, since the reported symptom is a truncated response.
Document doc = Jsoup.connect("http://www.peshawarairport.com.pk/Schedule.aspx?Type=Arrival").userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36").maxBodySize(0).timeout(maxTimeout)
.get();
答案 0 :(得分:0)
// Fetch up to five posts through Posts.find(...).fetch().
var posts = Posts.find({}, {limit: 5}).fetch();

// Running vote total per owner, keyed by the post's `owner` value.
var votesByOwner = {};

// Add each post's vote to its owner's total; an owner seen for the first
// time (entry is null/undefined) starts from zero.
_.each(posts, function(entry) {
  var key = entry.owner;
  var soFar = votesByOwner[key];
  votesByOwner[key] = (soFar == null ? 0 : soFar) + entry.vote;
});

// Turn the totals into {owner, votes} records and order them by owner.
var result = _.sortBy(
  _.map(votesByOwner, function(total, name) {
    return {owner: name, votes: total};
  }),
  'owner'
);

console.log(result);
请尝试以上代码来获取内容。